1. Vacuum is updated for MVCC.

2. Much faster btree tuple deletion in the case where the first index
   tuple on a page is deleted (no movement to the left page(s)).
3. Remember the blkno of the new root page in the BTPageOpaque of the
   left/right siblings when the root page is split.
Vadim B. Mikheev 1999-03-28 20:32:42 +00:00
parent d4ed17842a
commit fdf6be80f9
16 changed files with 802 additions and 352 deletions
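
Taken together, the vacuum changes below rest on two new t_infomask bits, HEAP_MOVED_OFF and HEAP_MOVED_IN (see the htup.h hunk), with the moving vacuum's XID stored in t_cmin. The visibility rule that the tqual.c hunks repeat in every HeapTupleSatisfies* routine can be condensed into the following sketch; the function name is invented here for illustration, everything else uses identifiers from the patched headers:

static bool
vacuum_moved_tuple_is_valid(HeapTupleHeader tuple)
{
	if (tuple->t_infomask & HEAP_MOVED_OFF)
	{
		/* old copy of a moved tuple: dead once the moving vacuum commits */
		if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
		{
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return false;
		}
	}
	else if (tuple->t_infomask & HEAP_MOVED_IN)
	{
		/* new copy of a moved tuple: valid only if the moving vacuum committed */
		if (!TransactionIdDidCommit((TransactionId) tuple->t_cmin))
		{
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return false;
		}
	}
	return true;		/* fall through to the usual xmin/xmax checks */
}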

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.41 1999/02/13 23:14:22 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.42 1999/03/28 20:31:56 vadim Exp $
*
*
* INTERFACE ROUTINES
@ -1270,7 +1270,7 @@ l2:
newtup->t_data->t_cmin = GetCurrentCommandId();
StoreInvalidTransactionId(&(newtup->t_data->t_xmax));
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask |= HEAP_XMAX_INVALID;
newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
/* logically delete old item */
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.35 1999/02/13 23:14:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.36 1999/03/28 20:31:56 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -853,6 +853,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright)
lopaque->btpo_next = BufferGetBlockNumber(rbuf);
ropaque->btpo_next = oopaque->btpo_next;
lopaque->btpo_parent = ropaque->btpo_parent = oopaque->btpo_parent;
/*
* If the page we're splitting is not the rightmost page at its level
* in the tree, then the first (0) entry on the page is the high key
@ -1103,6 +1105,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
/* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
rootpage = BufferGetPage(rootbuf);
rootbknum = BufferGetBlockNumber(rootbuf);
_bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
/* set btree special data */
@ -1119,6 +1122,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf);
((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo_parent =
((BTPageOpaque) PageGetSpecialPointer(rpage))->btpo_parent =
rootbknum;
/*
* step over the high key on the left page while building the left
* page pointer.
@ -1156,11 +1163,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
pfree(new_item);
/* write and let go of the root buffer */
rootbknum = BufferGetBlockNumber(rootbuf);
_bt_wrtbuf(rel, rootbuf);
/* update metadata page with new root block number */
_bt_metaproot(rel, rootbknum, 0);
WriteNoReleaseBuffer(lbuf);
WriteNoReleaseBuffer(rbuf);
}
/*
@ -1559,6 +1568,7 @@ _bt_shift(Relation rel, Buffer buf, BTStack stack, int keysz,
pageop->btpo_flags |= BTP_CHAIN;
pageop->btpo_prev = npageop->btpo_prev; /* restore prev */
pageop->btpo_next = nbknum; /* next points to the new page */
pageop->btpo_parent = npageop->btpo_parent;
/* init shifted page opaque */
npageop->btpo_prev = bknum = BufferGetBlockNumber(buf);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.18 1999/02/13 23:14:35 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.19 1999/03/28 20:31:57 vadim Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@ -421,6 +421,8 @@ _bt_pageinit(Page page, Size size)
MemSet(page, 0, size);
PageInit(page, size, sizeof(BTPageOpaqueData));
((BTPageOpaque) PageGetSpecialPointer(page))->btpo_parent =
InvalidBlockNumber;
}
/*

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.36 1999/02/21 03:48:27 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.37 1999/03/28 20:31:58 vadim Exp $
*
* NOTES
* This file contains only the public interface routines.
@ -372,11 +372,6 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation
pfree(btitem);
pfree(itup);
#ifdef NOT_USED
/* adjust any active scans that will be affected by this insertion */
_bt_adjscans(rel, &(res->pointerData), BT_INSERT);
#endif
return res;
}
@ -396,15 +391,9 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
if (ItemPointerIsValid(&(scan->currentItemData)))
{
/*
* Now we don't adjust scans on insertion (comments in
* nbtscan.c:_bt_scandel()) and I hope that we will unlock current
* index page before leaving index in LLL: this means that current
* index tuple could be moved right before we get here and we have
* to restore our scan position. We save heap TID pointed by
* current index tuple and use it. This will work untill we start
* to re-use (move heap tuples) without vacuum... - vadim 07/29/98
* Restore scan position using heap TID returned
* by previous call to btgettuple().
*/
_bt_restscan(scan);
res = _bt_next(scan, dir);
@ -612,16 +601,12 @@ void
btdelete(Relation rel, ItemPointer tid)
{
/* adjust any active scans that will be affected by this deletion */
_bt_adjscans(rel, tid, BT_DELETE);
_bt_adjscans(rel, tid);
/* delete the data from the page */
_bt_pagedel(rel, tid);
}
/*
* Reasons are in btgettuple... We have to find index item that
* points to heap tuple returned by previous call to btgettuple().
*/
static void
_bt_restscan(IndexScanDesc scan)
{
@ -637,6 +622,20 @@ _bt_restscan(IndexScanDesc scan)
BTItem item;
BlockNumber blkno;
/*
* We use this as a flag: when the first index tuple on a page
* is deleted we do not move left (that would slow down
* vacuum) - instead we set current->ip_posid just before the
* first index tuple on the current page
* (_bt_step will move it right)...
*/
if (!ItemPointerIsValid(&target))
{
ItemPointerSetOffsetNumber(&(scan->currentItemData),
OffsetNumberPrev(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY));
return;
}
if (maxoff >= offnum)
{

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.19 1999/02/13 23:14:36 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.20 1999/03/28 20:31:58 vadim Exp $
*
*
* NOTES
@ -43,8 +43,7 @@ typedef BTScanListData *BTScanList;
static BTScanList BTScans = (BTScanList) NULL;
static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno);
static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
static void _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
/*
* _bt_regscan() -- register a new scan.
@ -91,7 +90,7 @@ _bt_dropscan(IndexScanDesc scan)
* for a given deletion or insertion
*/
void
_bt_adjscans(Relation rel, ItemPointer tid, int op)
_bt_adjscans(Relation rel, ItemPointer tid)
{
BTScanList l;
Oid relid;
@ -100,41 +99,25 @@ _bt_adjscans(Relation rel, ItemPointer tid, int op)
for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next)
{
if (relid == RelationGetRelid(l->btsl_scan->relation))
_bt_scandel(l->btsl_scan, op,
_bt_scandel(l->btsl_scan,
ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid));
}
}
/*
* _bt_scandel() -- adjust a single scan
* _bt_scandel() -- adjust a single scan on deletion
*
* because each index page is always maintained as an ordered array of
* index tuples, the index tuples on a given page shift beneath any
* given scan. an index modification "behind" a scan position (i.e.,
* same page, lower or equal offset number) will therefore force us to
* adjust the scan in the following ways:
*
* - on insertion, we shift the scan forward by one item.
* - on deletion, we shift the scan backward by one item.
*
* note that:
*
* - we need not worry about the actual ScanDirection of the scan
* itself, since the problem is that the "current" scan position has
* shifted.
* - modifications "ahead" of our scan position do not change the
* array index of the current scan position and so can be ignored.
*/
static void
_bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
Buffer buf;
BTScanOpaque so;
if (!_bt_scantouched(scan, blkno, offno))
return;
ItemPointer current;
Buffer buf;
BTScanOpaque so;
OffsetNumber start;
Page page;
BTPageOpaque opaque;
so = (BTScanOpaque) scan->opaque;
buf = so->btso_curbuf;
@ -144,33 +127,23 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
{
switch (op)
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if (ItemPointerGetOffsetNumber(current) == start)
ItemPointerSetInvalid(&(so->curHeapIptr));
else
{
/*
* Problems occure when current scan page is splitted!
* We saw "Non-functional updates" (ie index tuples were read twice)
* and partial updates ("good" tuples were not read at all) - due to
* losing scan position here. Look @ nbtree.c:btgettuple()
* what we do now... - vadim 07/29/98
case BT_INSERT:
_bt_step(scan, &buf, ForwardScanDirection);
break;
*/
case BT_DELETE:
_bt_step(scan, &buf, BackwardScanDirection);
break;
default:
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
/* NOTREACHED */
}
so->btso_curbuf = buf;
if (ItemPointerIsValid(current))
{
Page page = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(page,
PageGetItemId(page, ItemPointerGetOffsetNumber(current)));
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_curbuf = buf;
if (ItemPointerIsValid(current))
{
Page pg = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(pg,
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->curHeapIptr = btitem->bti_itup.t_tid;
so->curHeapIptr = btitem->bti_itup.t_tid;
}
}
}
@ -179,65 +152,39 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
{
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
buf = so->btso_curbuf;
switch (op)
{
/*
* ...comments are above...
case BT_INSERT:
_bt_step(scan, &buf, ForwardScanDirection);
break;
*/
case BT_DELETE:
_bt_step(scan, &buf, BackwardScanDirection);
break;
default:
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
/* NOTREACHED */
}
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
if (ItemPointerIsValid(current))
{
Page page = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(page,
PageGetItemId(page, ItemPointerGetOffsetNumber(current)));
page = BufferGetPage(so->btso_mrkbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
so->mrkHeapIptr = btitem->bti_itup.t_tid;
if (ItemPointerGetOffsetNumber(current) == start)
ItemPointerSetInvalid(&(so->mrkHeapIptr));
else
{
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
buf = so->btso_curbuf;
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
if (ItemPointerIsValid(current))
{
Page pg = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(pg,
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->mrkHeapIptr = btitem->bti_itup.t_tid;
}
}
}
}
/*
* _bt_scantouched() -- check to see if a scan is affected by a given
* change to the index
*/
static bool
_bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return true;
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return true;
return false;
}
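
The deletion-time and scan-restore halves of the fast-delete change work together: _bt_scandel() above no longer steps a scan onto the left sibling when the first index tuple of the scan's current page is removed; it just invalidates curHeapIptr, and _bt_restscan() in nbtree.c later parks the scan position just before the page's first key so that the next _bt_step() moves right onto whatever tuple now occupies that slot. A condensed, purely illustrative paraphrase of the two hunks:

/* in _bt_scandel(): first key on the page was deleted - do not move left */
if (ItemPointerGetOffsetNumber(current) == start)
	ItemPointerSetInvalid(&(so->curHeapIptr));

/* in _bt_restscan(): an invalid heap TID means "park before the first key";
 * _bt_step() will then advance right to the first remaining tuple */
if (!ItemPointerIsValid(&target))
{
	ItemPointerSetOffsetNumber(&(scan->currentItemData),
		OffsetNumberPrev(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY));
	return;
}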

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.41 1999/02/21 03:48:27 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.42 1999/03/28 20:31:58 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -706,15 +706,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
so = (BTScanOpaque) scan->opaque;
current = &(scan->currentItemData);
/*
* XXX 10 may 91: somewhere there's a bug in our management of the
* cached buffer for this scan. wei discovered it. the following is
* a workaround so he can work until i figure out what's going on.
*/
if (!BufferIsValid(so->btso_curbuf))
so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
BT_READ);
Assert (BufferIsValid(so->btso_curbuf));
/* we still have the buffer pinned and locked */
buf = so->btso_curbuf;
@ -1069,7 +1061,11 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
rel = scan->relation;
current = &(scan->currentItemData);
offnum = ItemPointerGetOffsetNumber(current);
/*
* Don't use ItemPointerGetOffsetNumber here or you risk an
* assertion failure, because ip_posid may be 0 at this point.
*/
offnum = current->ip_posid;
page = BufferGetPage(*bufP);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
so = (BTScanOpaque) scan->opaque;

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.32 1999/02/13 23:14:49 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.33 1999/03/28 20:31:59 vadim Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
@ -933,7 +933,10 @@ CommitTransaction()
* 11/26/96
*/
if (MyProc != (PROC *) NULL)
{
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
}
}
/* --------------------------------
@ -951,7 +954,10 @@ AbortTransaction()
* 11/26/96
*/
if (MyProc != (PROC *) NULL)
{
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
}
/* ----------------
* check the current transaction state

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.96 1999/02/21 03:48:33 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.97 1999/03/28 20:32:01 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -65,6 +65,9 @@ static Portal vc_portal;
static int MESSAGE_LEVEL; /* message level */
static TransactionId XmaxRecent;
extern void GetXmaxRecent(TransactionId *xid);
#define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
#define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
@ -98,9 +101,11 @@ static void vc_free(VRelList vrl);
static void vc_getindices(Oid relid, int *nindices, Relation **Irel);
static void vc_clsindices(int nindices, Relation *Irel);
static void vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
static char *vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *));
static int vc_cmp_blk(char *left, char *right);
static int vc_cmp_offno(char *left, char *right);
static void *vc_find_eq(void *bot, int nelem, int size, void *elm,
int (*compar) (const void *, const void *));
static int vc_cmp_blk(const void *left, const void *right);
static int vc_cmp_offno(const void *left, const void *right);
static int vc_cmp_vtlinks(const void *left, const void *right);
static bool vc_enough_space(VPageDescr vpd, Size len);
void
@ -502,6 +507,8 @@ vc_vacone(Oid relid, bool analyze, List *va_cols)
/* we require the relation to be locked until the indices are cleaned */
LockRelation(onerel, AccessExclusiveLock);
GetXmaxRecent(&XmaxRecent);
/* scan it */
vacuum_pages.vpl_num_pages = fraged_pages.vpl_num_pages = 0;
vc_scanheap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
@ -595,6 +602,7 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
vp;
uint32 tups_vacuumed,
num_tuples,
nkeep,
nunused,
ncrash,
empty_pages,
@ -609,22 +617,24 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
struct rusage ru0,
ru1;
bool do_shrinking = true;
VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
int num_vtlinks = 0;
int free_vtlinks = 100;
getrusage(RUSAGE_SELF, &ru0);
tups_vacuumed = num_tuples = nunused = ncrash = empty_pages =
relname = (RelationGetRelationName(onerel))->data;
elog(MESSAGE_LEVEL, "--Relation %s--", relname);
tups_vacuumed = num_tuples = nkeep = nunused = ncrash = empty_pages =
new_pages = changed_pages = empty_end_pages = 0;
free_size = usable_free_size = 0;
relname = (RelationGetRelationName(onerel))->data;
nblocks = RelationGetNumberOfBlocks(onerel);
vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
vpc->vpd_offsets_used = 0;
elog(MESSAGE_LEVEL, "--Relation %s--", relname);
for (blkno = 0; blkno < nblocks; blkno++)
{
buf = ReadBuffer(onerel, blkno);
@ -686,6 +696,34 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
{
if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID)
tupgone = true;
else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)
tuple.t_data->t_cmin))
{
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
tupgone = true;
}
else
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
pgchanged = true;
}
}
else if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)
tuple.t_data->t_cmin))
{
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
tupgone = true;
}
else
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
pgchanged = true;
}
}
else
{
if (TransactionIdDidAbort(tuple.t_data->t_xmin))
@ -722,22 +760,37 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID))
{
if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED)
tupgone = true;
{
if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
{
pgchanged = true;
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
}
else
tupgone = true;
}
else if (TransactionIdDidAbort(tuple.t_data->t_xmax))
{
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true;
}
else if (TransactionIdDidCommit(tuple.t_data->t_xmax))
tupgone = true;
{
if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
{
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true;
}
else
tupgone = true;
}
else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax))
{
/*
* Not Aborted, Not Committed, Not in Progress - so it is
* from a crashed process. - vadim 06/02/97
*/
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;;
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true;
}
else
@ -746,6 +799,41 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
relname, blkno, offnum, tuple.t_data->t_xmax);
do_shrinking = false;
}
/*
* If the tuple was deleted only recently then
* we must not remove it from the relation.
*/
if (tupgone && tuple.t_data->t_xmax >= XmaxRecent &&
tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)
{
tupgone = false;
nkeep++;
if (!(tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED))
{
tuple.t_data->t_infomask |= HEAP_XMAX_COMMITTED;
pgchanged = true;
}
/*
* If we are shrinking and this tuple is an updated one,
* then remember it so we can construct the updated-tuple
* dependency chain.
*/
if (do_shrinking && !(ItemPointerEquals(&(tuple.t_self),
&(tuple.t_data->t_ctid))))
{
if (free_vtlinks == 0)
{
free_vtlinks = 1000;
vtlinks = (VTupleLink) repalloc(vtlinks,
(free_vtlinks + num_vtlinks) *
sizeof(VTupleLinkData));
}
vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
vtlinks[num_vtlinks].this_tid = tuple.t_self;
free_vtlinks--;
num_vtlinks++;
}
}
}
/*
@ -859,13 +947,31 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
}
}
if (usable_free_size > 0 && num_vtlinks > 0)
{
qsort((char *) vtlinks, num_vtlinks, sizeof (VTupleLinkData),
vc_cmp_vtlinks);
vacrelstats->vtlinks = vtlinks;
vacrelstats->num_vtlinks = num_vtlinks;
}
else
{
vacrelstats->vtlinks = NULL;
vacrelstats->num_vtlinks = 0;
pfree(vtlinks);
}
getrusage(RUSAGE_SELF, &ru1);
elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.",
nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages, new_pages,
num_tuples, tups_vacuumed, ncrash, nunused, min_tlen, max_tlen,
free_size, usable_free_size, empty_end_pages, fraged_pages->vpl_num_pages,
Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; \
Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. \
Elapsed %u/%u sec.",
nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages,
new_pages, num_tuples, tups_vacuumed,
nkeep, vacrelstats->num_vtlinks, ncrash,
nunused, min_tlen, max_tlen, free_size, usable_free_size,
empty_end_pages, fraged_pages->vpl_num_pages,
ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
@ -917,7 +1023,7 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
*idcur;
int last_fraged_block,
last_vacuum_block,
i;
i = 0;
Size tuple_len;
int num_moved,
num_fraged_pages,
@ -1022,6 +1128,280 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
tuple_len = tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum);
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
if ((TransactionId)tuple.t_data->t_cmin != myXID)
elog(ERROR, "Invalid XID in t_cmin");
if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
continue; /* already removed by me */
if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
break;
elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
}
/*
* If this tuple is in a chain of tuples created by
* updates from "recent" transactions then we have to
* move the whole chain of tuples to other places.
*/
if ((tuple.t_data->t_infomask & HEAP_UPDATED &&
tuple.t_data->t_xmin >= XmaxRecent) ||
(!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID) &&
!(ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))))
{
Buffer Cbuf = buf;
Page Cpage;
ItemId Citemid;
ItemPointerData Ctid;
HeapTupleData tp = tuple;
Size tlen = tuple_len;
VTupleMove vtmove = (VTupleMove)
palloc(100 * sizeof(VTupleMoveData));
int num_vtmove = 0;
int free_vtmove = 100;
VPageDescr to_vpd = fraged_pages->vpl_pagedesc[0];
int to_item = 0;
bool freeCbuf = false;
int ti;
if (vacrelstats->vtlinks == NULL)
elog(ERROR, "No one parent tuple was found");
if (cur_buffer != InvalidBuffer)
{
WriteBuffer(cur_buffer);
cur_buffer = InvalidBuffer;
}
/*
* If this tuple is at the beginning or in the middle of the
* chain then we have to move to the end of the chain.
*/
while (!(tp.t_data->t_infomask & HEAP_XMAX_INVALID) &&
!(ItemPointerEquals(&(tp.t_self), &(tp.t_data->t_ctid))))
{
Ctid = tp.t_data->t_ctid;
if (freeCbuf)
ReleaseBuffer(Cbuf);
freeCbuf = true;
Cbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&Ctid));
Cpage = BufferGetPage(Cbuf);
Citemid = PageGetItemId(Cpage,
ItemPointerGetOffsetNumber(&Ctid));
if (!ItemIdIsUsed(Citemid))
elog(ERROR, "Child itemid marked as unused");
tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
tp.t_self = Ctid;
tlen = tp.t_len = ItemIdGetLength(Citemid);
}
/* first, can chain be moved ? */
for ( ; ; )
{
if (!vc_enough_space(to_vpd, tlen))
{
if (to_vpd != last_fraged_page &&
!vc_enough_space(to_vpd, vacrelstats->min_tlen))
{
Assert(num_fraged_pages > to_item + 1);
memmove(fraged_pages->vpl_pagedesc + to_item,
fraged_pages->vpl_pagedesc + to_item + 1,
sizeof(VPageDescr *) * (num_fraged_pages - to_item - 1));
num_fraged_pages--;
Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
}
for (i = 0; i < num_fraged_pages; i++)
{
if (vc_enough_space(fraged_pages->vpl_pagedesc[i], tlen))
break;
}
if (i == num_fraged_pages) /* can't move item anywhere */
{
for (i = 0; i < num_vtmove; i++)
{
Assert(vtmove[i].vpd->vpd_offsets_used > 0);
(vtmove[i].vpd->vpd_offsets_used)--;
}
num_vtmove = 0;
break;
}
to_item = i;
to_vpd = fraged_pages->vpl_pagedesc[to_item];
}
to_vpd->vpd_free -= DOUBLEALIGN(tlen);
if (to_vpd->vpd_offsets_used >= to_vpd->vpd_offsets_free)
to_vpd->vpd_free -= DOUBLEALIGN(sizeof(ItemIdData));
(to_vpd->vpd_offsets_used)++;
if (free_vtmove == 0)
{
free_vtmove = 1000;
vtmove = (VTupleMove) repalloc(vtmove,
(free_vtmove + num_vtmove) *
sizeof(VTupleMoveData));
}
vtmove[num_vtmove].tid = tp.t_self;
vtmove[num_vtmove].vpd = to_vpd;
if (to_vpd->vpd_offsets_used == 1)
vtmove[num_vtmove].cleanVpd = true;
else
vtmove[num_vtmove].cleanVpd = false;
free_vtmove--;
num_vtmove++;
/*
* All done ?
*/
if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
tp.t_data->t_xmin < XmaxRecent)
break;
/*
* Well, try to find tuple with old row version
*/
for ( ; ; )
{
Buffer Pbuf;
Page Ppage;
ItemId Pitemid;
HeapTupleData Ptp;
VTupleLinkData vtld,
*vtlp;
vtld.new_tid = tp.t_self;
vtlp = (VTupleLink)
vc_find_eq((void *) (vacrelstats->vtlinks),
vacrelstats->num_vtlinks,
sizeof(VTupleLinkData),
(void *) &vtld,
vc_cmp_vtlinks);
if (vtlp == NULL)
elog(ERROR, "Parent tuple was not found");
tp.t_self = vtlp->this_tid;
Pbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&(tp.t_self)));
Ppage = BufferGetPage(Pbuf);
Pitemid = PageGetItemId(Ppage,
ItemPointerGetOffsetNumber(&(tp.t_self)));
if (!ItemIdIsUsed(Pitemid))
elog(ERROR, "Parent itemid marked as unused");
Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
Assert(Ptp.t_data->t_xmax == tp.t_data->t_xmin);
/*
* If this tuple is an updated version of a row and
* it was created by the same transaction, then
* no one is interested in this tuple -
* mark it as removed.
*/
if (Ptp.t_data->t_infomask & HEAP_UPDATED &&
Ptp.t_data->t_xmin == Ptp.t_data->t_xmax)
{
TransactionIdStore(myXID,
(TransactionId*) &(Ptp.t_data->t_cmin));
Ptp.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
Ptp.t_data->t_infomask |= HEAP_MOVED_OFF;
WriteBuffer(Pbuf);
continue;
}
tp.t_data = Ptp.t_data;
tlen = tp.t_len = ItemIdGetLength(Pitemid);
if (freeCbuf)
ReleaseBuffer(Cbuf);
Cbuf = Pbuf;
freeCbuf = true;
break;
}
}
if (freeCbuf)
ReleaseBuffer(Cbuf);
if (num_vtmove == 0) /* chain can't be moved */
{
pfree(vtmove);
break;
}
ItemPointerSetInvalid(&Ctid);
for (ti = 0; ti < num_vtmove; ti++)
{
/* Get tuple from chain */
tuple.t_self = vtmove[ti].tid;
Cbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&(tuple.t_self)));
Cpage = BufferGetPage(Cbuf);
Citemid = PageGetItemId(Cpage,
ItemPointerGetOffsetNumber(&(tuple.t_self)));
tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
/* Get page to move in */
cur_buffer = ReadBuffer(onerel, vtmove[ti].vpd->vpd_blkno);
ToPage = BufferGetPage(cur_buffer);
/* if this page was not used before - clean it */
if (!PageIsEmpty(ToPage) && vtmove[i].cleanVpd)
vc_vacpage(ToPage, vtmove[ti].vpd);
heap_copytuple_with_tuple(&tuple, &newtup);
RelationInvalidateHeapTuple(onerel, &tuple);
TransactionIdStore(myXID, (TransactionId*) &(newtup.t_data->t_cmin));
newtup.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_OFF);
newtup.t_data->t_infomask |= HEAP_MOVED_IN;
newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
InvalidOffsetNumber, LP_USED);
if (newoff == InvalidOffsetNumber)
{
elog(ERROR, "\
moving chain: failed to add item with len = %u to page %u",
tuple_len, vtmove[ti].vpd->vpd_blkno);
}
newitemid = PageGetItemId(ToPage, newoff);
pfree(newtup.t_data);
newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
ItemPointerSet(&(newtup.t_self), vtmove[i].vpd->vpd_blkno, newoff);
/*
* Set t_ctid pointing to itself for last tuple in
* chain and to next tuple in chain otherwise.
*/
if (!ItemPointerIsValid(&Ctid))
newtup.t_data->t_ctid = newtup.t_self;
else
newtup.t_data->t_ctid = Ctid;
Ctid = newtup.t_self;
TransactionIdStore(myXID, (TransactionId*) &(tuple.t_data->t_cmin));
tuple.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
num_moved++;
if (Cbuf == buf)
vpc->vpd_offsets[vpc->vpd_offsets_free++] =
ItemPointerGetOffsetNumber(&(tuple.t_self));
if (Irel != (Relation *) NULL)
{
for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
{
FormIndexDatum(idcur->natts,
(AttrNumber *) &(idcur->tform->indkey[0]),
&newtup,
tupdesc,
idatum,
inulls,
idcur->finfoP);
iresult = index_insert(Irel[i],
idatum,
inulls,
&newtup.t_self,
onerel);
if (iresult)
pfree(iresult);
}
}
WriteBuffer(cur_buffer);
if (Cbuf == buf)
ReleaseBuffer(Cbuf);
else
WriteBuffer(Cbuf);
}
cur_buffer = InvalidBuffer;
pfree(vtmove);
continue;
}
/* try to find new page for this tuple */
if (cur_buffer == InvalidBuffer ||
!vc_enough_space(cur_page, tuple_len))
@ -1070,13 +1450,14 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
RelationInvalidateHeapTuple(onerel, &tuple);
/* store transaction information */
TransactionIdStore(myXID, &(newtup.t_data->t_xmin));
newtup.t_data->t_cmin = myCID;
StoreInvalidTransactionId(&(newtup.t_data->t_xmax));
/* set xmin to unknown and xmax to invalid */
newtup.t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup.t_data->t_infomask |= HEAP_XMAX_INVALID;
/*
* Mark new tuple as moved_in by vacuum and
* store vacuum XID in t_cmin !!!
*/
TransactionIdStore(myXID, (TransactionId*) &(newtup.t_data->t_cmin));
newtup.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_OFF);
newtup.t_data->t_infomask |= HEAP_MOVED_IN;
/* add tuple to the page */
newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
@ -1094,11 +1475,14 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->vpd_blkno, newoff);
newtup.t_self = newtup.t_data->t_ctid;
/* now logically delete end-tuple */
TransactionIdStore(myXID, &(tuple.t_data->t_xmax));
tuple.t_data->t_cmax = myCID;
/* set xmax to unknown */
tuple.t_data->t_infomask &= ~(HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED);
/*
* Mark old tuple as moved_off by vacuum and
* store vacuum XID in t_cmin !!!
*/
TransactionIdStore(myXID, (TransactionId*) &(tuple.t_data->t_cmin));
tuple.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
cur_page->vpd_offsets_used++;
num_moved++;
@ -1131,6 +1515,8 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
if (vpc->vpd_offsets_free > 0) /* some tuples were moved */
{
qsort((char *) (vpc->vpd_offsets), vpc->vpd_offsets_free,
sizeof(OffsetNumber), vc_cmp_offno);
vc_reappage(&Nvpl, vpc);
WriteBuffer(buf);
}
@ -1167,7 +1553,7 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
}
/*
* Clean uncleaned reapped pages from vacuum_pages list and set xmin
* Clean uncleaned reapped pages from vacuum_pages list list and set xmin
* committed for inserted tuples
*/
checked_moved = 0;
@ -1178,16 +1564,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
page = BufferGetPage(buf);
if ((*vpp)->vpd_offsets_used == 0) /* this page was not used */
{
/*
* noff == 0 in empty pages only - such pages should be
* re-used
*/
Assert((*vpp)->vpd_offsets_free > 0);
vc_vacpage(page, *vpp);
if (!PageIsEmpty(page))
vc_vacpage(page, *vpp);
}
else
/* this page was used */
else /* this page was used */
{
num_tuples = 0;
max_offset = PageGetMaxOffsetNumber(page);
@ -1199,10 +1579,19 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
if (!ItemIdIsUsed(itemid))
continue;
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
if (TransactionIdEquals((TransactionId) tuple.t_data->t_xmin, myXID))
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
num_tuples++;
if ((TransactionId)tuple.t_data->t_cmin != myXID)
elog(ERROR, "Invalid XID in t_cmin (2)");
if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
num_tuples++;
}
else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
else
elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected (2)");
}
}
Assert((*vpp)->vpd_offsets_used == num_tuples);
@ -1244,16 +1633,13 @@ Elapsed %u/%u sec.",
}
/*
* clean moved tuples from last page in Nvpl list if some tuples
* left there
* clean moved tuples from last page in Nvpl list
*/
if (vpc->vpd_offsets_free > 0 && offnum <= maxoff)
if (vpc->vpd_blkno == blkno - 1 && vpc->vpd_offsets_free > 0)
{
Assert(vpc->vpd_blkno == blkno - 1);
buf = ReadBuffer(onerel, vpc->vpd_blkno);
page = BufferGetPage(buf);
num_tuples = 0;
maxoff = offnum;
for (offnum = FirstOffsetNumber;
offnum < maxoff;
offnum = OffsetNumberNext(offnum))
@ -1262,9 +1648,20 @@ Elapsed %u/%u sec.",
if (!ItemIdIsUsed(itemid))
continue;
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
Assert(TransactionIdEquals((TransactionId) tuple.t_data->t_xmax, myXID));
itemid->lp_flags &= ~LP_USED;
num_tuples++;
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
if ((TransactionId)tuple.t_data->t_cmin != myXID)
elog(ERROR, "Invalid XID in t_cmin (3)");
if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
{
itemid->lp_flags &= ~LP_USED;
num_tuples++;
}
else
elog(ERROR, "HEAP_MOVED_OFF was expected");
}
}
Assert(vpc->vpd_offsets_free == num_tuples);
PageRepairFragmentation(page);
@ -1298,6 +1695,8 @@ Elapsed %u/%u sec.",
}
pfree(vpc);
if (vacrelstats->vtlinks != NULL)
pfree(vacrelstats->vtlinks);
} /* vc_rpfheap */
@ -1522,8 +1921,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl)
ioffno = ItemPointerGetOffsetNumber(itemptr);
vp = &vpd;
vpp = (VPageDescr *) vc_find_eq((char *) (vpl->vpl_pagedesc),
vpl->vpl_num_pages, sizeof(VPageDescr), (char *) &vp,
vpp = (VPageDescr *) vc_find_eq((void *) (vpl->vpl_pagedesc),
vpl->vpl_num_pages, sizeof(VPageDescr), (void *) &vp,
vc_cmp_blk);
if (vpp == (VPageDescr *) NULL)
@ -1537,8 +1936,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl)
return vp;
}
voff = (OffsetNumber *) vc_find_eq((char *) (vp->vpd_offsets),
vp->vpd_offsets_free, sizeof(OffsetNumber), (char *) &ioffno,
voff = (OffsetNumber *) vc_find_eq((void *) (vp->vpd_offsets),
vp->vpd_offsets_free, sizeof(OffsetNumber), (void *) &ioffno,
vc_cmp_offno);
if (voff == (OffsetNumber *) NULL)
@ -1998,8 +2397,9 @@ vc_free(VRelList vrl)
MemoryContextSwitchTo(old);
}
static char *
vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *))
static void *
vc_find_eq(void *bot, int nelem, int size, void *elm,
int (*compar) (const void *, const void *))
{
int res;
int last = nelem - 1;
@ -2053,7 +2453,7 @@ vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, cha
} /* vc_find_eq */
static int
vc_cmp_blk(char *left, char *right)
vc_cmp_blk(const void *left, const void *right)
{
BlockNumber lblk,
rblk;
@ -2070,7 +2470,7 @@ vc_cmp_blk(char *left, char *right)
} /* vc_cmp_blk */
static int
vc_cmp_offno(char *left, char *right)
vc_cmp_offno(const void *left, const void *right)
{
if (*(OffsetNumber *) left < *(OffsetNumber *) right)
@ -2081,6 +2481,33 @@ vc_cmp_offno(char *left, char *right)
} /* vc_cmp_offno */
static int
vc_cmp_vtlinks(const void *left, const void *right)
{
if (((VTupleLink)left)->new_tid.ip_blkid.bi_hi <
((VTupleLink)right)->new_tid.ip_blkid.bi_hi)
return -1;
if (((VTupleLink)left)->new_tid.ip_blkid.bi_hi >
((VTupleLink)right)->new_tid.ip_blkid.bi_hi)
return 1;
/* bi_hi-es are equal */
if (((VTupleLink)left)->new_tid.ip_blkid.bi_lo <
((VTupleLink)right)->new_tid.ip_blkid.bi_lo)
return -1;
if (((VTupleLink)left)->new_tid.ip_blkid.bi_lo >
((VTupleLink)right)->new_tid.ip_blkid.bi_lo)
return 1;
/* bi_lo-es are equal */
if (((VTupleLink)left)->new_tid.ip_posid <
((VTupleLink)right)->new_tid.ip_posid)
return -1;
if (((VTupleLink)left)->new_tid.ip_posid >
((VTupleLink)right)->new_tid.ip_posid)
return 1;
return 0;
}
static void
vc_getindices(Oid relid, int *nindices, Relation **Irel)
@ -2230,7 +2657,7 @@ vc_enough_space(VPageDescr vpd, Size len)
return true; /* and len <= free_space */
/* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
if (len <= vpd->vpd_free - sizeof(ItemIdData))
if (len + DOUBLEALIGN(sizeof(ItemIdData)) <= vpd->vpd_free)
return true;
return false;
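
The heart of the vacuum.c change above is the rule for tuples deleted by "recent" transactions: if the deleting xmax is not yet older than every running backend's xmin (XmaxRecent, obtained from GetXmaxRecent() in shmem.c), the tuple may still be visible to somebody and must be kept, and if it belongs to an update chain its link is recorded so the whole chain can later be relocated as a unit. A condensed paraphrase of the vc_scanheap() hunk (bookkeeping such as nkeep and the vtlinks repalloc growth is omitted):

/* sketch: may vacuum physically remove this deleted tuple? */
if (tupgone && tuple.t_data->t_xmax >= XmaxRecent &&
	(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
	tupgone = false;		/* no - some snapshot may still need it */
	if (do_shrinking &&
		!ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))
	{
		/* it was updated: remember old->new TID link for chain moving */
		vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
		vtlinks[num_vtlinks].this_tid = tuple.t_self;
		num_vtlinks++;
	}
}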

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.49 1999/02/21 03:49:21 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.50 1999/03/28 20:32:17 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -95,7 +95,7 @@ static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
static int FlushBuffer(Buffer buffer, bool release);
static void BufferSync(void);
static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld);
static void PrintBufferDescs(void);
void PrintBufferDescs(void);
/* not static but used by vacuum only ... */
int BlowawayRelationBuffers(Relation rel, BlockNumber block);
@ -1208,23 +1208,24 @@ int
BufferPoolCheckLeak()
{
int i;
int error = 0;
int result = 0;
for (i = 1; i <= NBuffers; i++)
{
if (BufferIsValid(i))
{
BufferDesc *buf = &(BufferDescriptors[i - 1]);
elog(NOTICE,
"buffer leak [%d] detected in BufferPoolCheckLeak()", i - 1);
error = 1;
"Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \
relname=%s, blockNum=%d, flags=0x%x, refcount=%d %d)",
i - 1, buf->freeNext, buf->freePrev,
buf->sb_relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i - 1]);
result = 1;
}
}
if (error)
{
PrintBufferDescs();
return 1;
}
return 0;
return (result);
}
/* ------------------------------------------------
@ -1465,7 +1466,7 @@ DropBuffers(Oid dbid)
* use only.
* -----------------------------------------------------------------
*/
static void
void
PrintBufferDescs()
{
int i;
@ -1474,16 +1475,14 @@ PrintBufferDescs()
if (IsUnderPostmaster)
{
SpinAcquire(BufMgrLock);
#ifdef NOT_USED
for (i = 0; i < NBuffers; ++i, ++buf)
{
elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \
elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \
blockNum=%d, flags=0x%x, refcount=%d %d)",
i, buf->freeNext, buf->freePrev,
buf->sb_relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]);
}
#endif
SpinRelease(BufMgrLock);
}
else

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.37 1999/02/22 06:16:48 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.38 1999/03/28 20:32:22 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -636,12 +636,13 @@ TransactionIdIsInProgress(TransactionId xid)
Snapshot
GetSnapshotData(bool serializable)
{
Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData));
ShmemIndexEnt *result;
PROC *proc;
TransactionId cid = GetCurrentTransactionId();
uint32 count = 0;
uint32 have = 32;
Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData));
ShmemIndexEnt *result;
PROC *proc;
TransactionId cid = GetCurrentTransactionId();
TransactionId xid;
uint32 count = 0;
uint32 have = 32;
Assert(ShmemIndex);
@ -669,19 +670,20 @@ GetSnapshotData(bool serializable)
strncmp(result->key, "PID ", 4) != 0)
continue;
proc = (PROC *) MAKE_PTR(result->location);
if (proc == MyProc || proc->xid < FirstTransactionId)
xid = proc->xid; /* we don't use spin-locking in xact.c ! */
if (proc == MyProc || xid < FirstTransactionId)
continue;
if (proc->xid < snapshot->xmin)
snapshot->xmin = proc->xid;
else if (proc->xid > snapshot->xmax)
snapshot->xmax = proc->xid;
if (xid < snapshot->xmin)
snapshot->xmin = xid;
else if (xid > snapshot->xmax)
snapshot->xmax = xid;
if (have == 0)
{
snapshot->xip = (TransactionId *) realloc(snapshot->xip,
(count + 32) * sizeof(TransactionId));
have = 32;
}
snapshot->xip[count] = proc->xid;
snapshot->xip[count] = xid;
have--;
count++;
}
@ -692,3 +694,48 @@ GetSnapshotData(bool serializable)
elog(ERROR, "GetSnapshotData: ShmemIndex corrupted");
return NULL;
}
/*
* GetXmaxRecent -- returns the oldest transaction that was still running
* when any currently running transaction was started.
* It's used by vacuum to decide which deleted
* tuples must be preserved in a table.
*
* And yet another strange func for this place... - vadim 03/18/99
*/
void
GetXmaxRecent(TransactionId *XmaxRecent)
{
ShmemIndexEnt *result;
PROC *proc;
TransactionId xmin;
Assert(ShmemIndex);
ReadNewTransactionId(XmaxRecent);
SpinAcquire(ShmemIndexLock);
hash_seq((HTAB *) NULL);
while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
{
if (result == (ShmemIndexEnt *) TRUE)
{
SpinRelease(ShmemIndexLock);
return;
}
if (result->location == INVALID_OFFSET ||
strncmp(result->key, "PID ", 4) != 0)
continue;
proc = (PROC *) MAKE_PTR(result->location);
xmin = proc->xmin; /* we don't use spin-locking in xact.c ! */
if (proc == MyProc || xmin < FirstTransactionId)
continue;
if (xmin < *XmaxRecent)
*XmaxRecent = xmin;
}
SpinRelease(ShmemIndexLock);
elog(ERROR, "GetXmaxRecent: ShmemIndex corrupted");
return;
}

View File

@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.27 1999/02/13 23:18:27 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.28 1999/03/28 20:32:25 vadim Exp $
*
* NOTES:
* (1) The lock.c module assumes that the caller here is doing
@ -34,55 +34,6 @@ static bool MultiAcquire(LOCKMETHOD lockmethod, LOCKTAG *tag,
static bool MultiRelease(LOCKMETHOD lockmethod, LOCKTAG *tag,
LOCKMODE lockmode, PG_LOCK_LEVEL level);
#ifdef LowLevelLocking
static MASK MultiConflicts[] = {
(int) NULL,
/* RowShareLock */
(1 << ExclusiveLock),
/* RowExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) | (1 << ShareLock),
/* ShareLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) |
(1 << RowExclusiveLock),
/* ShareRowExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) |
(1 << ShareLock) | (1 << RowExclusiveLock),
/* ExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) | (1 << ShareLock) |
(1 << RowExclusiveLock) | (1 << RowShareLock),
/* ObjShareLock */
(1 << ObjExclusiveLock),
/* ObjExclusiveLock */
(1 << ObjExclusiveLock) | (1 << ObjShareLock),
/* ExtendLock */
(1 << ExtendLock)
};
/*
* write locks have higher priority than read locks and extend locks. May
* want to treat INTENT locks differently.
*/
static int MultiPrios[] = {
(int) NULL,
2,
1,
2,
1,
1
};
#else
/*
* INTENT indicates to higher level that a lower level lock has been
* set. For example, a write lock on a tuple conflicts with a write
@ -121,8 +72,6 @@ static int MultiPrios[] = {
1
};
#endif /* !LowLevelLocking */
/*
* Lock table identifier for this lock table. The multi-level
* lock table is ONE lock table, not three.

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.52 1999/03/28 20:32:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -46,7 +46,7 @@
* This is so that we can support more backends. (system-wide semaphore
* sets run out pretty fast.) -ay 4/95
*
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.52 1999/03/28 20:32:26 vadim Exp $
*/
#include <sys/time.h>
#include <unistd.h>
@ -300,9 +300,7 @@ InitProcess(IPCKey key)
MyProc->pid = MyProcPid;
MyProc->xid = InvalidTransactionId;
#ifdef LowLevelLocking
MyProc->xmin = InvalidTransactionId;
#endif
/* ----------------
* Start keeping spin lock stats from here on. Any botch before

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.25 1999/02/13 23:20:19 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.26 1999/03/28 20:32:29 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -86,11 +86,26 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or
* aborted */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@ -98,14 +113,12 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
return true;
return false;
}
if (!TransactionIdDidCommit(tuple->t_xmin))
else if (!TransactionIdDidCommit(tuple->t_xmin))
{
if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false;
}
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
}
/* the tuple was inserted validly */
@ -200,11 +213,26 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or
* aborted */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{
if (CommandIdGEScanCommandId(tuple->t_cmin))
return false; /* inserted after scan started */
@ -222,18 +250,12 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
else
return false; /* deleted before scan started */
}
/*
* this call is VERY expensive - requires a log table lookup.
*/
if (!TransactionIdDidCommit(tuple->t_xmin))
else if (!TransactionIdDidCommit(tuple->t_xmin))
{
if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false;
}
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
}
@ -288,7 +310,23 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple)
if (th->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or aborted */
return HeapTupleInvisible;
if (TransactionIdIsCurrentTransactionId(th->t_xmin))
if (th->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)th->t_cmin))
{
th->t_infomask |= HEAP_XMIN_INVALID;
return HeapTupleInvisible;
}
}
else if (th->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)th->t_cmin))
{
th->t_infomask |= HEAP_XMIN_INVALID;
return HeapTupleInvisible;
}
}
else if (TransactionIdIsCurrentTransactionId(th->t_xmin))
{
if (CommandIdGEScanCommandId(th->t_cmin) && !heapisoverride())
return HeapTupleInvisible; /* inserted after scan started */
@ -306,19 +344,12 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple)
else
return HeapTupleInvisible; /* updated before scan started */
}
/*
* This call is VERY expensive - requires a log table lookup.
* Actually, this should be done by query before...
*/
if (!TransactionIdDidCommit(th->t_xmin))
else if (!TransactionIdDidCommit(th->t_xmin))
{
if (TransactionIdDidAbort(th->t_xmin))
th->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return HeapTupleInvisible;
}
th->t_infomask |= HEAP_XMIN_COMMITTED;
}
@ -375,10 +406,26 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or aborted */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@ -390,8 +437,7 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
return false;
}
if (!TransactionIdDidCommit(tuple->t_xmin))
else if (!TransactionIdDidCommit(tuple->t_xmin))
{
if (TransactionIdDidAbort(tuple->t_xmin))
{
@ -401,7 +447,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
SnapshotDirty->xmin = tuple->t_xmin;
return true; /* in insertion by other */
}
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
}
@ -451,11 +496,26 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or
* aborted */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{
if (CommandIdGEScanCommandId(tuple->t_cmin))
return false; /* inserted after scan started */
@ -473,18 +533,12 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
else
return false; /* deleted before scan started */
}
/*
* this call is VERY expensive - requires a log table lookup.
*/
if (!TransactionIdDidCommit(tuple->t_xmin))
else if (!TransactionIdDidCommit(tuple->t_xmin))
{
if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false;
}
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
}

View File

@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: htup.h,v 1.13 1999/02/13 23:20:54 momjian Exp $
* $Id: htup.h,v 1.14 1999/03/28 20:32:30 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -117,8 +117,11 @@ typedef HeapTupleData *HeapTuple;
#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */
#define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */
#define HEAP_MARKED_FOR_UPDATE 0x1000 /* marked for UPDATE */
#define HEAP_UPDATED 0x2000 /* this is UPDATEd version of row */
#define HEAP_MOVED_OFF 0x4000 /* removed or moved to another place by vacuum */
#define HEAP_MOVED_IN 0x8000 /* moved from another place by vacuum */
#define HEAP_XACT_MASK 0x0F00 /* */
#define HEAP_XACT_MASK 0xFF00 /* */
#define HeapTupleNoNulls(tuple) \
(!(((HeapTuple) (tuple))->t_data->t_infomask & HEAP_HASNULL))

View File

@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.23 1999/02/13 23:20:55 momjian Exp $
* $Id: nbtree.h,v 1.24 1999/03/28 20:32:34 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -42,6 +42,7 @@ typedef struct BTPageOpaqueData
{
BlockNumber btpo_prev;
BlockNumber btpo_next;
BlockNumber btpo_parent;
uint16 btpo_flags;
#define BTP_LEAF (1 << 0)
@ -176,13 +177,6 @@ typedef struct BTPageState
#define BT_INSERTION 0
#define BT_DESCENT 1
/*
* We must classify index modification types for the benefit of
* _bt_adjscans.
*/
#define BT_INSERT 0
#define BT_DELETE 1
/*
* In general, the btree code tries to localize its knowledge about
* page layout to a couple of routines. However, we need a special
@ -268,7 +262,7 @@ extern void btdelete(Relation rel, ItemPointer tid);
*/
extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan);
extern void _bt_adjscans(Relation rel, ItemPointer tid, int op);
extern void _bt_adjscans(Relation rel, ItemPointer tid);
/*
* prototypes for functions in nbtsearch.c

View File

@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: vacuum.h,v 1.17 1999/02/13 23:21:20 momjian Exp $
* $Id: vacuum.h,v 1.18 1999/03/28 20:32:38 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -95,16 +95,35 @@ typedef struct VRelListData
typedef VRelListData *VRelList;
typedef struct VTupleLinkData
{
ItemPointerData new_tid;
ItemPointerData this_tid;
} VTupleLinkData;
typedef VTupleLinkData *VTupleLink;
typedef struct VTupleMoveData
{
ItemPointerData tid; /* tuple ID */
VPageDescr vpd; /* where to move */
bool cleanVpd; /* clean vpd before using */
} VTupleMoveData;
typedef VTupleMoveData *VTupleMove;
typedef struct VRelStats
{
Oid relid;
int num_tuples;
int num_pages;
Size min_tlen;
Size max_tlen;
bool hasindex;
int va_natts; /* number of attrs being analyzed */
VacAttrStats *vacattrstats;
Oid relid;
int num_tuples;
int num_pages;
Size min_tlen;
Size max_tlen;
bool hasindex;
int va_natts; /* number of attrs being analyzed */
VacAttrStats *vacattrstats;
int num_vtlinks;
VTupleLink vtlinks;
} VRelStats;
extern bool VacuumRunning;