Repair subtle VACUUM bug that led to 'HEAP_MOVED_IN was not expected' errors.

VACUUM normally compacts the table back-to-front, and stops
as soon as it gets to a page that it has moved some tuples onto.
(This logic doesn't make for a complete packing of the table, but it
should be pretty close.)  But the test for having reached a page with
moved-in tuples was to check whether the current page was the last
entry in fraged_pages, the list of pages with enough free space to be
move-in targets.  Other code removes pages from that list once they
fill up; a kluge prevented the last list entry from being removed,
but it didn't get the job done.  Fixed by keeping a separate
variable that contains the largest block number into which a tuple
has been moved.  There's no longer any need to protect the last element
of the fraged_pages list.
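
To make the new termination rule concrete, here is a toy model of the
back-to-front compaction loop (illustration only -- pages[], CAP, and
the drain loop are invented for this sketch, not the committed code):

    #include <stdio.h>

    #define NPAGES 8
    #define CAP    4                    /* tuples per page in this toy */

    int main(void)
    {
        int pages[NPAGES] = {4, 1, 3, 0, 2, 4, 1, 2};
        int last_move_dest_block = -1;  /* highest block moved into so far */

        /* Scan back-to-front; quit once we reach a page that has had
         * tuples moved onto it.  The bound is this tracked variable,
         * not a sentinel entry of a list that other code may delete. */
        for (int blkno = NPAGES - 1; blkno > last_move_dest_block; blkno--)
        {
            /* drain page blkno into lower pages that still have room */
            for (int dest = 0; dest < blkno && pages[blkno] > 0; dest++)
            {
                while (pages[dest] < CAP && pages[blkno] > 0)
                {
                    pages[dest]++;
                    pages[blkno]--;
                    if (dest > last_move_dest_block)
                        last_move_dest_block = dest;  /* remember target */
                }
            }
        }

        for (int i = 0; i < NPAGES; i++)
            printf("page %d: %d tuples\n", i, pages[i]);
        return 0;
    }

Run on the sample data, the loop stops at page 3 (the highest move
target), leaving pages 0-3 full and one straggler tuple on page 4 --
close to, but not exactly, a complete packing, as noted above.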
Also, fix NOTICE messages to describe elapsed user/system CPU time
correctly.
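
The arithmetic behind the corrected messages, as a self-contained
sketch (the numbers and the print_elapsed helper are made up; the real
code is the vc_show_rusage function added below):

    #include <stdio.h>
    #include <sys/time.h>

    static void print_elapsed(struct timeval t0, struct timeval t1)
    {
        if (t1.tv_usec < t0.tv_usec)    /* borrow one second */
        {
            t1.tv_sec--;
            t1.tv_usec += 1000000;
        }
        printf("%ld.%02ld sec.\n",
               (long) (t1.tv_sec - t0.tv_sec),
               (long) (t1.tv_usec - t0.tv_usec) / 10000);
    }

    int main(void)
    {
        struct timeval t0 = {1, 900000};    /* 1.900000 sec of CPU */
        struct timeval t1 = {2, 100000};    /* 2.100000 sec of CPU */

        /* seconds-only subtraction overstates the elapsed time ... */
        printf("naive: %ld sec.\n", (long) (t1.tv_sec - t0.tv_sec));
        /* ... while borrowing yields the true 0.20 sec */
        print_elapsed(t0, t1);
        return 0;
    }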
Tom Lane 2000-01-10 04:09:50 +00:00
parent b86ca72f39
commit fcb7c14d02
1 changed file with 111 additions and 77 deletions


@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.133 1999/12/29 10:13:20 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.134 2000/01/10 04:09:50 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -95,6 +95,8 @@ static int vc_cmp_blk(const void *left, const void *right);
 static int vc_cmp_offno(const void *left, const void *right);
 static int vc_cmp_vtlinks(const void *left, const void *right);
 static bool vc_enough_space(VPageDescr vpd, Size len);
+ static char *vc_show_rusage(struct rusage *ru0);
 void
 vacuum(char *vacrel, bool verbose, bool analyze, List *va_spec)
@@ -637,12 +639,11 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
 Size min_tlen = MaxTupleSize;
 Size max_tlen = 0;
 int32 i;
- struct rusage ru0,
- ru1;
 bool do_shrinking = true;
 VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
 int num_vtlinks = 0;
 int free_vtlinks = 100;
+ struct rusage ru0;
 getrusage(RUSAGE_SELF, &ru0);
@@ -987,25 +988,21 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
 pfree(vtlinks);
 }
- getrusage(RUSAGE_SELF, &ru1);
 elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
 Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; \
- Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. \
- Elapsed %u/%u sec.",
+ Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. %s",
 nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages,
 new_pages, num_tuples, tups_vacuumed,
 nkeep, vacrelstats->num_vtlinks, ncrash,
 nunused, min_tlen, max_tlen, free_size, usable_free_size,
 empty_end_pages, fraged_pages->vpl_num_pages,
- ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
- ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+ vc_show_rusage(&ru0));
 } /* vc_scanheap */
 /*
- * vc_rpfheap() -- try to repaire relation' fragmentation
+ * vc_rpfheap() -- try to repair relation's fragmentation
 *
 * This routine marks dead tuples as unused and tries re-use dead space
 * by moving tuples (and inserting indices if needed). It constructs
@@ -1016,7 +1013,8 @@ Elapsed %u/%u sec.",
 */
 static void
 vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
- VPageList vacuum_pages, VPageList fraged_pages, int nindices, Relation *Irel)
+ VPageList vacuum_pages, VPageList fraged_pages,
+ int nindices, Relation *Irel)
 {
 TransactionId myXID;
 CommandId myCID;
@@ -1040,14 +1038,13 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 InsertIndexResult iresult;
 VPageListData Nvpl;
 VPageDescr cur_page = NULL,
- last_fraged_page,
 last_vacuum_page,
 vpc,
 *vpp;
 int cur_item = 0;
 IndDesc *Idesc,
 *idcur;
- int last_fraged_block,
+ int last_move_dest_block = -1,
 last_vacuum_block,
 i = 0;
 Size tuple_len;
@@ -1060,8 +1057,7 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 bool isempty,
 dowrite,
 chain_tuple_moved;
- struct rusage ru0,
- ru1;
+ struct rusage ru0;
 getrusage(RUSAGE_SELF, &ru0);
@@ -1078,26 +1074,32 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 Nvpl.vpl_num_pages = 0;
 num_fraged_pages = fraged_pages->vpl_num_pages;
- last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
- last_fraged_block = last_fraged_page->vpd_blkno;
 Assert(vacuum_pages->vpl_num_pages > vacuum_pages->vpl_empty_end_pages);
 vacuumed_pages = vacuum_pages->vpl_num_pages - vacuum_pages->vpl_empty_end_pages;
 last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
 last_vacuum_block = last_vacuum_page->vpd_blkno;
- Assert(last_vacuum_block >= last_fraged_block);
 cur_buffer = InvalidBuffer;
 num_moved = 0;
 vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
 vpc->vpd_offsets_used = vpc->vpd_offsets_free = 0;
+ /*
+ * Scan pages backwards from the last nonempty page, trying to move
+ * tuples down to lower pages. Quit when we reach a page that we
+ * have moved any tuples onto. Note that if a page is still in the
+ * fraged_pages list (list of candidate move-target pages) when we
+ * reach it, we will remove it from the list. This ensures we never
+ * move a tuple up to a higher page number.
+ *
+ * NB: this code depends on the vacuum_pages and fraged_pages lists
+ * being in order, and on fraged_pages being a subset of vacuum_pages.
+ */
 nblocks = vacrelstats->num_pages;
- for (blkno = nblocks - vacuum_pages->vpl_empty_end_pages - 1;; blkno--)
+ for (blkno = nblocks - vacuum_pages->vpl_empty_end_pages - 1;
+ blkno > last_move_dest_block;
+ blkno--)
 {
- /* if it's reapped page and it was used by me - quit */
- if (blkno == last_fraged_block && last_fraged_page->vpd_offsets_used > 0)
- break;
 buf = ReadBuffer(onerel, blkno);
 page = BufferGetPage(buf);
@@ -1117,21 +1119,24 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 else
 Assert(isempty);
 --vacuumed_pages;
- Assert(vacuumed_pages > 0);
- /* get prev reapped page from vacuum_pages */
- last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
- last_vacuum_block = last_vacuum_page->vpd_blkno;
- if (blkno == last_fraged_block) /* this page in
- * fraged_pages too */
+ if (vacuumed_pages > 0)
 {
- --num_fraged_pages;
- Assert(num_fraged_pages > 0);
- Assert(last_fraged_page->vpd_offsets_used == 0);
- /* get prev reapped page from fraged_pages */
- last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
- last_fraged_block = last_fraged_page->vpd_blkno;
+ /* get prev reapped page from vacuum_pages */
+ last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
+ last_vacuum_block = last_vacuum_page->vpd_blkno;
 }
+ else
+ {
+ last_vacuum_page = NULL;
+ last_vacuum_block = -1;
+ }
+ if (num_fraged_pages > 0 &&
+ blkno ==
+ fraged_pages->vpl_pagedesc[num_fraged_pages-1]->vpd_blkno)
+ {
+ /* page is in fraged_pages too; remove it */
+ --num_fraged_pages;
+ }
- Assert(last_fraged_block <= last_vacuum_block);
 if (isempty)
 {
 ReleaseBuffer(buf);
@@ -1217,10 +1222,10 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 HeapTupleData tp = tuple;
 Size tlen = tuple_len;
 VTupleMove vtmove = (VTupleMove)
- palloc(100 * sizeof(VTupleMoveData));
+ palloc(100 * sizeof(VTupleMoveData));
 int num_vtmove = 0;
 int free_vtmove = 100;
- VPageDescr to_vpd = fraged_pages->vpl_pagedesc[0];
+ VPageDescr to_vpd = NULL;
 int to_item = 0;
 bool freeCbuf = false;
 int ti;
@@ -1276,17 +1281,20 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
 /* first, can chain be moved ? */
 for (;;)
 {
- if (!vc_enough_space(to_vpd, tlen))
+ if (to_vpd == NULL ||
+ !vc_enough_space(to_vpd, tlen))
 {
- if (to_vpd != last_fraged_page &&
- !vc_enough_space(to_vpd, vacrelstats->min_tlen))
+ /* if to_vpd no longer has enough free space to be
+ * useful, remove it from fraged_pages list
+ */
+ if (to_vpd != NULL &&
+ !vc_enough_space(to_vpd, vacrelstats->min_tlen))
 {
- Assert(num_fraged_pages > to_item + 1);
+ Assert(num_fraged_pages > to_item);
 memmove(fraged_pages->vpl_pagedesc + to_item,
- fraged_pages->vpl_pagedesc + to_item + 1,
- sizeof(VPageDescr *) * (num_fraged_pages - to_item - 1));
+ fraged_pages->vpl_pagedesc + to_item + 1,
+ sizeof(VPageDescr) * (num_fraged_pages - to_item - 1));
 num_fraged_pages--;
- Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
 }
 for (i = 0; i < num_fraged_pages; i++)
 {
@@ -1477,6 +1485,8 @@ moving chain: failed to add item with len = %u to page %u",
 newtup.t_datamcxt = NULL;
 newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
 ItemPointerSet(&(newtup.t_self), vtmove[ti].vpd->vpd_blkno, newoff);
+ if (((int) vtmove[ti].vpd->vpd_blkno) > last_move_dest_block)
+ last_move_dest_block = vtmove[ti].vpd->vpd_blkno;
 /*
 * Set t_ctid pointing to itself for last tuple in
@@ -1545,23 +1555,17 @@ moving chain: failed to add item with len = %u to page %u",
 {
 WriteBuffer(cur_buffer);
 cur_buffer = InvalidBuffer;
 /*
- * If no one tuple can't be added to this page -
- * remove page from fraged_pages. - vadim 11/27/96
- *
- * But we can't remove last page - this is our
- * "show-stopper" !!! - vadim 02/25/98
+ * If previous target page is now too full to add
+ * *any* tuple to it, remove it from fraged_pages.
 */
- if (cur_page != last_fraged_page &&
- !vc_enough_space(cur_page, vacrelstats->min_tlen))
+ if (!vc_enough_space(cur_page, vacrelstats->min_tlen))
 {
- Assert(num_fraged_pages > cur_item + 1);
+ Assert(num_fraged_pages > cur_item);
 memmove(fraged_pages->vpl_pagedesc + cur_item,
 fraged_pages->vpl_pagedesc + cur_item + 1,
- sizeof(VPageDescr *) * (num_fraged_pages - cur_item - 1));
+ sizeof(VPageDescr) * (num_fraged_pages - cur_item - 1));
 num_fraged_pages--;
- Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
 }
 }
 for (i = 0; i < num_fraged_pages; i++)
@@ -1623,6 +1627,9 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
 cur_page->vpd_offsets_used++;
 num_moved++;
 cur_page->vpd_free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
+ if (((int) cur_page->vpd_blkno) > last_move_dest_block)
+ last_move_dest_block = cur_page->vpd_blkno;
 vpc->vpd_offsets[vpc->vpd_offsets_free++] = offnum;
 /* insert index' tuples if needed */
@@ -1789,14 +1796,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
 }
 Assert(num_moved == checked_moved);
- getrusage(RUSAGE_SELF, &ru1);
- elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. \
- Elapsed %u/%u sec.",
+ elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
 RelationGetRelationName(onerel),
 nblocks, blkno, num_moved,
- ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
- ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+ vc_show_rusage(&ru0));
 if (Nvpl.vpl_num_pages > 0)
 {
@@ -1950,7 +1953,7 @@ vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages)
 /*
 * vc_vacpage() -- free dead tuples on a page
- * and repaire its fragmentation.
+ * and repair its fragmentation.
 */
 static void
 vc_vacpage(Page page, VPageDescr vpd)
@@ -1958,6 +1961,9 @@ vc_vacpage(Page page, VPageDescr vpd)
 ItemId itemid;
 int i;
+ /* There shouldn't be any tuples moved onto the page yet! */
+ Assert(vpd->vpd_offsets_used == 0);
 for (i = 0; i < vpd->vpd_offsets_free; i++)
 {
 itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_offsets[i] - 1]);
@@ -1978,8 +1984,7 @@ vc_scanoneind(Relation indrel, int num_tuples)
 IndexScanDesc iscan;
 int nitups;
 int nipages;
- struct rusage ru0,
- ru1;
+ struct rusage ru0;
 getrusage(RUSAGE_SELF, &ru0);
@@ -2000,12 +2005,9 @@ vc_scanoneind(Relation indrel, int num_tuples)
 nipages = RelationGetNumberOfBlocks(indrel);
 vc_updstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
- getrusage(RUSAGE_SELF, &ru1);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
 RelationGetRelationName(indrel), nipages, nitups,
- ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
- ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+ vc_show_rusage(&ru0));
 if (nitups != num_tuples)
 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
@@ -2036,8 +2038,7 @@ vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples)
 int num_index_tuples;
 int num_pages;
 VPageDescr vp;
- struct rusage ru0,
- ru1;
+ struct rusage ru0;
 getrusage(RUSAGE_SELF, &ru0);
@@ -2081,13 +2082,10 @@ vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples)
 num_pages = RelationGetNumberOfBlocks(indrel);
 vc_updstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
- getrusage(RUSAGE_SELF, &ru1);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
 RelationGetRelationName(indrel), num_pages,
 num_index_tuples - keep_tuples, tups_vacuumed,
- ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
- ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
+ vc_show_rusage(&ru0));
 if (num_index_tuples != num_tuples + keep_tuples)
 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
@@ -2905,3 +2903,39 @@ vc_enough_space(VPageDescr vpd, Size len)
 return false;
 } /* vc_enough_space */
+ /*
+ * Compute elapsed time since ru0 usage snapshot, and format into
+ * a displayable string. Result is in a static string, which is
+ * tacky, but no one ever claimed that the Postgres backend is
+ * threadable...
+ */
+ static char *
+ vc_show_rusage(struct rusage *ru0)
+ {
+ static char result[64];
+ struct rusage ru1;
+ getrusage(RUSAGE_SELF, &ru1);
+ if (ru1.ru_stime.tv_usec < ru0->ru_stime.tv_usec)
+ {
+ ru1.ru_stime.tv_sec--;
+ ru1.ru_stime.tv_usec += 1000000;
+ }
+ if (ru1.ru_utime.tv_usec < ru0->ru_utime.tv_usec)
+ {
+ ru1.ru_utime.tv_sec--;
+ ru1.ru_utime.tv_usec += 1000000;
+ }
+ snprintf(result, sizeof(result),
+ "CPU %d.%02ds/%d.%02du sec.",
+ (int) (ru1.ru_stime.tv_sec - ru0->ru_stime.tv_sec),
+ (int) (ru1.ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 10000,
+ (int) (ru1.ru_utime.tv_sec - ru0->ru_utime.tv_sec),
+ (int) (ru1.ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 10000);
+ return result;
+ }
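
For reference, the fraged_pages entry-removal idiom that appears twice
above, reduced to a standalone sketch on a plain int array (the array
and names are invented for the example).  The size argument to memmove
is now expressed in terms of the element type -- sizeof(VPageDescr),
not sizeof(VPageDescr *); VPageDescr is itself a pointer typedef, so
the two happen to be the same size, but only the former is correct by
construction:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        int arr[] = {10, 20, 30, 40, 50};
        int n = 5;
        int i = 2;                      /* remove arr[2] == 30 */

        /* shift the tail down one slot; size is per-element */
        memmove(arr + i, arr + i + 1, sizeof(arr[0]) * (n - i - 1));
        n--;

        for (int k = 0; k < n; k++)
            printf("%d ", arr[k]);      /* prints: 10 20 40 50 */
        printf("\n");
        return 0;
    }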