/*-------------------------------------------------------------------------
 *
 * hashsearch.c
 *    search code for postgres hash tables
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hashsearch.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/predicate.h"
#include "utils/rel.h"

static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
                           ScanDirection dir);
static int  _hash_load_qualified_items(IndexScanDesc scan, Page page,
                                       OffsetNumber offnum, ScanDirection dir);
static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
                                  OffsetNumber offnum, IndexTuple itup);
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
                           Page *pagep, HashPageOpaque *opaquep);

/*
 *  _hash_next() -- Get the next item in a scan.
 *
 *      On entry, so->currPos describes the current page, which may
 *      be pinned but not locked, and so->currPos.itemIndex identifies
 *      which item was previously returned.
 *
 *      On successful exit, scan->xs_heaptid is set to the TID of the
 *      next heap tuple, and so->currPos is updated as needed.
 *
 *      On failure exit (no more tuples), we return false with pin
 *      held on bucket page but no pins or locks held on overflow
 *      page.
 */
bool
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    HashScanPosItem *currItem;
    BlockNumber blkno;
    Buffer      buf;
    bool        end_of_scan = false;

    /*
     * Advance to the next tuple on the current page; or if done, try to read
     * data from the next or previous page based on the scan direction.
     * Before moving to the next or previous page, make sure that we deal
     * with all the killed items.
     */
    if (ScanDirectionIsForward(dir))
    {
        if (++so->currPos.itemIndex > so->currPos.lastItem)
        {
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            blkno = so->currPos.nextPage;
            if (BlockNumberIsValid(blkno))
            {
                buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
                TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
                if (!_hash_readpage(scan, &buf, dir))
                    end_of_scan = true;
            }
            else
                end_of_scan = true;
        }
    }
    else
    {
        if (--so->currPos.itemIndex < so->currPos.firstItem)
        {
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            blkno = so->currPos.prevPage;
            if (BlockNumberIsValid(blkno))
            {
                buf = _hash_getbuf(rel, blkno, HASH_READ,
                                   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
                TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));

                /*
                 * We always maintain the pin on the bucket page for the
                 * whole scan operation, so release the additional pin we
                 * have acquired here.
                 */
                if (buf == so->hashso_bucket_buf ||
                    buf == so->hashso_split_bucket_buf)
                    _hash_dropbuf(rel, buf);

                if (!_hash_readpage(scan, &buf, dir))
                    end_of_scan = true;
            }
            else
                end_of_scan = true;
        }
    }

    if (end_of_scan)
    {
        _hash_dropscanbuf(rel, so);
        HashScanPosInvalidate(so->currPos);
        return false;
    }

    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_heaptid = currItem->heapTid;

    return true;
}
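
/*
 * For reference (a simplified sketch, not part of this file): the hash AM
 * entry point hashgettuple() in hash.c drives the functions in this file
 * roughly as follows, using _hash_first() to position the scan and
 * _hash_next() to advance it:
 *
 *      if (!HashScanPosIsValid(so->currPos))
 *          res = _hash_first(scan, dir);
 *      else
 *          res = _hash_next(scan, dir);
 *
 * Hash index scans are always lossy (only the hash code is stored in the
 * index), so the caller also sets scan->xs_recheck = true.
 */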

/*
 * Advance to next page in a bucket, if any.  If we are scanning the bucket
 * being populated during a split, then after the last page of that bucket
 * this function advances to the bucket being split.
 */
static void
_hash_readnext(IndexScanDesc scan,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
    BlockNumber blkno;
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    bool        block_found = false;

    blkno = (*opaquep)->hasho_nextblkno;

    /*
     * Retain the pin on the primary bucket page till the end of the scan.
     * See the comments in _hash_first for the reason the pin is retained.
     */
    if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
        LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
    else
        _hash_relbuf(rel, *bufp);

    *bufp = InvalidBuffer;
    /* check for interrupts while we're not holding any buffer lock */
    CHECK_FOR_INTERRUPTS();
    if (BlockNumberIsValid(blkno))
    {
        *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
        block_found = true;
    }
    else if (so->hashso_buc_populated && !so->hashso_buc_split)
    {
        /*
         * End of bucket: scan the bucket being split if there was a split
         * in progress at the start of the scan.
         */
        *bufp = so->hashso_split_bucket_buf;

        /*
         * The buffer for the bucket being split must be valid, as we
         * acquire the pin on it before the start of the scan and retain it
         * till the end of the scan.
         */
        Assert(BufferIsValid(*bufp));

        LockBuffer(*bufp, BUFFER_LOCK_SHARE);
        PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);

        /*
         * Setting hashso_buc_split to true indicates that we are scanning
         * the bucket being split.
         */
        so->hashso_buc_split = true;

        block_found = true;
    }

    if (block_found)
    {
        *pagep = BufferGetPage(*bufp);
        TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
    }
}
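
/*
 * Descriptive note, derived from the code above: the page chain a forward
 * scan visits when a split was in progress at scan start looks like
 *
 *      bucket being populated:  primary page -> overflow -> ... -> overflow
 *                                                (hasho_nextblkno invalid)
 *                                                                |
 *                                                                v
 *      bucket being split:      primary page -> overflow -> ... -> overflow
 *
 * _hash_readnext() follows hasho_nextblkno within a bucket and, at the end
 * of the bucket being populated, jumps to the bucket being split;
 * _hash_readprev() walks the same path in reverse.
 */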

/*
 * Advance to previous page in a bucket, if any.  If the scan started during
 * a split, then after the first page of the bucket being split this
 * function advances to the bucket being populated.
 */
static void
_hash_readprev(IndexScanDesc scan,
               Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
    BlockNumber blkno;
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    bool        haveprevblk;

    blkno = (*opaquep)->hasho_prevblkno;

    /*
     * Retain the pin on the primary bucket page till the end of the scan.
     * See the comments in _hash_first for the reason the pin is retained.
     */
    if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
    {
        LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
        haveprevblk = false;
    }
    else
    {
        _hash_relbuf(rel, *bufp);
        haveprevblk = true;
    }

    *bufp = InvalidBuffer;
    /* check for interrupts while we're not holding any buffer lock */
    CHECK_FOR_INTERRUPTS();

    if (haveprevblk)
    {
        Assert(BlockNumberIsValid(blkno));
        *bufp = _hash_getbuf(rel, blkno, HASH_READ,
                             LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
        *pagep = BufferGetPage(*bufp);
        TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

        /*
         * We always maintain the pin on the bucket page for the whole scan
         * operation, so release the additional pin we have acquired here.
         */
        if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
            _hash_dropbuf(rel, *bufp);
    }
    else if (so->hashso_buc_populated && so->hashso_buc_split)
    {
        /*
         * End of bucket: scan the bucket being populated if there was a
         * split in progress at the start of the scan.
         */
        *bufp = so->hashso_bucket_buf;

        /*
         * The buffer for the bucket being populated must be valid, as we
         * acquire the pin on it before the start of the scan and retain it
         * till the end of the scan.
         */
        Assert(BufferIsValid(*bufp));

        LockBuffer(*bufp, BUFFER_LOCK_SHARE);
        *pagep = BufferGetPage(*bufp);
        *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

        /* move to the end of bucket chain */
        while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
            _hash_readnext(scan, bufp, pagep, opaquep);

        /*
         * Setting hashso_buc_split to false indicates that we are scanning
         * the bucket being populated.
         */
        so->hashso_buc_split = false;
    }
}
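
/*
 * Descriptive note on the pin/lock protocol used above: the scan holds a
 * pin (but no lock) on the primary bucket page for its whole lifetime, and
 * takes a share lock only on the page currently being read.  The retained
 * pin keeps VACUUM's split-cleanup and squeeze phases from removing or
 * moving tuples the scan still needs, while dropping the lock between
 * pages allows concurrent insertions to proceed.
 */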

/*
 *  _hash_first() -- Find the first item in a scan.
 *
 *      We find the first item (or, if backward scan, the last item) in the
 *      index that satisfies the qualification associated with the scan
 *      descriptor.
 *
 *      On successful exit, data about the matching tuple(s) on the page has
 *      been loaded into so->currPos, and scan->xs_heaptid is set to the
 *      heap TID of the current tuple.  If the page containing the current
 *      index tuple is an overflow page, both its pin and lock are released,
 *      whereas a bucket page is kept pinned but not locked.
 *
 *      On failure exit (no more tuples), we return false, with pin held on
 *      bucket page but no pins or locks held on overflow page.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    ScanKey     cur;
    uint32      hashkey;
    Bucket      bucket;
    Buffer      buf;
    Page        page;
    HashPageOpaque opaque;
    HashScanPosItem *currItem;

    pgstat_count_index_scan(rel);

    /*
     * We do not support hash scans with no index qualification, because we
     * would have to read the whole index rather than just one bucket. That
     * creates a whole raft of problems, since we haven't got a practical way
     * to lock all the buckets against splits or compactions.
     */
    if (scan->numberOfKeys < 1)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("hash indexes do not support whole-index scans")));

    /* There may be more than one index qual, but we hash only the first */
    cur = &scan->keyData[0];

    /* We support only single-column hash indexes */
    Assert(cur->sk_attno == 1);
    /* And there's only one operator strategy, too */
    Assert(cur->sk_strategy == HTEqualStrategyNumber);

    /*
     * If the constant in the index qual is NULL, assume it cannot match any
     * items in the index.
     */
    if (cur->sk_flags & SK_ISNULL)
        return false;

    /*
     * Okay to compute the hash key.  We want to do this before acquiring
     * any locks, in case a user-defined hash function happens to be slow.
     *
     * If the scankey operator is not a cross-type comparison, we can use
     * the cached hash function; otherwise we have to look it up in the
     * catalogs.
     *
     * We support the convention that sk_subtype == InvalidOid means the
     * opclass input type; this is a hack to simplify life for ScanKeyInit().
     */
    if (cur->sk_subtype == rel->rd_opcintype[0] ||
        cur->sk_subtype == InvalidOid)
        hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
    else
        hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
                                           cur->sk_subtype);

    so->hashso_sk_hash = hashkey;

    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
    PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
    page = BufferGetPage(buf);
    TestForOldSnapshot(scan->xs_snapshot, rel, page);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
    bucket = opaque->hasho_bucket;

    so->hashso_bucket_buf = buf;

    /*
     * If a bucket split is in progress, then while scanning the bucket
     * being populated, we need to skip tuples that were copied from the
     * bucket being split.  We also need to maintain a pin on the bucket
     * being split to ensure that split-cleanup work done by vacuum doesn't
     * remove tuples from it till this scan is done.  We need to maintain a
     * pin on the bucket being populated to ensure that vacuum doesn't
     * squeeze that bucket till this scan is complete; otherwise, the
     * ordering of tuples can't be maintained during forward and backward
     * scans.  Here, we have to be cautious about locking order: first,
     * acquire the lock on the bucket being split; then, release the lock on
     * it but not the pin; then, acquire a lock on the bucket being
     * populated and re-verify whether the bucket split is still in
     * progress.  Acquiring the lock on the bucket being split first ensures
     * that vacuum waits for this scan to finish.
     */
    if (H_BUCKET_BEING_POPULATED(opaque))
    {
        BlockNumber old_blkno;
        Buffer      old_buf;

        old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);

        /*
         * release the lock on new bucket and re-acquire it after acquiring
         * the lock on old bucket.
         */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
        TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));

        /*
         * remember the split bucket buffer so as to use it later for
         * scanning.
         */
        so->hashso_split_bucket_buf = old_buf;
        LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buf);
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
        Assert(opaque->hasho_bucket == bucket);

        if (H_BUCKET_BEING_POPULATED(opaque))
            so->hashso_buc_populated = true;
        else
        {
            _hash_dropbuf(rel, so->hashso_split_bucket_buf);
            so->hashso_split_bucket_buf = InvalidBuffer;
        }
    }

    /* If a backwards scan is requested, move to the end of the chain */
    if (ScanDirectionIsBackward(dir))
    {
        /*
         * Backward scans that start during a split need to start from the
         * end of the bucket being split.
         */
        while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
               (so->hashso_buc_populated && !so->hashso_buc_split))
            _hash_readnext(scan, &buf, &page, &opaque);
    }

    /* remember which buffer we have pinned, if any */
    Assert(BufferIsInvalid(so->currPos.buf));
    so->currPos.buf = buf;

    /* Now find all the tuples satisfying the qualification from a page */
    if (!_hash_readpage(scan, &buf, dir))
        return false;

    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
    scan->xs_heaptid = currItem->heapTid;

    /* if we're here, _hash_readpage found at least one valid tuple */
    return true;
}
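
/*
 * For reference (derived from hashutil.c, not part of this file):
 * _hash_getbucketbuf_from_hashkey() maps the computed hash key to a bucket
 * essentially via _hash_hashkey2bucket():
 *
 *      bucket = hashkey & highmask;
 *      if (bucket > maxbucket)
 *          bucket = bucket & lowmask;
 *
 * i.e., the key is masked to the current doubled table size, and folded
 * back into the allocated range if it lands past the last existing bucket.
 */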

/*
 *  _hash_readpage() -- Load data from current index page into so->currPos
 *
 *      We scan all the items in the current index page and save them into
 *      so->currPos if they satisfy the qualification.  If no matching items
 *      are found in the current page, we move to the next or previous page
 *      in the bucket chain, as indicated by the scan direction.
 *
 *      Return true if any matching items are found, else return false.
 */
static bool
_hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Buffer      buf;
    Page        page;
    HashPageOpaque opaque;
    OffsetNumber offnum;
    uint16      itemIndex;

    buf = *bufP;
    Assert(BufferIsValid(buf));
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    so->currPos.buf = buf;
    so->currPos.currPage = BufferGetBlockNumber(buf);

    if (ScanDirectionIsForward(dir))
    {
        BlockNumber prev_blkno = InvalidBlockNumber;

        for (;;)
        {
            /* new page, locate starting position by binary search */
            offnum = _hash_binsearch(page, so->hashso_sk_hash);

            itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

            if (itemIndex != 0)
                break;

            /*
             * Could not find any matching tuples in the current page, so
             * move to the next page.  Before leaving the current page, deal
             * with any killed items.
             */
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            /*
             * If this is a primary bucket page, hasho_prevblkno is not a
             * real block number.
             */
            if (so->currPos.buf == so->hashso_bucket_buf ||
                so->currPos.buf == so->hashso_split_bucket_buf)
                prev_blkno = InvalidBlockNumber;
            else
                prev_blkno = opaque->hasho_prevblkno;

            _hash_readnext(scan, &buf, &page, &opaque);
            if (BufferIsValid(buf))
            {
                so->currPos.buf = buf;
                so->currPos.currPage = BufferGetBlockNumber(buf);
            }
            else
            {
                /*
                 * Remember the next and previous block numbers so that
                 * scrollable cursors know the start position, and return
                 * false to indicate that no more matching tuples were
                 * found.  Also, don't reset currPage or lsn, because we
                 * expect _hash_kill_items to be called for the old page
                 * after this function returns.
                 */
                so->currPos.prevPage = prev_blkno;
                so->currPos.nextPage = InvalidBlockNumber;
                so->currPos.buf = buf;
                return false;
            }
        }

        so->currPos.firstItem = 0;
        so->currPos.lastItem = itemIndex - 1;
        so->currPos.itemIndex = 0;
    }
    else
    {
        BlockNumber next_blkno = InvalidBlockNumber;

        for (;;)
        {
            /* new page, locate starting position by binary search */
            offnum = _hash_binsearch_last(page, so->hashso_sk_hash);

            itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

            if (itemIndex != MaxIndexTuplesPerPage)
                break;

            /*
             * Could not find any matching tuples in the current page, so
             * move to the previous page.  Before leaving the current page,
             * deal with any killed items.
             */
            if (so->numKilled > 0)
                _hash_kill_items(scan);

            if (so->currPos.buf == so->hashso_bucket_buf ||
                so->currPos.buf == so->hashso_split_bucket_buf)
                next_blkno = opaque->hasho_nextblkno;

            _hash_readprev(scan, &buf, &page, &opaque);
            if (BufferIsValid(buf))
            {
                so->currPos.buf = buf;
                so->currPos.currPage = BufferGetBlockNumber(buf);
            }
            else
            {
                /*
                 * Remember the next and previous block numbers so that
                 * scrollable cursors know the start position, and return
                 * false to indicate that no more matching tuples were
                 * found.  Also, don't reset currPage or lsn, because we
                 * expect _hash_kill_items to be called for the old page
                 * after this function returns.
                 */
                so->currPos.prevPage = InvalidBlockNumber;
                so->currPos.nextPage = next_blkno;
                so->currPos.buf = buf;
                return false;
            }
        }

        so->currPos.firstItem = itemIndex;
        so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
        so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
    }

    if (so->currPos.buf == so->hashso_bucket_buf ||
        so->currPos.buf == so->hashso_split_bucket_buf)
    {
        so->currPos.prevPage = InvalidBlockNumber;
        so->currPos.nextPage = opaque->hasho_nextblkno;
        LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
        so->currPos.prevPage = opaque->hasho_prevblkno;
        so->currPos.nextPage = opaque->hasho_nextblkno;
        _hash_relbuf(rel, so->currPos.buf);
        so->currPos.buf = InvalidBuffer;
    }

    Assert(so->currPos.firstItem <= so->currPos.lastItem);
    return true;
}
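
/*
 * Descriptive note on how so->currPos.items[] is filled (derived from the
 * code above): a forward scan packs matches at the low end of the array,
 * so firstItem = 0 and lastItem = count - 1; a backward scan packs them at
 * the high end, so lastItem = MaxIndexTuplesPerPage - 1 and firstItem is
 * the lowest slot used.  itemIndex then walks between firstItem and
 * lastItem as _hash_next() advances the scan.
 */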

/*
 * Load all the qualified items from the current index page
 * into so->currPos.  Helper function for _hash_readpage.
 */
static int
_hash_load_qualified_items(IndexScanDesc scan, Page page,
                           OffsetNumber offnum, ScanDirection dir)
{
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    IndexTuple  itup;
    int         itemIndex;
    OffsetNumber maxoff;

    maxoff = PageGetMaxOffsetNumber(page);

    if (ScanDirectionIsForward(dir))
    {
        /* load items[] in ascending order */
        itemIndex = 0;

        while (offnum <= maxoff)
        {
            Assert(offnum >= FirstOffsetNumber);
            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

            /*
             * Skip tuples that were moved by a split operation if this scan
             * started while that split was in progress.  Also skip tuples
             * that are marked as dead.
             */
            if ((so->hashso_buc_populated && !so->hashso_buc_split &&
                 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
                (scan->ignore_killed_tuples &&
                 (ItemIdIsDead(PageGetItemId(page, offnum)))))
            {
                offnum = OffsetNumberNext(offnum);  /* move forward */
                continue;
            }

            if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
                _hash_checkqual(scan, itup))
            {
                /* tuple is qualified, so remember it */
                _hash_saveitem(so, itemIndex, offnum, itup);
                itemIndex++;
            }
            else
            {
                /*
                 * No more matching tuples exist on this page, so exit the
                 * loop.
                 */
                break;
            }

            offnum = OffsetNumberNext(offnum);
        }

        Assert(itemIndex <= MaxIndexTuplesPerPage);
        return itemIndex;
    }
    else
    {
        /* load items[] in descending order */
        itemIndex = MaxIndexTuplesPerPage;

        while (offnum >= FirstOffsetNumber)
        {
            Assert(offnum <= maxoff);
            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

            /*
             * Skip tuples that were moved by a split operation if this scan
             * started while that split was in progress.  Also skip tuples
             * that are marked as dead.
             */
            if ((so->hashso_buc_populated && !so->hashso_buc_split &&
                 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
                (scan->ignore_killed_tuples &&
                 (ItemIdIsDead(PageGetItemId(page, offnum)))))
            {
                offnum = OffsetNumberPrev(offnum);  /* move back */
                continue;
            }

            if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
                _hash_checkqual(scan, itup))
            {
                itemIndex--;
                /* tuple is qualified, so remember it */
                _hash_saveitem(so, itemIndex, offnum, itup);
            }
            else
            {
                /*
                 * No more matching tuples exist on this page, so exit the
                 * loop.
                 */
                break;
            }

            offnum = OffsetNumberPrev(offnum);
        }

        Assert(itemIndex >= 0);
        return itemIndex;
    }
}
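
/*
 * A note on the early exit above (derived from the page layout): items on
 * a hash page are kept in hash-value order, which is what lets
 * _hash_binsearch()/_hash_binsearch_last() find the starting offset and
 * lets the loop stop as soon as it sees a tuple whose hash key no longer
 * matches so->hashso_sk_hash.
 */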

/* Save an index item into so->currPos.items[itemIndex] */
static inline void
_hash_saveitem(HashScanOpaque so, int itemIndex,
               OffsetNumber offnum, IndexTuple itup)
{
    HashScanPosItem *currItem = &so->currPos.items[itemIndex];

    currItem->heapTid = itup->t_tid;
    currItem->indexOffset = offnum;
}