postgresql/src/backend/access/common/detoast.c

790 lines
21 KiB
C

/*-------------------------------------------------------------------------
*
* detoast.c
* Retrieve compressed or external variable size attributes.
*
* Copyright (c) 2000-2020, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/access/common/detoast.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/detoast.h"
#include "access/genam.h"
#include "access/heaptoast.h"
#include "access/table.h"
#include "access/toast_internals.h"
#include "common/pg_lzcompress.h"
#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
static void heap_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
int32 slicelength, struct varlena *result);
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
/* ----------
* detoast_external_attr -
*
* Public entry point to get back a toasted value from
* external source (possibly still in compressed format).
*
* This will return a datum that contains all the data internally, ie, not
* relying on external storage or memory, but it can still be compressed or
* have a short header. Note some callers assume that if the input is an
* EXTERNAL datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_external_attr(struct varlena *attr)
{
struct varlena *result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse if value is still external in some other way */
if (VARATT_IS_EXTERNAL(attr))
return detoast_external_attr(attr);
/*
* Copy into the caller's memory context, in case caller tries to
* pfree the result.
*/
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
ExpandedObjectHeader *eoh;
Size resultsize;
eoh = DatumGetEOHP(PointerGetDatum(attr));
resultsize = EOH_get_flat_size(eoh);
result = (struct varlena *) palloc(resultsize);
EOH_flatten_into(eoh, (void *) result, resultsize);
}
else
{
/*
* This is a plain value inside of the main tuple - why am I called?
*/
result = attr;
}
return result;
}
/* ----------
* detoast_attr -
*
* Public entry point to get back a toasted value from compression
* or external storage. The result is always non-extended varlena form.
*
* Note some callers assume that if the input is an EXTERNAL or COMPRESSED
* datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_attr(struct varlena *attr)
{
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
*/
attr = toast_fetch_datum(attr);
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
struct varlena *tmp = attr;
attr = toast_decompress_datum(tmp);
pfree(tmp);
}
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse in case value is still extended in some other way */
attr = detoast_attr(attr);
/* if it isn't, we'd better copy it */
if (attr == (struct varlena *) redirect.pointer)
{
struct varlena *result;
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
attr = result;
}
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
attr = detoast_external_attr(attr);
/* flatteners are not allowed to produce compressed/short output */
Assert(!VARATT_IS_EXTENDED(attr));
}
else if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
*/
attr = toast_decompress_datum(attr);
}
else if (VARATT_IS_SHORT(attr))
{
/*
* This is a short-header varlena --- convert to 4-byte header format
*/
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
struct varlena *new_attr;
new_attr = (struct varlena *) palloc(new_size);
SET_VARSIZE(new_attr, new_size);
memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
attr = new_attr;
}
return attr;
}
/* ----------
* detoast_attr_slice -
*
* Public entry point to get back part of a toasted value
* from compression or external storage.
*
* Note: When slicelength is negative, return suffix of the value.
* ----------
*/
struct varlena *
detoast_attr_slice(struct varlena *attr,
int32 sliceoffset, int32 slicelength)
{
struct varlena *preslice;
struct varlena *result;
char *attrdata;
int32 attrsize;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* fast path for non-compressed external datums */
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
/*
* For compressed values, we need to fetch enough slices to decompress
* at least the requested part (when a prefix is requested). Otherwise,
* just fetch all slices.
*/
if (slicelength > 0 && sliceoffset >= 0)
{
int32 max_size;
/*
* Determine maximum amount of compressed data needed for a prefix
* of a given length (after decompression).
*/
max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
toast_pointer.va_extsize);
/*
* Fetch enough compressed slices (compressed marker will get set
* automatically).
*/
preslice = toast_fetch_datum_slice(attr, 0, max_size);
}
else
preslice = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
return detoast_attr_slice(redirect.pointer,
sliceoffset, slicelength);
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/* pass it off to detoast_external_attr to flatten */
preslice = detoast_external_attr(attr);
}
else
preslice = attr;
Assert(!VARATT_IS_EXTERNAL(preslice));
if (VARATT_IS_COMPRESSED(preslice))
{
struct varlena *tmp = preslice;
/* Decompress enough to encompass the slice and the offset */
if (slicelength > 0 && sliceoffset >= 0)
preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset);
else
preslice = toast_decompress_datum(tmp);
if (tmp != attr)
pfree(tmp);
}
if (VARATT_IS_SHORT(preslice))
{
attrdata = VARDATA_SHORT(preslice);
attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
}
else
{
attrdata = VARDATA(preslice);
attrsize = VARSIZE(preslice) - VARHDRSZ;
}
/* slicing of datum for compressed cases and plain value */
if (sliceoffset >= attrsize)
{
sliceoffset = 0;
slicelength = 0;
}
if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
slicelength = attrsize - sliceoffset;
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
SET_VARSIZE(result, slicelength + VARHDRSZ);
memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
if (preslice != attr)
pfree(preslice);
return result;
}
/* ----------
* toast_fetch_datum -
*
* Reconstruct an in memory Datum from the chunks saved
* in the toast relation
* ----------
*/
static struct varlena *
toast_fetch_datum(struct varlena *attr)
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
attrsize = toast_pointer.va_extsize;
result = (struct varlena *) palloc(attrsize + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
else
SET_VARSIZE(result, attrsize + VARHDRSZ);
if (attrsize == 0)
return result; /* Probably shouldn't happen, but just in case. */
/*
* Open the toast relation and its indexes
*/
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize, 0,
attrsize, result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
return result;
}
/* ----------
* toast_fetch_datum_slice -
*
* Reconstruct a segment of a Datum from the chunks saved
* in the toast relation
*
* Note that this function supports non-compressed external datums
* and compressed external datums (in which case the requested slice
* has to be a prefix, i.e. sliceoffset has to be 0).
* ----------
*/
static struct varlena *
toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
int32 slicelength)
{
Relation toastrel;
struct varlena *result;
struct varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
* It's nonsense to fetch slices of a compressed datum unless when it's
* a prefix -- this isn't lo_* we can't return a compressed datum which
* is meaningful to toast later.
*/
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
attrsize = toast_pointer.va_extsize;
if (sliceoffset >= attrsize)
{
sliceoffset = 0;
slicelength = 0;
}
/*
* When fetching a prefix of a compressed external datum, account for the
* rawsize tracking amount of raw data, which is stored at the beginning
* as an int32 value).
*/
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
slicelength = slicelength + sizeof(int32);
if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
slicelength = attrsize - sliceoffset;
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
else
SET_VARSIZE(result, slicelength + VARHDRSZ);
if (slicelength == 0)
return result; /* Can save a lot of work at this point! */
/* Open the toast relation */
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize,
sliceoffset, slicelength, result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
return result;
}
/*
* Fetch a TOAST slice from a heap table.
*
* toastrel is the relation from which chunks are to be fetched.
* valueid identifies the TOAST value from which chunks are being fetched.
* attrsize is the total size of the TOAST value.
* sliceoffset is the byte offset within the TOAST value from which to fetch.
* slicelength is the number of bytes to be fetched from the TOAST value.
* result is the varlena into which the results should be written.
*/
static void
heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
int32 sliceoffset, int32 slicelength,
struct varlena *result)
{
Relation *toastidxs;
ScanKeyData toastkey[3];
TupleDesc toasttupDesc = toastrel->rd_att;
int nscankeys;
SysScanDesc toastscan;
HeapTuple ttup;
int32 expectedchunk;
int32 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
int startchunk;
int endchunk;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
/* Look for the valid index of toast relation */
validIndex = toast_open_indexes(toastrel,
AccessShareLock,
&toastidxs,
&num_indexes);
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
Assert(endchunk <= totalchunks);
/*
* Setup a scan key to fetch from the index. This is either two keys or
* three depending on the number of chunks.
*/
ScanKeyInit(&toastkey[0],
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(valueid));
/*
* No additional condition if fetching all chunks. Otherwise, use an
* equality condition for one chunk, and a range condition otherwise.
*/
if (startchunk == 0 && endchunk == totalchunks - 1)
nscankeys = 1;
else if (startchunk == endchunk)
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(startchunk));
nscankeys = 2;
}
else
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(startchunk));
ScanKeyInit(&toastkey[2],
(AttrNumber) 2,
BTLessEqualStrategyNumber, F_INT4LE,
Int32GetDatum(endchunk));
nscankeys = 3;
}
/* Prepare for scan */
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, nscankeys, toastkey);
/*
* Read the chunks by index
*
* The index is on (valueid, chunkidx) so they will come in order
*/
expectedchunk = startchunk;
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
int32 curchunk;
Pointer chunk;
bool isnull;
char *chunkdata;
int32 chunksize;
int32 expected_size;
int32 chcpystrt;
int32 chcpyend;
/*
* Have a chunk, extract the sequence number and the data
*/
curchunk = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
if (!VARATT_IS_EXTENDED(chunk))
{
chunksize = VARSIZE(chunk) - VARHDRSZ;
chunkdata = VARDATA(chunk);
}
else if (VARATT_IS_SHORT(chunk))
{
/* could happen due to heap_form_tuple doing its thing */
chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
chunkdata = VARDATA_SHORT(chunk);
}
else
{
/* should never happen */
elog(ERROR, "found toasted toast chunk for toast value %u in %s",
valueid, RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
/*
* Some checks on the data we've found
*/
if (curchunk != expectedchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
curchunk, expectedchunk, valueid,
RelationGetRelationName(toastrel))));
if (curchunk > endchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
curchunk,
startchunk, endchunk, valueid,
RelationGetRelationName(toastrel))));
expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
if (chunksize != expected_size)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, expected_size,
curchunk, totalchunks, valueid,
RelationGetRelationName(toastrel))));
/*
* Copy the data into proper place in our result
*/
chcpystrt = 0;
chcpyend = chunksize - 1;
if (curchunk == startchunk)
chcpystrt = sliceoffset % TOAST_MAX_CHUNK_SIZE;
if (curchunk == endchunk)
chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE;
memcpy(VARDATA(result) +
(curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
chunkdata + chcpystrt,
(chcpyend - chcpystrt) + 1);
expectedchunk++;
}
/*
* Final checks that we successfully fetched the datum
*/
if (expectedchunk != (endchunk + 1))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("missing chunk number %d for toast value %u in %s",
expectedchunk, valueid,
RelationGetRelationName(toastrel))));
/* End scan and close indexes. */
systable_endscan_ordered(toastscan);
toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
}
/* ----------
* toast_decompress_datum -
*
* Decompress a compressed version of a varlena datum
*/
static struct varlena *
toast_decompress_datum(struct varlena *attr)
{
struct varlena *result;
Assert(VARATT_IS_COMPRESSED(attr));
result = (struct varlena *)
palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
TOAST_COMPRESS_SIZE(attr),
VARDATA(result),
TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
elog(ERROR, "compressed data is corrupted");
return result;
}
/* ----------
* toast_decompress_datum_slice -
*
* Decompress the front of a compressed version of a varlena datum.
* offset handling happens in detoast_attr_slice.
* Here we just decompress a slice from the front.
*/
static struct varlena *
toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
{
struct varlena *result;
int32 rawsize;
Assert(VARATT_IS_COMPRESSED(attr));
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
VARDATA(result),
slicelength, false);
if (rawsize < 0)
elog(ERROR, "compressed data is corrupted");
SET_VARSIZE(result, rawsize + VARHDRSZ);
return result;
}
/* ----------
* toast_raw_datum_size -
*
* Return the raw (detoasted) size of a varlena datum
* (including the VARHDRSZ header)
* ----------
*/
Size
toast_raw_datum_size(Datum value)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
result = EOH_get_flat_size(DatumGetEOHP(value));
}
else if (VARATT_IS_COMPRESSED(attr))
{
/* here, va_rawsize is just the payload size */
result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
}
else if (VARATT_IS_SHORT(attr))
{
/*
* we have to normalize the header length to VARHDRSZ or else the
* callers of this function will be confused.
*/
result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
}
else
{
/* plain untoasted datum */
result = VARSIZE(attr);
}
return result;
}
/* ----------
* toast_datum_size
*
* Return the physical storage size (possibly compressed) of a varlena datum
* ----------
*/
Size
toast_datum_size(Datum value)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* Attribute is stored externally - return the extsize whether
* compressed or not. We do not count the size of the toast pointer
* ... should we?
*/
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_extsize;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
result = EOH_get_flat_size(DatumGetEOHP(value));
}
else if (VARATT_IS_SHORT(attr))
{
result = VARSIZE_SHORT(attr);
}
else
{
/*
* Attribute is stored inline either compressed or not, just calculate
* the size of the datum in either case.
*/
result = VARSIZE(attr);
}
return result;
}