postgresql/src/bin/pg_dump/compress_io.c

718 lines
16 KiB
C

/*-------------------------------------------------------------------------
*
* compress_io.c
* Routines for archivers to write an uncompressed or compressed data
* stream.
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* This file includes two APIs for dealing with compressed data. The first
* provides more flexibility, using callbacks to read/write data from the
* underlying stream. The second API is a wrapper around fopen/gzopen and
* friends, providing an interface similar to those, but abstracts away
* the possible compression. Both APIs use libz for the compression, but
* the second API uses gzip headers, so the resulting files can be easily
* manipulated with the gzip utility.
*
* Compressor API
* --------------
*
* The interface for writing to an archive consists of three functions:
* AllocateCompressor, WriteDataToArchive and EndCompressor. First you call
* AllocateCompressor, then write all the data by calling WriteDataToArchive
* as many times as needed, and finally EndCompressor. WriteDataToArchive
* and EndCompressor will call the WriteFunc that was provided to
* AllocateCompressor for each chunk of compressed data.
*
* The interface for reading an archive consists of just one function:
* ReadDataFromArchive. ReadDataFromArchive reads the whole compressed input
* stream, by repeatedly calling the given ReadFunc. ReadFunc returns the
* compressed data chunk at a time, and ReadDataFromArchive decompresses it
* and passes the decompressed data to ahwrite(), until ReadFunc returns 0
* to signal EOF.
*
* The interface is the same for compressed and uncompressed streams.
*
* Compressed stream API
* ----------------------
*
* The compressed stream API is a wrapper around the C standard fopen() and
* libz's gzopen() APIs. It allows you to use the same functions for
* compressed and uncompressed streams. cfopen_read() first tries to open
* the file with given name, and if it fails, it tries to open the same
* file with the .gz suffix. cfopen_write() opens a file for writing, an
* extra argument specifies if the file should be compressed, and adds the
* .gz suffix to the filename if so. This allows you to easily handle both
* compressed and uncompressed files.
*
* IDENTIFICATION
* src/bin/pg_dump/compress_io.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "compress_io.h"
#include "pg_backup_utils.h"
/*----------------------
* Compressor API
*----------------------
*/
/* typedef appears in compress_io.h */
struct CompressorState
{
CompressionAlgorithm comprAlg;
WriteFunc writeF;
#ifdef HAVE_LIBZ
z_streamp zp;
char *zlibOut;
size_t zlibOutSize;
#endif
};
static void ParseCompressionOption(int compression, CompressionAlgorithm *alg,
int *level);
/* Routines that support zlib compressed data I/O */
#ifdef HAVE_LIBZ
static void InitCompressorZlib(CompressorState *cs, int level);
static void DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs,
bool flush);
static void ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF);
static void WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen);
static void EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs);
#endif
/* Routines that support uncompressed data I/O */
static void ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF);
static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen);
/*
* Interprets a numeric 'compression' value. The algorithm implied by the
* value (zlib or none at the moment), is returned in *alg, and the
* zlib compression level in *level.
*/
static void
ParseCompressionOption(int compression, CompressionAlgorithm *alg, int *level)
{
if (compression == Z_DEFAULT_COMPRESSION ||
(compression > 0 && compression <= 9))
*alg = COMPR_ALG_LIBZ;
else if (compression == 0)
*alg = COMPR_ALG_NONE;
else
{
fatal("invalid compression code: %d", compression);
*alg = COMPR_ALG_NONE; /* keep compiler quiet */
}
/* The level is just the passed-in value. */
if (level)
*level = compression;
}
/* Public interface routines */
/* Allocate a new compressor */
CompressorState *
AllocateCompressor(int compression, WriteFunc writeF)
{
CompressorState *cs;
CompressionAlgorithm alg;
int level;
ParseCompressionOption(compression, &alg, &level);
#ifndef HAVE_LIBZ
if (alg == COMPR_ALG_LIBZ)
fatal("not built with zlib support");
#endif
cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
cs->writeF = writeF;
cs->comprAlg = alg;
/*
* Perform compression algorithm specific initialization.
*/
#ifdef HAVE_LIBZ
if (alg == COMPR_ALG_LIBZ)
InitCompressorZlib(cs, level);
#endif
return cs;
}
/*
* Read all compressed data from the input stream (via readF) and print it
* out with ahwrite().
*/
void
ReadDataFromArchive(ArchiveHandle *AH, int compression, ReadFunc readF)
{
CompressionAlgorithm alg;
ParseCompressionOption(compression, &alg, NULL);
if (alg == COMPR_ALG_NONE)
ReadDataFromArchiveNone(AH, readF);
if (alg == COMPR_ALG_LIBZ)
{
#ifdef HAVE_LIBZ
ReadDataFromArchiveZlib(AH, readF);
#else
fatal("not built with zlib support");
#endif
}
}
/*
* Compress and write data to the output stream (via writeF).
*/
void
WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
const void *data, size_t dLen)
{
switch (cs->comprAlg)
{
case COMPR_ALG_LIBZ:
#ifdef HAVE_LIBZ
WriteDataToArchiveZlib(AH, cs, data, dLen);
#else
fatal("not built with zlib support");
#endif
break;
case COMPR_ALG_NONE:
WriteDataToArchiveNone(AH, cs, data, dLen);
break;
}
}
/*
* Terminate compression library context and flush its buffers.
*/
void
EndCompressor(ArchiveHandle *AH, CompressorState *cs)
{
#ifdef HAVE_LIBZ
if (cs->comprAlg == COMPR_ALG_LIBZ)
EndCompressorZlib(AH, cs);
#endif
free(cs);
}
/* Private routines, specific to each compression method. */
#ifdef HAVE_LIBZ
/*
* Functions for zlib compressed output.
*/
static void
InitCompressorZlib(CompressorState *cs, int level)
{
z_streamp zp;
zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
/*
* zlibOutSize is the buffer size we tell zlib it can output to. We
* actually allocate one extra byte because some routines want to append a
* trailing zero byte to the zlib output.
*/
cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
cs->zlibOutSize = ZLIB_OUT_SIZE;
if (deflateInit(zp, level) != Z_OK)
fatal("could not initialize compression library: %s",
zp->msg);
/* Just be paranoid - maybe End is called after Start, with no Write */
zp->next_out = (void *) cs->zlibOut;
zp->avail_out = cs->zlibOutSize;
}
static void
EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
{
z_streamp zp = cs->zp;
zp->next_in = NULL;
zp->avail_in = 0;
/* Flush any remaining data from zlib buffer */
DeflateCompressorZlib(AH, cs, true);
if (deflateEnd(zp) != Z_OK)
fatal("could not close compression stream: %s", zp->msg);
free(cs->zlibOut);
free(cs->zp);
}
static void
DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
{
z_streamp zp = cs->zp;
char *out = cs->zlibOut;
int res = Z_OK;
while (cs->zp->avail_in != 0 || flush)
{
res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
if (res == Z_STREAM_ERROR)
fatal("could not compress data: %s", zp->msg);
if ((flush && (zp->avail_out < cs->zlibOutSize))
|| (zp->avail_out == 0)
|| (zp->avail_in != 0)
)
{
/*
* Extra paranoia: avoid zero-length chunks, since a zero length
* chunk is the EOF marker in the custom format. This should never
* happen but...
*/
if (zp->avail_out < cs->zlibOutSize)
{
/*
* Any write function should do its own error checking but to
* make sure we do a check here as well...
*/
size_t len = cs->zlibOutSize - zp->avail_out;
cs->writeF(AH, out, len);
}
zp->next_out = (void *) out;
zp->avail_out = cs->zlibOutSize;
}
if (res == Z_STREAM_END)
break;
}
}
static void
WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen)
{
cs->zp->next_in = (void *) unconstify(char *, data);
cs->zp->avail_in = dLen;
DeflateCompressorZlib(AH, cs, false);
}
static void
ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
{
z_streamp zp;
char *out;
int res = Z_OK;
size_t cnt;
char *buf;
size_t buflen;
zp = (z_streamp) pg_malloc(sizeof(z_stream));
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
buf = pg_malloc(ZLIB_IN_SIZE);
buflen = ZLIB_IN_SIZE;
out = pg_malloc(ZLIB_OUT_SIZE + 1);
if (inflateInit(zp) != Z_OK)
fatal("could not initialize compression library: %s",
zp->msg);
/* no minimal chunk size for zlib */
while ((cnt = readF(AH, &buf, &buflen)))
{
zp->next_in = (void *) buf;
zp->avail_in = cnt;
while (zp->avail_in > 0)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
}
zp->next_in = NULL;
zp->avail_in = 0;
while (res != Z_STREAM_END)
{
zp->next_out = (void *) out;
zp->avail_out = ZLIB_OUT_SIZE;
res = inflate(zp, 0);
if (res != Z_OK && res != Z_STREAM_END)
fatal("could not uncompress data: %s", zp->msg);
out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
}
if (inflateEnd(zp) != Z_OK)
fatal("could not close compression library: %s", zp->msg);
free(buf);
free(out);
free(zp);
}
#endif /* HAVE_LIBZ */
/*
* Functions for uncompressed output.
*/
static void
ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF)
{
size_t cnt;
char *buf;
size_t buflen;
buf = pg_malloc(ZLIB_OUT_SIZE);
buflen = ZLIB_OUT_SIZE;
while ((cnt = readF(AH, &buf, &buflen)))
{
ahwrite(buf, 1, cnt, AH);
}
free(buf);
}
static void
WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
const char *data, size_t dLen)
{
cs->writeF(AH, data, dLen);
}
/*----------------------
* Compressed stream API
*----------------------
*/
/*
* cfp represents an open stream, wrapping the underlying FILE or gzFile
* pointer. This is opaque to the callers.
*/
struct cfp
{
FILE *uncompressedfp;
#ifdef HAVE_LIBZ
gzFile compressedfp;
#endif
};
#ifdef HAVE_LIBZ
static int hasSuffix(const char *filename, const char *suffix);
#endif
/* free() without changing errno; useful in several places below */
static void
free_keep_errno(void *p)
{
int save_errno = errno;
free(p);
errno = save_errno;
}
/*
* Open a file for reading. 'path' is the file to open, and 'mode' should
* be either "r" or "rb".
*
* If the file at 'path' does not exist, we append the ".gz" suffix (if 'path'
* doesn't already have it) and try again. So if you pass "foo" as 'path',
* this will open either "foo" or "foo.gz".
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfopen_read(const char *path, const char *mode)
{
cfp *fp;
#ifdef HAVE_LIBZ
if (hasSuffix(path, ".gz"))
fp = cfopen(path, mode, 1);
else
#endif
{
fp = cfopen(path, mode, 0);
#ifdef HAVE_LIBZ
if (fp == NULL)
{
char *fname;
fname = psprintf("%s.gz", path);
fp = cfopen(fname, mode, 1);
free_keep_errno(fname);
}
#endif
}
return fp;
}
/*
* Open a file for writing. 'path' indicates the path name, and 'mode' must
* be a filemode as accepted by fopen() and gzopen() that indicates writing
* ("w", "wb", "a", or "ab").
*
* If 'compression' is non-zero, a gzip compressed stream is opened, and
* 'compression' indicates the compression level used. The ".gz" suffix
* is automatically added to 'path' in that case.
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfopen_write(const char *path, const char *mode, int compression)
{
cfp *fp;
if (compression == 0)
fp = cfopen(path, mode, 0);
else
{
#ifdef HAVE_LIBZ
char *fname;
fname = psprintf("%s.gz", path);
fp = cfopen(fname, mode, compression);
free_keep_errno(fname);
#else
fatal("not built with zlib support");
fp = NULL; /* keep compiler quiet */
#endif
}
return fp;
}
/*
* Opens file 'path' in 'mode'. If 'compression' is non-zero, the file
* is opened with libz gzopen(), otherwise with plain fopen().
*
* On failure, return NULL with an error code in errno.
*/
cfp *
cfopen(const char *path, const char *mode, int compression)
{
cfp *fp = pg_malloc(sizeof(cfp));
if (compression != 0)
{
#ifdef HAVE_LIBZ
if (compression != Z_DEFAULT_COMPRESSION)
{
/* user has specified a compression level, so tell zlib to use it */
char mode_compression[32];
snprintf(mode_compression, sizeof(mode_compression), "%s%d",
mode, compression);
fp->compressedfp = gzopen(path, mode_compression);
}
else
{
/* don't specify a level, just use the zlib default */
fp->compressedfp = gzopen(path, mode);
}
fp->uncompressedfp = NULL;
if (fp->compressedfp == NULL)
{
free_keep_errno(fp);
fp = NULL;
}
#else
fatal("not built with zlib support");
#endif
}
else
{
#ifdef HAVE_LIBZ
fp->compressedfp = NULL;
#endif
fp->uncompressedfp = fopen(path, mode);
if (fp->uncompressedfp == NULL)
{
free_keep_errno(fp);
fp = NULL;
}
}
return fp;
}
int
cfread(void *ptr, int size, cfp *fp)
{
int ret;
if (size == 0)
return 0;
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
ret = gzread(fp->compressedfp, ptr, size);
if (ret != size && !gzeof(fp->compressedfp))
{
int errnum;
const char *errmsg = gzerror(fp->compressedfp, &errnum);
fatal("could not read from input file: %s",
errnum == Z_ERRNO ? strerror(errno) : errmsg);
}
}
else
#endif
{
ret = fread(ptr, 1, size, fp->uncompressedfp);
if (ret != size && !feof(fp->uncompressedfp))
READ_ERROR_EXIT(fp->uncompressedfp);
}
return ret;
}
int
cfwrite(const void *ptr, int size, cfp *fp)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzwrite(fp->compressedfp, ptr, size);
else
#endif
return fwrite(ptr, 1, size, fp->uncompressedfp);
}
int
cfgetc(cfp *fp)
{
int ret;
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
ret = gzgetc(fp->compressedfp);
if (ret == EOF)
{
if (!gzeof(fp->compressedfp))
fatal("could not read from input file: %s", strerror(errno));
else
fatal("could not read from input file: end of file");
}
}
else
#endif
{
ret = fgetc(fp->uncompressedfp);
if (ret == EOF)
READ_ERROR_EXIT(fp->uncompressedfp);
}
return ret;
}
char *
cfgets(cfp *fp, char *buf, int len)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzgets(fp->compressedfp, buf, len);
else
#endif
return fgets(buf, len, fp->uncompressedfp);
}
int
cfclose(cfp *fp)
{
int result;
if (fp == NULL)
{
errno = EBADF;
return EOF;
}
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
result = gzclose(fp->compressedfp);
fp->compressedfp = NULL;
}
else
#endif
{
result = fclose(fp->uncompressedfp);
fp->uncompressedfp = NULL;
}
free_keep_errno(fp);
return result;
}
int
cfeof(cfp *fp)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
return gzeof(fp->compressedfp);
else
#endif
return feof(fp->uncompressedfp);
}
const char *
get_cfp_error(cfp *fp)
{
#ifdef HAVE_LIBZ
if (fp->compressedfp)
{
int errnum;
const char *errmsg = gzerror(fp->compressedfp, &errnum);
if (errnum != Z_ERRNO)
return errmsg;
}
#endif
return strerror(errno);
}
#ifdef HAVE_LIBZ
static int
hasSuffix(const char *filename, const char *suffix)
{
int filenamelen = strlen(filename);
int suffixlen = strlen(suffix);
if (filenamelen < suffixlen)
return 0;
return memcmp(&filename[filenamelen - suffixlen],
suffix,
suffixlen) == 0;
}
#endif