postgresql/src/common/compression.c

477 lines
12 KiB
C

/*-------------------------------------------------------------------------
*
* compression.c
*
* Shared code for compression methods and specifications.
*
* A compression specification specifies the parameters that should be used
* when performing compression with a specific algorithm. The simplest
* possible compression specification is an integer, which sets the
* compression level.
*
* Otherwise, a compression specification is a comma-separated list of items,
* each having the form keyword or keyword=value.
*
* Currently, the supported keywords are "level", "long", and "workers".
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/common/compression.c
*-------------------------------------------------------------------------
*/
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
/*
 * Forward declarations of the file-local option-value parsers defined
 * further down.  Both report failures through result->parse_error.
 */
static int expect_integer_value(char *keyword, char *value,
pg_compress_specification *result);
static bool expect_boolean_value(char *keyword, char *value,
pg_compress_specification *result);
/*
 * Map a textual algorithm name onto its pg_compress_algorithm value.
 *
 * The name must match exactly (strcmp).  On a match, *algorithm is set and
 * true is returned.  For an unknown name, *algorithm is left untouched and
 * false is returned.
 */
bool
parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
{
	static const struct
	{
		const char *spelling;
		pg_compress_algorithm value;
	}			known_algorithms[] = {
		{"none", PG_COMPRESSION_NONE},
		{"gzip", PG_COMPRESSION_GZIP},
		{"lz4", PG_COMPRESSION_LZ4},
		{"zstd", PG_COMPRESSION_ZSTD}
	};
	size_t		i;

	for (i = 0; i < sizeof(known_algorithms) / sizeof(known_algorithms[0]); i++)
	{
		if (strcmp(name, known_algorithms[i].spelling) == 0)
		{
			*algorithm = known_algorithms[i].value;
			return true;
		}
	}

	return false;
}
/*
 * Return the human-readable (lower-case) name of a compression algorithm.
 *
 * The returned string is a constant and must not be freed.  A value that is
 * not a known algorithm trips an assertion; where assertions are compiled
 * out, "???" is returned instead.
 */
const char *
get_compress_algorithm_name(pg_compress_algorithm algorithm)
{
	const char *label = NULL;

	switch (algorithm)
	{
		case PG_COMPRESSION_NONE:
			label = "none";
			break;
		case PG_COMPRESSION_GZIP:
			label = "gzip";
			break;
		case PG_COMPRESSION_LZ4:
			label = "lz4";
			break;
		case PG_COMPRESSION_ZSTD:
			label = "zstd";
			break;
			/* no default, to provoke compiler warnings if values are added */
	}

	if (label == NULL)
	{
		/* Not one of the known algorithms; this should be unreachable. */
		Assert(false);
		label = "???";			/* placate compiler */
	}

	return label;
}
/*
 * Parse a compression specification for a specified algorithm.
 *
 * See the file header comments for a brief description of what a compression
 * specification is expected to look like.
 *
 * On return, all fields of the result object will be initialized.  Options
 * that the specification does not mention get neutral values (workers = 0,
 * long_distance = false) and their bits stay clear in result->options.  The
 * level falls back to the default for the algorithm, or to 0 when this build
 * lacks support for the algorithm (in which case parse_error is set too).
 *
 * result->parse_error will be NULL if no errors occurred during parsing, and
 * will otherwise contain an appropriate error message.  The caller may free
 * this error message string using pfree, if desired.  A bare integer that
 * does not fit in an int is reported as a parse error rather than being
 * silently truncated.
 *
 * Note, however, even if there's no parse error, the string might not make
 * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
 *
 * Use validate_compress_specification() to find out whether a compression
 * specification is semantically sensible.
 */
void
parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
							 pg_compress_specification *result)
{
	long		bare_level;
	char	   *bare_level_endp;

	/* Initial setup of result object. */
	result->algorithm = algorithm;
	result->options = 0;
	result->parse_error = NULL;

	/*
	 * Give the remaining fields defined values up front, so that the promise
	 * of a fully initialized result holds even for options that are never
	 * mentioned and for algorithms this build does not support.
	 */
	result->level = 0;
	result->workers = 0;
	result->long_distance = false;

	/*
	 * Assign a default level depending on the compression method.  This may
	 * be enforced later.
	 */
	switch (result->algorithm)
	{
		case PG_COMPRESSION_NONE:
			result->level = 0;
			break;
		case PG_COMPRESSION_LZ4:
#ifdef USE_LZ4
			result->level = 0;	/* fast compression mode */
#else
			result->parse_error =
				psprintf(_("this build does not support compression with %s"),
						 "LZ4");
#endif
			break;
		case PG_COMPRESSION_ZSTD:
#ifdef USE_ZSTD
			result->level = ZSTD_CLEVEL_DEFAULT;
#else
			result->parse_error =
				psprintf(_("this build does not support compression with %s"),
						 "ZSTD");
#endif
			break;
		case PG_COMPRESSION_GZIP:
#ifdef HAVE_LIBZ
			result->level = Z_DEFAULT_COMPRESSION;
#else
			result->parse_error =
				psprintf(_("this build does not support compression with %s"),
						 "gzip");
#endif
			break;
	}

	/* If there is no specification, we're done already. */
	if (specification == NULL)
		return;

	/*
	 * As a special case, the specification can be a bare integer, which is
	 * shorthand for "level=N".
	 */
	errno = 0;
	bare_level = strtol(specification, &bare_level_endp, 10);
	if (specification != bare_level_endp && *bare_level_endp == '\0')
	{
		if (errno == ERANGE || bare_level < INT_MIN || bare_level > INT_MAX)
		{
			/*
			 * The number cannot be represented as an int.  Keep any error
			 * already recorded above (unsupported algorithm) rather than
			 * leaking it by overwriting.
			 */
			if (result->parse_error == NULL)
				result->parse_error =
					psprintf(_("value for compression option \"%s\" must be an integer"),
							 "level");
		}
		else
			result->level = (int) bare_level;
		return;
	}

	/* Look for comma-separated keyword or keyword=value entries. */
	while (1)
	{
		char	   *kwstart;
		char	   *kwend;
		char	   *vstart;
		char	   *vend;
		int			kwlen;
		int			vlen;
		bool		has_value;
		char	   *keyword;
		char	   *value;

		/* Figure start, end, and length of next keyword and any value. */
		kwstart = kwend = specification;
		while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
			++kwend;
		kwlen = kwend - kwstart;
		if (*kwend != '=')
		{
			vstart = vend = NULL;
			vlen = 0;
			has_value = false;
		}
		else
		{
			vstart = vend = kwend + 1;
			while (*vend != '\0' && *vend != ',')
				++vend;
			vlen = vend - vstart;
			has_value = true;
		}

		/* Reject empty keyword. */
		if (kwlen == 0)
		{
			result->parse_error =
				pstrdup(_("found empty string where a compression option was expected"));
			break;
		}

		/* Extract keyword and value as separate C strings. */
		keyword = palloc(kwlen + 1);
		memcpy(keyword, kwstart, kwlen);
		keyword[kwlen] = '\0';
		if (!has_value)
			value = NULL;
		else
		{
			value = palloc(vlen + 1);
			memcpy(value, vstart, vlen);
			value[vlen] = '\0';
		}

		/* Handle whatever keyword we found. */
		if (strcmp(keyword, "level") == 0)
		{
			result->level = expect_integer_value(keyword, value, result);

			/*
			 * No need to set a flag in "options", there is a default level
			 * set at least thanks to the logic above.
			 */
		}
		else if (strcmp(keyword, "workers") == 0)
		{
			result->workers = expect_integer_value(keyword, value, result);
			result->options |= PG_COMPRESSION_OPTION_WORKERS;
		}
		else if (strcmp(keyword, "long") == 0)
		{
			result->long_distance = expect_boolean_value(keyword, value, result);
			result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
		}
		else
			result->parse_error =
				psprintf(_("unrecognized compression option: \"%s\""), keyword);

		/* Release memory, just to be tidy. */
		pfree(keyword);
		if (value != NULL)
			pfree(value);

		/*
		 * If we got an error or have reached the end of the string, stop.
		 *
		 * If there is no value, then the end of the keyword might have been
		 * the end of the string.  If there is a value, then the end of the
		 * keyword cannot have been the end of the string, but the end of the
		 * value might have been.
		 */
		if (result->parse_error != NULL ||
			(vend == NULL ? *kwend == '\0' : *vend == '\0'))
			break;

		/* Advance to next entry and loop around. */
		specification = vend == NULL ? kwend + 1 : vend + 1;
	}
}
/*
 * Parse 'value' as a base-10 integer and return the result.
 *
 * 'keyword' is the option name and is used only to build error messages.
 *
 * Parsing fails if 'value' is NULL (the option was given without "=value"),
 * if it is not entirely an integer, or if the number does not fit in an int.
 * On failure, result->parse_error is set to an appropriate message and -1 is
 * returned.  Since -1 can also be a successfully parsed value, callers must
 * check result->parse_error to detect failure.
 */
static int
expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
{
	long		lvalue;
	char	   *lvalue_endp;

	if (value == NULL)
	{
		result->parse_error =
			psprintf(_("compression option \"%s\" requires a value"),
					 keyword);
		return -1;
	}

	/*
	 * strtol() yields a long.  Check both its overflow indicator and the int
	 * range explicitly, so that a huge value is rejected instead of being
	 * silently truncated on assignment to int.
	 */
	errno = 0;
	lvalue = strtol(value, &lvalue_endp, 10);
	if (lvalue_endp == value || *lvalue_endp != '\0' ||
		errno == ERANGE || lvalue < INT_MIN || lvalue > INT_MAX)
	{
		result->parse_error =
			psprintf(_("value for compression option \"%s\" must be an integer"),
					 keyword);
		return -1;
	}

	return (int) lvalue;
}
/*
 * Interpret 'value' as a boolean setting for the option named 'keyword'.
 *
 * A NULL value means the option was written as a bare keyword, which is
 * taken to mean true.  Otherwise the value is compared, using
 * pg_strcasecmp(), against the accepted spellings: yes, on, 1 (true) and
 * no, off, 0 (false).
 *
 * If the value matches none of them, result->parse_error is set to an
 * appropriate message and false is returned.  Because false is also a valid
 * result, the caller must check result->parse_error to determine if the
 * call was successful.
 *
 * Inspired by ParseVariableBool().
 */
static bool
expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
{
	static const char *const true_words[] = {"yes", "on", "1"};
	static const char *const false_words[] = {"no", "off", "0"};
	size_t		i;

	/* A keyword without "=value" simply switches the option on. */
	if (value == NULL)
		return true;

	for (i = 0; i < sizeof(true_words) / sizeof(true_words[0]); i++)
	{
		if (pg_strcasecmp(value, true_words[i]) == 0)
			return true;
	}

	for (i = 0; i < sizeof(false_words) / sizeof(false_words[0]); i++)
	{
		if (pg_strcasecmp(value, false_words[i]) == 0)
			return false;
	}

	result->parse_error =
		psprintf(_("value for compression option \"%s\" must be a Boolean value"),
				 keyword);
	return false;
}
/*
 * Check a parsed compression specification for semantic validity.
 *
 * Returns NULL if the specification parsed cleanly and its settings make
 * sense for the chosen algorithm.  Otherwise returns an error message: either
 * the stored parse error itself, or a newly built string.
 *
 * Does not test whether this build of PostgreSQL supports the requested
 * compression method.
 */
char *
validate_compress_specification(pg_compress_specification *spec)
{
	int			lowest = 1;
	int			highest = 1;
	int			fallback = 0;
	bool		level_in_range;

	/* A specification that failed to parse cannot be valid. */
	if (spec->parse_error != NULL)
		return spec->parse_error;

	/*
	 * Work out the legal level range and the default level for the
	 * algorithm.  "none" takes no level at all, so anything but 0 is
	 * rejected right away.
	 */
	switch (spec->algorithm)
	{
		case PG_COMPRESSION_GZIP:
			highest = 9;
#ifdef HAVE_LIBZ
			fallback = Z_DEFAULT_COMPRESSION;
#endif
			break;
		case PG_COMPRESSION_LZ4:
			highest = 12;
			fallback = 0;		/* fast mode */
			break;
		case PG_COMPRESSION_ZSTD:
#ifdef USE_ZSTD
			highest = ZSTD_maxCLevel();
			lowest = ZSTD_minCLevel();
			fallback = ZSTD_CLEVEL_DEFAULT;
#endif
			break;
		case PG_COMPRESSION_NONE:
			if (spec->level != 0)
				return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
								get_compress_algorithm_name(spec->algorithm));
			break;
	}

	/* The default level is always acceptable, even outside the range. */
	level_in_range = (spec->level >= lowest && spec->level <= highest);
	if (!level_in_range && spec->level != fallback)
		return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
						get_compress_algorithm_name(spec->algorithm),
						lowest, highest, fallback);

	/*
	 * Of the compression algorithms that we currently support, only zstd
	 * allows parallel workers and long-distance mode.
	 */
	if (spec->algorithm != PG_COMPRESSION_ZSTD)
	{
		if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0)
			return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
							get_compress_algorithm_name(spec->algorithm));

		if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
			return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
							get_compress_algorithm_name(spec->algorithm));
	}

	return NULL;
}
#ifdef FRONTEND
/*
 * Split the argument of a command-line compression option, commonly
 * -Z/--compress, into its METHOD and DETAIL parts.
 *
 * The argument has the form METHOD[:DETAIL].  This function only separates
 * the two pieces; DETAIL is meant to be fed later to
 * parse_compress_specification().  *algorithm always receives a newly
 * allocated string.  *detail receives a newly allocated string, or NULL if
 * there is no detail.
 *
 * If the whole argument is an integer, the method is implied by the value:
 * zero selects "none" with no detail, and anything else selects "gzip" with
 * the argument itself as detail.  An empty string also takes this path and
 * yields "none".
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	const char *colon;
	char	   *number_end;
	long		number;

	/*
	 * Check whether the option consists of a bare integer.
	 *
	 * For backward-compatibility, assume "none" if the integer found is zero
	 * and "gzip" otherwise.
	 */
	number = strtol(option, &number_end, 10);
	if (*number_end == '\0')
	{
		bool		is_zero = (number == 0);

		*algorithm = pstrdup(is_zero ? "none" : "gzip");
		*detail = is_zero ? NULL : pstrdup(option);
		return;
	}

	/*
	 * Otherwise the algorithm name runs up to the first colon, if any, and
	 * the compression detail is whatever follows it.
	 */
	colon = strchr(option, ':');
	if (colon != NULL)
	{
		size_t		name_len = colon - option;
		char	   *name = palloc(name_len + 1);

		memcpy(name, option, name_len);
		name[name_len] = '\0';
		*algorithm = name;
		*detail = pstrdup(colon + 1);
	}
	else
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
	}
}
#endif /* FRONTEND */