Add new pg_walsummary tool.

This can dump the contents of the WAL summary files found in
pg_wal/summaries. Normally, this shouldn't really be something anyone
needs to do, but it may be needed for debugging problems with
incremental backup, or could possibly be useful to external tools.

Discussion: http://postgr.es/m/CA+Tgmobvqqj-DW9F7uUzT-cQqs6wcVb-Xhs=w=hzJnXSE-kRGw@mail.gmail.com
This commit is contained in:
Robert Haas 2024-01-11 12:47:28 -05:00
parent d9ef650fca
commit ee1bfd1683
13 changed files with 600 additions and 0 deletions

View File

@ -219,6 +219,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY pgtesttiming SYSTEM "pgtesttiming.sgml">
<!ENTITY pgupgrade SYSTEM "pgupgrade.sgml">
<!ENTITY pgwaldump SYSTEM "pg_waldump.sgml">
<!ENTITY pgwalsummary SYSTEM "pg_walsummary.sgml">
<!ENTITY postgres SYSTEM "postgres-ref.sgml">
<!ENTITY psqlRef SYSTEM "psql-ref.sgml">
<!ENTITY reindexdb SYSTEM "reindexdb.sgml">

View File

@ -0,0 +1,122 @@
<!--
doc/src/sgml/ref/pg_walsummary.sgml
PostgreSQL documentation
-->
<refentry id="app-pgwalsummary">
<indexterm zone="app-pgwalsummary">
<primary>pg_walsummary</primary>
</indexterm>
<refmeta>
<refentrytitle><application>pg_walsummary</application></refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo>Application</refmiscinfo>
</refmeta>
<refnamediv>
<refname>pg_walsummary</refname>
<refpurpose>print contents of WAL summary files</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>pg_walsummary</command>
<arg rep="repeat" choice="opt"><replaceable>option</replaceable></arg>
<arg rep="repeat"><replaceable>file</replaceable></arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<application>pg_walsummary</application> is used to print the contents of
WAL summary files. These binary files are found within the
<literal>pg_wal/summaries</literal> subdirectory of the data directory,
and can be converted to text using this tool. This is not ordinarily
necessary, since WAL summary files primarily exist to support
<link linkend="backup-incremental-backup">incremental backup</link>,
but it may be useful for debugging purposes.
</para>
<para>
A WAL summary file is indexed by tablespace OID, relation OID, and relation
fork. For each relation fork, it stores the list of blocks that were
modified by WAL within the range summarized in the file. It can also
store a "limit block," which is 0 if the relation fork was created or
truncated within the relevant WAL range, and otherwise the shortest length
to which the relation fork was truncated. If the relation fork was not
created, deleted, or truncated within the relevant WAL range, the limit
block is undefined or infinite and will not be printed by this tool.
</para>
</refsect1>
<refsect1>
<title>Options</title>
<para>
<variablelist>
<varlistentry>
<term><option>-i</option></term>
<term><option>--individual</option></term>
<listitem>
<para>
By default, <literal>pg_walsummary</literal> prints one line of output
for each range of one or more consecutive modified blocks. This can
make the output a lot briefer, since a relation where all blocks from
0 through 999 were modified will produce only one line of output rather
than 1000 separate lines. This option requests a separate line of
output for every modified block.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-q</option></term>
<term><option>--quiet</option></term>
<listitem>
<para>
Do not print any output, except for errors. This can be useful
when you want to know whether a WAL summary file can be successfully
parsed but don't care about the contents.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-?</option></term>
<term><option>--help</option></term>
<listitem>
<para>
Shows help about <application>pg_walsummary</application> command line
arguments, and exits.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1>
<title>Environment</title>
<para>
The environment variable <envar>PG_COLOR</envar> specifies whether to use
color in diagnostic messages. Possible values are
<literal>always</literal>, <literal>auto</literal> and
<literal>never</literal>.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="app-pgbasebackup"/></member>
<member><xref linkend="app-pgcombinebackup"/></member>
</simplelist>
</refsect1>
</refentry>

View File

@ -289,6 +289,7 @@
&pgtesttiming;
&pgupgrade;
&pgwaldump;
&pgwalsummary;
&postgres;
</reference>

View File

@ -31,6 +31,7 @@ SUBDIRS = \
pg_upgrade \
pg_verifybackup \
pg_waldump \
pg_walsummary \
pgbench \
psql \
scripts

View File

@ -17,6 +17,7 @@ subdir('pg_test_timing')
subdir('pg_upgrade')
subdir('pg_verifybackup')
subdir('pg_waldump')
subdir('pg_walsummary')
subdir('pgbench')
subdir('pgevent')
subdir('psql')

1
src/bin/pg_walsummary/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
pg_walsummary

View File

@ -0,0 +1,48 @@
#-------------------------------------------------------------------------
#
# Makefile for src/bin/pg_walsummary
#
# Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/bin/pg_walsummary/Makefile
#
#-------------------------------------------------------------------------
# Descriptive metadata for the program; presumably consumed when building
# the Windows resource object ($(WIN32RES) below) -- confirm in
# Makefile.global.
PGFILEDESC = "pg_walsummary - print contents of WAL summary files"
PGAPPICON=win32
# Location of this directory relative to the top of the build tree.
subdir = src/bin/pg_walsummary
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
# Add the libpq source directory to the include path, and link against the
# frontend utility library (libpgfeutils).
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
# Objects that make up the executable.
OBJS = \
$(WIN32RES) \
pg_walsummary.o
all: pg_walsummary
# Link the program; libpgport and libpgfeutils are order-only prerequisites,
# so they get built first but do not appear in $^.
pg_walsummary: $(OBJS) | submake-libpgport submake-libpgfeutils
$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
# Install the executable into $(bindir), creating the directory if needed.
install: all installdirs
$(INSTALL_PROGRAM) pg_walsummary$(X) '$(DESTDIR)$(bindir)/pg_walsummary$(X)'
installdirs:
$(MKDIR_P) '$(DESTDIR)$(bindir)'
uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_walsummary$(X)'
# All cleaning targets remove the executable and its object files.
clean distclean maintainer-clean:
rm -f pg_walsummary$(X) $(OBJS)
# Test targets; both expand prove_* macros expected to come from
# Makefile.global.
check:
$(prove_check)
installcheck:
$(prove_installcheck)

View File

@ -0,0 +1,30 @@
# Copyright (c) 2022-2023, PostgreSQL Global Development Group
# Source files that make up the pg_walsummary executable.
pg_walsummary_sources = files(
'pg_walsummary.c',
)
# On Windows, additionally generate a resource file carrying the program
# name and description.
if host_system == 'windows'
pg_walsummary_sources += rc_bin_gen.process(win32ver_rc, extra_args: [
'--NAME', 'pg_walsummary',
'--FILEDESC', 'pg_walsummary - print contents of WAL summary files',])
endif
# Build the executable from the sources above with the shared frontend
# dependency set and default binary arguments.
pg_walsummary = executable('pg_walsummary',
pg_walsummary_sources,
dependencies: [frontend_code],
kwargs: default_bin_args,
)
bin_targets += pg_walsummary
# Register this directory's TAP tests.
tests += {
'name': 'pg_walsummary',
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'tap': {
'tests': [
't/001_basic.pl',
't/002_blocks.pl',
],
}
}

View File

@ -0,0 +1,6 @@
# src/bin/pg_walsummary/nls.mk
CATALOG_NAME = pg_walsummary
GETTEXT_FILES = $(FRONTEND_COMMON_GETTEXT_FILES) \
pg_walsummary.c
GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS)
GETTEXT_FLAGS = $(FRONTEND_COMMON_GETTEXT_FLAGS)

View File

@ -0,0 +1,280 @@
/*-------------------------------------------------------------------------
*
* pg_walsummary.c
* Prints the contents of WAL summary files.
*
* Copyright (c) 2017-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/bin/pg_walsummary/pg_walsummary.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <fcntl.h>
#include <limits.h>
#include "common/blkreftable.h"
#include "common/logging.h"
#include "fe_utils/option_utils.h"
#include "lib/stringinfo.h"
#include "getopt_long.h"
/*
 * Command-line options, filled in by main() and consulted while dumping.
 */
typedef struct ws_options
{
/* -i/--individual: print each block on its own line rather than as ranges */
bool individual;
/* -q/--quiet: read and parse the files, but skip the block listing */
bool quiet;
} ws_options;
/*
 * State for the file currently being read; passed to the read callback as
 * its callback argument.
 */
typedef struct ws_file_info
{
/* descriptor returned by open() for the summary file */
int fd;
/* name as given on the command line, used in error messages */
char *filename;
} ws_file_info;
/*
 * Scratch array for the block numbers of one relation fork.  It is allocated
 * on first use by dump_one_relation(), enlarged there as needed, and reused
 * for every later relation.
 */
static BlockNumber *block_buffer = NULL;
static unsigned block_buffer_size = 512; /* Initial size. */
static void dump_one_relation(ws_options *opt, RelFileLocator *rlocator,
ForkNumber forknum, BlockNumber limit_block,
BlockRefTableReader *reader);
static void help(const char *progname);
static int compare_block_numbers(const void *a, const void *b);
static int walsummary_read_callback(void *callback_arg, void *data,
int length);
static void walsummary_error_callback(void *callback_arg, char *fmt,...) pg_attribute_printf(2, 3);
/*
 * Main program.
 *
 * Parses the command line and then dumps every WAL summary file named on
 * it, in the order given, one relation fork at a time.  Failure to open or
 * read a file, or an error reported by the block reference table reader,
 * terminates the program with exit status 1.
 */
int
main(int argc, char *argv[])
{
	static struct option long_options[] = {
		{"individual", no_argument, NULL, 'i'},
		{"quiet", no_argument, NULL, 'q'},
		{NULL, 0, NULL, 0}
	};

	const char *progname;
	int			optindex;
	int			c;
	ws_options	opt;

	memset(&opt, 0, sizeof(ws_options));

	pg_logging_init(argv[0]);
	progname = get_progname(argv[0]);

	/* Set up message translation before anything user-visible is printed. */
	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_walsummary"));

	handle_help_version_opts(argc, argv, progname, help);

	/*
	 * Process command-line options.  Only -i and -q exist, so only those are
	 * listed in the short-option string; anything else is rejected by
	 * getopt_long and lands in the default branch.
	 */
	while ((c = getopt_long(argc, argv, "iq",
							long_options, &optindex)) != -1)
	{
		switch (c)
		{
			case 'i':
				opt.individual = true;
				break;
			case 'q':
				opt.quiet = true;
				break;
			default:
				/* getopt_long already emitted a complaint */
				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
				exit(1);
		}
	}

	/* At least one file name must remain after the options. */
	if (optind >= argc)
	{
		/* The logging layer prefixes the program name; don't repeat it. */
		pg_log_error("no input files specified");
		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
		exit(1);
	}

	/* Dump each remaining argument as a WAL summary file. */
	while (optind < argc)
	{
		ws_file_info ws;
		BlockRefTableReader *reader;
		RelFileLocator rlocator;
		ForkNumber	forknum;
		BlockNumber limit_block;

		ws.filename = argv[optind++];
		if ((ws.fd = open(ws.filename, O_RDONLY | PG_BINARY, 0)) < 0)
			pg_fatal("could not open file \"%s\": %m", ws.filename);

		/* The reader pulls its data through walsummary_read_callback. */
		reader = CreateBlockRefTableReader(walsummary_read_callback, &ws,
										   ws.filename,
										   walsummary_error_callback, NULL);

		/* Visit every relation fork recorded in the file. */
		while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
											   &limit_block))
			dump_one_relation(&opt, &rlocator, forknum, limit_block, reader);

		DestroyBlockRefTableReader(reader);
		close(ws.fd);
	}

	exit(0);
}
/*
 * Dump details for one relation fork.
 *
 * opt			output options (see ws_options)
 * rlocator		tablespace, database and relation identifiers to print
 * forknum		fork being dumped; used to index forkNames[] for display
 * limit_block	limit block for this fork, or InvalidBlockNumber if none
 * reader		reader from which this fork's block numbers are fetched
 *
 * The complete block list is fetched from the reader even when opt->quiet
 * is set; per the documentation, --quiet exists to check that a file can be
 * parsed, so the data still has to be consumed.  In quiet mode nothing at
 * all is written to stdout, including the limit block line.
 */
static void
dump_one_relation(ws_options *opt, RelFileLocator *rlocator,
				  ForkNumber forknum, BlockNumber limit_block,
				  BlockRefTableReader *reader)
{
	unsigned	i = 0;
	unsigned	nblocks;
	BlockNumber startblock = InvalidBlockNumber;
	BlockNumber endblock = InvalidBlockNumber;

	/* Dump limit block, if any, unless output has been suppressed. */
	if (limit_block != InvalidBlockNumber && !opt->quiet)
		printf("TS %u, DB %u, REL %u, FORK %s: limit %u\n",
			   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
			   forkNames[forknum], limit_block);

	/* If we haven't allocated a block buffer yet, do that now. */
	if (block_buffer == NULL)
		block_buffer = palloc_array(BlockNumber, block_buffer_size);

	/* Try to fill the block buffer. */
	nblocks = BlockRefTableReaderGetBlocks(reader,
										   block_buffer,
										   block_buffer_size);

	/*
	 * A completely full buffer means there may be more blocks waiting, so
	 * enlarge it and read again until a read leaves free space.
	 */
	while (nblocks >= block_buffer_size)
	{
		unsigned	new_size;

		/* Double the size, being careful about overflow. */
		new_size = block_buffer_size * 2;
		if (new_size < block_buffer_size)
			new_size = PG_UINT32_MAX;
		block_buffer = repalloc_array(block_buffer, BlockNumber, new_size);

		/* Try to fill the newly-allocated space. */
		nblocks +=
			BlockRefTableReaderGetBlocks(reader,
										 block_buffer + block_buffer_size,
										 new_size - block_buffer_size);

		/* Save the new size for later calls. */
		block_buffer_size = new_size;
	}

	/* If we don't need to produce any output, skip the rest of this. */
	if (opt->quiet)
		return;

	/*
	 * Sort the returned block numbers. If the block reference table was using
	 * the bitmap representation for a given chunk, the block numbers in that
	 * chunk will already be sorted, but when the array-of-offsets
	 * representation is used, we can receive block numbers here out of order.
	 */
	qsort(block_buffer, nblocks, sizeof(BlockNumber), compare_block_numbers);

	/* Dump block references. */
	while (i < nblocks)
	{
		/*
		 * Find the next range of blocks to print, but if --individual was
		 * specified, then consider each block a separate range.
		 */
		startblock = endblock = block_buffer[i++];
		if (!opt->individual)
		{
			/* Extend the range while the next block is the direct successor. */
			while (i < nblocks && block_buffer[i] == endblock + 1)
			{
				endblock++;
				i++;
			}
		}

		/* Print a single block or an inclusive range, as appropriate. */
		if (startblock == endblock)
			printf("TS %u, DB %u, REL %u, FORK %s: block %u\n",
				   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
				   forkNames[forknum], startblock);
		else
			printf("TS %u, DB %u, REL %u, FORK %s: blocks %u..%u\n",
				   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
				   forkNames[forknum], startblock, endblock);
	}
}
/*
* Quicksort comparator for block numbers.
*/
static int
compare_block_numbers(const void *a, const void *b)
{
BlockNumber aa = *(BlockNumber *) a;
BlockNumber bb = *(BlockNumber *) b;
if (aa > bb)
return 1;
else if (aa == bb)
return 0;
else
return -1;
}
/*
 * Error callback.
 *
 * Handed to CreateBlockRefTableReader() by main().  Logs the printf-style
 * message at error level and terminates the program with status 1, so it
 * never returns to the caller.  callback_arg is not used; main() passes
 * NULL for it.
 */
static void
walsummary_error_callback(void *callback_arg, char *fmt,...)
{
	va_list		ap;

	va_start(ap, fmt);
	pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, fmt, ap);
	va_end(ap);

	exit(1);
}
/*
 * Read callback.
 *
 * Handed to CreateBlockRefTableReader() by main().  callback_arg is the
 * ws_file_info for the file being dumped.  Reads up to 'length' bytes from
 * that file into 'data' and returns the number of bytes read(), which may
 * be fewer than requested.  A read error is fatal and is reported together
 * with the file name.
 */
static int
walsummary_read_callback(void *callback_arg, void *data, int length)
{
	ws_file_info *ws = callback_arg;
	int			rc;

	rc = read(ws->fd, data, length);
	if (rc < 0)
		pg_fatal("could not read file \"%s\": %m", ws->filename);

	return rc;
}
/*
 * help
 *
 * Prints help page for the program
 *
 * progname: the name of the executed program, such as "pg_walsummary"
 *
 * Every option the program accepts is listed, including --version, which
 * is handled by handle_help_version_opts() in main() rather than by the
 * option loop.
 */
static void
help(const char *progname)
{
	printf(_("%s prints the contents of a WAL summary file.\n\n"), progname);
	printf(_("Usage:\n"));
	printf(_("  %s [OPTION]... FILE...\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_("  -i, --individual          list block numbers individually, not as ranges\n"));
	printf(_("  -q, --quiet               don't print anything, just parse the files\n"));
	printf(_("  -V, --version             output version information, then exit\n"));
	printf(_("  -?, --help                show this help, then exit\n"));

	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}

View File

@ -0,0 +1,19 @@
# Copyright (c) 2021-2023, PostgreSQL Global Development Group
use strict;
use warnings;
use PostgreSQL::Test::Utils;
use Test::More;
# Generic checks of --help, --version and invalid-option handling.
program_help_ok('pg_walsummary');
program_version_ok('pg_walsummary');
program_options_handling_ok('pg_walsummary');

# Without any file arguments there is nothing to dump, so the program must
# fail and say why.
command_fails_like(
	['pg_walsummary'],
	qr/no input files specified/,
	'input files must be specified');

done_testing();

View File

@ -0,0 +1,88 @@
# Copyright (c) 2021-2023, PostgreSQL Global Development Group
use strict;
use warnings;
use File::Compare;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
# Return the (TLI, LSN) pair through which WAL has been summarized so far.
sub summarizer_progress
{
	my ($node) = @_;

	my $progress = $node->safe_psql('postgres', <<EOM);
SELECT summarized_tli, summarized_lsn FROM pg_get_wal_summarizer_state()
EOM
	return split(/\|/, $progress);
}

# Block until a WAL summary on timeline $tli that ends after $lsn is
# available.  Dies on timeout so that a stuck summarizer is reported as such
# instead of surfacing later as a confusing failure.
sub wait_for_new_summary
{
	my ($node, $tli, $lsn) = @_;

	my $query = <<EOM;
SELECT EXISTS (
    SELECT * from pg_available_wal_summaries()
    WHERE tli = $tli AND end_lsn > '$lsn'
)
EOM
	$node->poll_query_until('postgres', $query)
	  or die "timed out waiting for a WAL summary ending after $lsn on TLI $tli";
	return;
}

# Set up a new database instance.
my $node1 = PostgreSQL::Test::Cluster->new('node1');
$node1->init(has_archiving => 1, allows_streaming => 1);
$node1->append_conf('postgresql.conf', 'summarize_wal = on');
$node1->start;

# See what's been summarized up until now.
my ($summarized_tli, $summarized_lsn) = summarizer_progress($node1);
note("before insert, summarized TLI $summarized_tli through $summarized_lsn");

# Create a table and insert a few test rows into it. VACUUM FREEZE it so that
# autovacuum doesn't induce any future modifications unexpectedly. Then
# trigger a checkpoint.
$node1->safe_psql('postgres', <<EOM);
CREATE TABLE mytable (a int, b text);
INSERT INTO mytable
SELECT
    g, random()::text||random()::text||random()::text||random()::text
FROM
    generate_series(1, 400) g;
VACUUM FREEZE;
CHECKPOINT;
EOM

# Wait for a new summary to show up.
wait_for_new_summary($node1, $summarized_tli, $summarized_lsn);

# Again check the progress of WAL summarization.
($summarized_tli, $summarized_lsn) = summarizer_progress($node1);
note("after insert, summarized TLI $summarized_tli through $summarized_lsn");

# Update a row in the first block of the table and trigger a checkpoint.
$node1->safe_psql('postgres', <<EOM);
UPDATE mytable SET b = 'abcdefghijklmnopqrstuvwxyz' WHERE a = 2;
CHECKPOINT;
EOM

# Again wait for a new summary to show up.
wait_for_new_summary($node1, $summarized_tli, $summarized_lsn);

# Figure out the exact details for the new summary file.
# NOTE(review): this assumes the query returns exactly one row; if more than
# one summary ends after $summarized_lsn the split below would misparse the
# output -- confirm whether that can happen.
my $details = $node1->safe_psql('postgres', <<EOM);
SELECT tli, start_lsn, end_lsn from pg_available_wal_summaries()
    WHERE tli = $summarized_tli AND end_lsn > '$summarized_lsn'
EOM
my ($tli, $start_lsn, $end_lsn) = split(/\|/, $details);
note("examining summary for TLI $tli from $start_lsn to $end_lsn");

# Reconstruct the full pathname for the WAL summary file.  The TLI comes back
# from SQL as a decimal number and must be rendered in hexadecimal, whereas
# the two halves of each LSN are already hexadecimal text and only need
# zero-padding to eight characters.
my ($start_hi, $start_lo) = split(m@/@, $start_lsn);
my ($end_hi, $end_lo) = split(m@/@, $end_lsn);
my $filename = sprintf "%s/pg_wal/summaries/%08X%08s%08s%08s%08s.summary",
  $node1->data_dir, $tli, $start_hi, $start_lo, $end_hi, $end_lo;
ok(-f $filename, "WAL summary file exists");

# Run pg_walsummary on it. We expect block 0 to be modified, but block 1
# to be unmodified, so the output should say block 0, not block 0..1 or
# similar.
my ($stdout, $stderr) = run_command([ 'pg_walsummary', $filename ]);
like($stdout, qr/FORK main: block 0$/m, "stdout shows block 0 modified");
is($stderr, '', 'stderr is empty');

done_testing();

View File

@ -4039,3 +4039,5 @@ cb_tablespace_mapping
manifest_data
manifest_writer
rfile
ws_options
ws_file_info