Allow using copy_file_range in write_reconstructed_file

This commit allows using copy_file_range() for efficient combining of
data from multiple files, instead of simply reading/writing the blocks.
Depending on the filesystem and other factors (size of the increment,
distribution of modified blocks etc.) this may be faster than the
block-by-block copy, but more importantly it enables various features
provided by CoW filesystems.

If a checksum needs to be calculated for the file, the same strategy as
when copying whole files is used - copy_file_range is used to copy the
blocks, but the file is also read for the checksum calculation.

While the checksum calculation is rarely needed when cloning whole
files, it is needed almost always when reconstructing files from
multiple backups (the only exception is when the user specified
--no-manifest).

Author: Tomas Vondra
Reviewed-by: Thomas Munro, Jakub Wartak, Robert Haas
Discussion: https://postgr.es/m/3024283a-7491-4240-80d0-421575f6bb23%40enterprisedb.com
This commit is contained in:
Tomas Vondra 2024-04-05 19:01:16 +02:00
parent b8b37e41ba
commit ac81101551
1 changed files with 108 additions and 30 deletions

View File

@ -58,9 +58,14 @@ static void write_reconstructed_file(char *input_filename,
rfile **sourcemap,
off_t *offsetmap,
pg_checksum_context *checksum_ctx,
CopyMethod copy_method,
bool debug,
bool dry_run);
static void read_bytes(rfile *rf, void *buffer, unsigned length);
static void write_block(int wfd, char *output_filename,
uint8 *buffer,
pg_checksum_context *checksum_ctx);
static void read_block(rfile *s, off_t off, uint8 *buffer);
/*
* Reconstruct a full file from an incremental file and a chain of prior
@ -325,7 +330,8 @@ reconstruct_from_incremental_file(char *input_filename,
{
write_reconstructed_file(input_filename, output_filename,
block_length, sourcemap, offsetmap,
&checksum_ctx, debug, dry_run);
&checksum_ctx, copy_method,
debug, dry_run);
debug_reconstruction(n_prior_backups + 1, source, dry_run);
}
@ -535,6 +541,7 @@ write_reconstructed_file(char *input_filename,
rfile **sourcemap,
off_t *offsetmap,
pg_checksum_context *checksum_ctx,
CopyMethod copy_method,
bool debug,
bool dry_run)
{
@ -622,7 +629,6 @@ write_reconstructed_file(char *input_filename,
{
uint8 buffer[BLCKSZ];
rfile *s = sourcemap[i];
int wb;
/* Update accounting information. */
if (s == NULL)
@ -646,38 +652,61 @@ write_reconstructed_file(char *input_filename,
* uninitialized block, so just zero-fill it.
*/
memset(buffer, 0, BLCKSZ);
}
else
{
int rb;
/* Read the block from the correct source, except if dry-run. */
rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[i]);
if (rb != BLCKSZ)
/* Write out the block, update the checksum if needed. */
write_block(wfd, output_filename, buffer, checksum_ctx);
/* Nothing else to do for zero-filled blocks. */
continue;
}
/* Copy the block using the appropriate copy method. */
if (copy_method != COPY_METHOD_COPY_FILE_RANGE)
{
/*
* Read the block from the correct source file, and then write it
* out, possibly with a checksum update.
*/
read_block(s, offsetmap[i], buffer);
write_block(wfd, output_filename, buffer, checksum_ctx);
}
else /* use copy_file_range */
{
/* copy_file_range modifies the offset, so use a local copy */
off_t off = offsetmap[i];
size_t nwritten = 0;
/*
* Retry until we've written all the bytes (the offset is updated
* by copy_file_range, and so is the wfd file offset).
*/
do
{
if (rb < 0)
pg_fatal("could not read file \"%s\": %m", s->filename);
else
pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu",
s->filename, rb, BLCKSZ,
(unsigned long long) offsetmap[i]);
}
}
int wb;
/* Write out the block. */
if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ)
{
if (wb < 0)
pg_fatal("could not write file \"%s\": %m", output_filename);
else
pg_fatal("could not write file \"%s\": wrote only %d of %d bytes",
output_filename, wb, BLCKSZ);
}
wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
/* Update the checksum computation. */
if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
pg_fatal("could not update checksum of file \"%s\"",
output_filename);
if (wb < 0)
pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
input_filename, output_filename);
nwritten += wb;
} while (BLCKSZ > nwritten);
/*
* When checksum calculation not needed, we're done, otherwise
* read the block and pass it to the checksum calculation.
*/
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
continue;
read_block(s, offsetmap[i], buffer);
if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
pg_fatal("could not update checksum of file \"%s\"",
output_filename);
}
}
/* Debugging output. */
@ -693,3 +722,52 @@ write_reconstructed_file(char *input_filename,
if (wfd >= 0 && close(wfd) != 0)
pg_fatal("could not close \"%s\": %m", output_filename);
}
/*
 * Write one block to the given file descriptor and feed the same bytes
 * into the checksum calculation.
 *
 * Exactly BLCKSZ bytes are taken from the buffer. A failed or short
 * write, as well as a failed checksum update, is reported via pg_fatal().
 * The output_filename is used only in those messages.
 */
static void
write_block(int fd, char *output_filename,
			uint8 *buffer, pg_checksum_context *checksum_ctx)
{
	int			nbytes = write(fd, buffer, BLCKSZ);

	/* Distinguish an outright failure (errno is set) from a short write. */
	if (nbytes < 0)
		pg_fatal("could not write file \"%s\": %m", output_filename);
	else if (nbytes != BLCKSZ)
		pg_fatal("could not write file \"%s\": wrote only %d of %d bytes",
				 output_filename, nbytes, BLCKSZ);

	/* Fold the block just written into the running checksum. */
	if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
		pg_fatal("could not update checksum of file \"%s\"",
				 output_filename);
}
/*
 * Read one block of data (BLCKSZ bytes) into the buffer.
 *
 * The bytes are fetched with pg_pread() from s->fd, starting at offset
 * "off". Anything other than a full-length read is reported via
 * pg_fatal(), naming s->filename and, for a short read, the offset.
 */
static void
read_block(rfile *s, off_t off, uint8 *buffer)
{
	int			nbytes = pg_pread(s->fd, buffer, BLCKSZ, off);

	/* A complete block is the only acceptable outcome. */
	if (nbytes == BLCKSZ)
		return;

	/* Distinguish an outright failure (errno is set) from a short read. */
	if (nbytes < 0)
		pg_fatal("could not read file \"%s\": %m", s->filename);
	else
		pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu",
				 s->filename, nbytes, BLCKSZ,
				 (unsigned long long) off);
}