Allow copying files using clone/copy_file_range

Adds --clone/--copy-file-range options to pg_combinebackup, to allow
copying files using file cloning or copy_file_range(). These methods may
be faster than the standard block-by-block copy, but the main advantage
is that they enable various features provided by CoW filesystems.

This commit only uses these copy methods for files that did not change
and can be copied as a whole from a single backup.

These new copy methods may not be available on all platforms, in which
case the command throws an error (immediately, even if no files would be
copied as a whole). This early failure seems better than failing later
when trying to copy the first file, after performing a lot of work on
earlier files.

If the requested copy method is available, but a checksum needs to be
recalculated (e.g. because of a different checksum type), the file is
still copied using the requested method, but it is also read for the
checksum calculation. Depending on the filesystem this may be more
expensive than just performing the simple copy, but it does enable the
CoW benefits.

Initial patch by Jakub Wartak, various reworks and improvements by me.

Author: Tomas Vondra, Jakub Wartak
Reviewed-by: Thomas Munro, Jakub Wartak, Robert Haas
Discussion: https://postgr.es/m/3024283a-7491-4240-80d0-421575f6bb23%40enterprisedb.com
This commit is contained in:
Tomas Vondra 2024-04-05 18:01:26 +02:00
parent 3c5ff36aba
commit f8ce4ed78c
7 changed files with 278 additions and 41 deletions

View File

@ -185,6 +185,51 @@ PostgreSQL documentation
</listitem>
</varlistentry>
<varlistentry>
<term><option>--clone</option></term>
<listitem>
<para>
Use efficient file cloning (also known as <quote>reflinks</quote> on
some systems) instead of copying files to the new data directory,
which can result in near-instantaneous copying of the data files.
</para>
<para>
If a backup manifest is not available or does not contain a checksum of
the right type, file cloning will be used to copy the file, but the
file will also be read block-by-block for the checksum calculation.
</para>
<para>
File cloning is only supported on some operating systems and file
systems. If it is selected but not supported, the
<application>pg_combinebackup</application> run will error. At present,
it is supported on Linux (kernel 4.5 or later) with Btrfs and XFS (on
file systems created with reflink support), and on macOS with APFS.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--copy-file-range</option></term>
<listitem>
<para>
Use the <function>copy_file_range</function> system call for efficient
copying. On some file systems this gives results similar to
<option>--clone</option>, sharing physical disk blocks, while on others
it may still copy blocks, but do so via an optimized path. At present,
it is supported on Linux and FreeBSD.
</para>
<para>
If a backup manifest is not available or does not contain a checksum of
the right type, <function>copy_file_range</function> will be used to
copy the file, but the file will also be read block-by-block for the
checksum calculation.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-V</option></term>
<term><option>--version</option></term>

View File

@ -14,6 +14,7 @@
#include <copyfile.h>
#endif
#include <fcntl.h>
#include <limits.h>
#include <sys/stat.h>
#include <unistd.h>
@ -24,8 +25,15 @@
static void copy_file_blocks(const char *src, const char *dst,
pg_checksum_context *checksum_ctx);
static void copy_file_clone(const char *src, const char *dst,
pg_checksum_context *checksum_ctx);
static void copy_file_by_range(const char *src, const char *dst,
pg_checksum_context *checksum_ctx);
#ifdef WIN32
static void copy_file_copyfile(const char *src, const char *dst);
static void copy_file_copyfile(const char *src, const char *dst,
pg_checksum_context *checksum_ctx);
#endif
/*
@ -35,8 +43,13 @@ static void copy_file_copyfile(const char *src, const char *dst);
*/
void
copy_file(const char *src, const char *dst,
pg_checksum_context *checksum_ctx, bool dry_run)
pg_checksum_context *checksum_ctx,
CopyMethod copy_method, bool dry_run)
{
char *strategy_name = NULL;
void (*strategy_implementation) (const char *, const char *,
pg_checksum_context *checksum_ctx) = NULL;
/*
* In dry-run mode, we don't actually copy anything, nor do we read any
* data from the source file, but we do verify that we can open it.
@ -51,61 +64,94 @@ copy_file(const char *src, const char *dst,
pg_fatal("could not close \"%s\": %m", src);
}
/*
* If we don't need to compute a checksum, then we can use any special
* operating system primitives that we know about to copy the file; this
* may be quicker than a naive block copy.
*/
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
{
char *strategy_name = NULL;
void (*strategy_implementation) (const char *, const char *) = NULL;
#ifdef WIN32
strategy_name = "CopyFile";
strategy_implementation = copy_file_copyfile;
copy_method = COPY_METHOD_COPYFILE;
#endif
if (strategy_name != NULL)
{
if (dry_run)
pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
src, dst, strategy_name);
else
{
pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
src, dst, strategy_name);
(*strategy_implementation) (src, dst);
}
return;
}
/* Determine the name of the copy strategy for use in log messages. */
switch (copy_method)
{
case COPY_METHOD_CLONE:
strategy_name = "clone";
strategy_implementation = copy_file_clone;
break;
case COPY_METHOD_COPY:
/* leave NULL for simple block-by-block copy */
strategy_implementation = copy_file_blocks;
break;
case COPY_METHOD_COPY_FILE_RANGE:
strategy_name = "copy_file_range";
strategy_implementation = copy_file_by_range;
break;
#ifdef WIN32
case COPY_METHOD_COPYFILE:
strategy_name = "CopyFile";
strategy_implementation = copy_file_copyfile;
break;
#endif
}
/*
* Fall back to the simple approach of reading and writing all the blocks,
* feeding them into the checksum context as we go.
*/
if (dry_run)
{
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
if (strategy_name)
pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
src, dst, strategy_name);
else
pg_log_debug("would copy \"%s\" to \"%s\"",
src, dst);
else
pg_log_debug("would copy \"%s\" to \"%s\" and checksum with %s",
src, dst, pg_checksum_type_name(checksum_ctx->type));
}
else
{
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
if (strategy_name)
pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
src, dst, strategy_name);
else if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
pg_log_debug("copying \"%s\" to \"%s\"",
src, dst);
else
pg_log_debug("copying \"%s\" to \"%s\" and checksumming with %s",
src, dst, pg_checksum_type_name(checksum_ctx->type));
copy_file_blocks(src, dst, checksum_ctx);
strategy_implementation(src, dst, checksum_ctx);
}
}
/*
 * checksum_file
 *		Feed the whole contents of the src file into checksum_ctx.
 *
 * Returns immediately when checksum_ctx->type is CHECKSUM_TYPE_NONE.
 * Otherwise the file is opened read-only and read in chunks of
 * 50 * BLCKSZ bytes, each of which is passed to pg_checksum_update().
 *
 * Failure to open or read the file, or to update the checksum, is reported
 * through pg_fatal().
 */
static void
checksum_file(const char *src, pg_checksum_context *checksum_ctx)
{
	int			src_fd;
	uint8	   *buffer;
	const int	buffer_size = 50 * BLCKSZ;
	ssize_t		rb;

	/* bail out if no checksum needed */
	if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
		return;

	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("could not open file \"%s\": %m", src);

	buffer = pg_malloc(buffer_size);

	/* read() returns 0 at end of file and a negative value on error */
	while ((rb = read(src_fd, buffer, buffer_size)) > 0)
	{
		if (pg_checksum_update(checksum_ctx, buffer, rb) < 0)
			pg_fatal("could not update checksum of file \"%s\"", src);
	}

	if (rb < 0)
		pg_fatal("could not read file \"%s\": %m", src);

	pg_free(buffer);
	close(src_fd);
}
/*
* Copy a file block by block, and optionally compute a checksum as we go.
*/
@ -156,14 +202,98 @@ copy_file_blocks(const char *src, const char *dst,
close(dest_fd);
}
/*
 * copy_file_clone
 *		Clones/reflinks a file from src to dest.
 *
 * Where copyfile() with COPYFILE_CLONE_FORCE is available, that is used.
 * Otherwise, on Linux with FICLONE defined, the destination file is created
 * (it must not already exist) and the FICLONE ioctl is issued against it;
 * if the ioctl fails the just-created destination is unlinked before the
 * error is reported. On any other platform this reports an error.
 *
 * All failures are reported through pg_fatal().
 *
 * If needed, also reads the file and calculates the checksum.
 *
 * NOTE(review): the FICLONE branch is only compiled when the FICLONE macro
 * is visible; presumably that requires <linux/fs.h> (and <sys/ioctl.h> for
 * ioctl()) to be included by this file -- confirm against the include list.
 */
static void
copy_file_clone(const char *src, const char *dest,
				pg_checksum_context *checksum_ctx)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dest, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning file \"%s\" to \"%s\": %m", src, dest);
#elif defined(__linux__) && defined(FICLONE)
	{
		int			src_fd;
		int			dest_fd;

		if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
			pg_fatal("could not open file \"%s\": %m", src);

		if ((dest_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
							pg_file_create_mode)) < 0)
			pg_fatal("could not create file \"%s\": %m", dest);

		if (ioctl(dest_fd, FICLONE, src_fd) < 0)
		{
			int			save_errno = errno;

			/* don't leave an empty destination file behind */
			unlink(dest);

			/* unlink() may have clobbered errno; restore it for %m */
			errno = save_errno;
			pg_fatal("error while cloning file \"%s\" to \"%s\": %m",
					 src, dest);
		}

		close(src_fd);
		close(dest_fd);
	}
#else
	pg_fatal("file cloning not supported on this platform");
#endif

	/* if needed, calculate checksum of the file */
	checksum_file(src, checksum_ctx);
}
/*
 * copy_file_by_range
 *		Copy src to dest with the copy_file_range() system call.
 *
 * The destination is created exclusively, so it must not exist yet. The
 * call is repeated, letting the kernel advance both file offsets, until it
 * reports that zero bytes were copied. Builds without
 * HAVE_COPY_FILE_RANGE report an error instead.
 *
 * Once the copy is done the source file is passed to checksum_file(), which
 * reads it only when a checksum was actually requested.
 */
static void
copy_file_by_range(const char *src, const char *dest,
				   pg_checksum_context *checksum_ctx)
{
#if defined(HAVE_COPY_FILE_RANGE)
	int			in_fd;
	int			out_fd;
	ssize_t		copied;

	in_fd = open(src, O_RDONLY | PG_BINARY, 0);
	if (in_fd < 0)
		pg_fatal("could not open file \"%s\": %m", src);

	out_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
				  pg_file_create_mode);
	if (out_fd < 0)
		pg_fatal("could not create file \"%s\": %m", dest);

	/* NULL offsets: copy from/to the current position of each descriptor */
	for (;;)
	{
		copied = copy_file_range(in_fd, NULL, out_fd, NULL, SSIZE_MAX, 0);

		if (copied < 0)
			pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
					 src, dest);

		/* nothing left to copy */
		if (copied == 0)
			break;
	}

	close(in_fd);
	close(out_fd);
#else
	pg_fatal("copy_file_range not supported on this platform");
#endif

	/* if needed, calculate checksum of the file */
	checksum_file(src, checksum_ctx);
}
#ifdef WIN32
/*
 * copy_file_copyfile
 * Copies src to dst with the Windows CopyFile() call (this function is
 * only compiled when WIN32 is defined).
 *
 * A zero return from CopyFile() is treated as failure: the Windows error
 * code is passed through _dosmaperr() (presumably so that %m in the
 * message reflects it -- confirm) and the error is reported via pg_fatal().
 *
 * Afterwards the source file is handed to checksum_file(), which returns
 * immediately when the checksum type is CHECKSUM_TYPE_NONE.
 *
 * NOTE(review): this view shows both the previous two-argument signature
 * line and its three-argument replacement; only the latter matches the
 * checksum_ctx use in the body.
 */
static void
copy_file_copyfile(const char *src, const char *dst)
copy_file_copyfile(const char *src, const char *dst,
pg_checksum_context *checksum_ctx)
{
/* NOTE(review): third argument presumably means "fail if dst exists" */
if (CopyFile(src, dst, true) == 0)
{
_dosmaperr(GetLastError());
pg_fatal("could not copy \"%s\" to \"%s\": %m", src, dst);
}
/* if needed, calculate checksum of the file */
checksum_file(src, checksum_ctx);
}
#endif /* WIN32 */

View File

@ -11,9 +11,25 @@
#ifndef COPY_FILE_H
#define COPY_FILE_H
#include "c.h"
#include "common/checksum_helper.h"
#include "common/file_utils.h"
/*
 * Enumeration to denote copy modes.
 *
 * The value is passed to copy_file() and selects which implementation is
 * used to copy a whole file.
 */
typedef enum CopyMethod
{
/* clone/reflink the file (copy_file_clone); selected by --clone */
COPY_METHOD_CLONE,
/* plain block-by-block copy (copy_file_blocks); the default method */
COPY_METHOD_COPY,
/* copy_file_range() (copy_file_by_range); selected by --copy-file-range */
COPY_METHOD_COPY_FILE_RANGE,
#ifdef WIN32
/* Windows CopyFile() (copy_file_copyfile); exists only in WIN32 builds */
COPY_METHOD_COPYFILE,
#endif
} CopyMethod;
extern void copy_file(const char *src, const char *dst,
pg_checksum_context *checksum_ctx, bool dry_run);
pg_checksum_context *checksum_ctx,
CopyMethod copy_method, bool dry_run);
#endif /* COPY_FILE_H */

View File

@ -69,6 +69,7 @@ typedef struct cb_options
pg_checksum_type manifest_checksums;
bool no_manifest;
DataDirSyncMethod sync_method;
CopyMethod copy_method;
} cb_options;
/*
@ -129,6 +130,8 @@ main(int argc, char *argv[])
{"manifest-checksums", required_argument, NULL, 1},
{"no-manifest", no_argument, NULL, 2},
{"sync-method", required_argument, NULL, 3},
{"clone", no_argument, NULL, 4},
{"copy-file-range", no_argument, NULL, 5},
{NULL, 0, NULL, 0}
};
@ -156,6 +159,7 @@ main(int argc, char *argv[])
memset(&opt, 0, sizeof(opt));
opt.manifest_checksums = CHECKSUM_TYPE_CRC32C;
opt.sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
opt.copy_method = COPY_METHOD_COPY;
/* process command-line options */
while ((c = getopt_long(argc, argv, "dnNPo:T:",
@ -192,6 +196,12 @@ main(int argc, char *argv[])
if (!parse_sync_method(optarg, &opt.sync_method))
exit(1);
break;
case 4:
opt.copy_method = COPY_METHOD_CLONE;
break;
case 5:
opt.copy_method = COPY_METHOD_COPY_FILE_RANGE;
break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@ -213,6 +223,35 @@ main(int argc, char *argv[])
if (opt.no_manifest)
opt.manifest_checksums = CHECKSUM_TYPE_NONE;
/* Check that the platform supports the requested copy method. */
if (opt.copy_method == COPY_METHOD_CLONE)
{
#if (defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)) || \
(defined(__linux__) && defined(FICLONE))
if (opt.dry_run)
pg_log_debug("would use cloning to copy files");
else
pg_log_debug("will use cloning to copy files");
#else
pg_fatal("file cloning not supported on this platform");
#endif
}
else if (opt.copy_method == COPY_METHOD_COPY_FILE_RANGE)
{
#if defined(HAVE_COPY_FILE_RANGE)
if (opt.dry_run)
pg_log_debug("would use copy_file_range to copy blocks");
else
pg_log_debug("will use copy_file_range to copy blocks");
#else
pg_fatal("copy_file_range not supported on this platform");
#endif
}
/* Read the server version from the final backup. */
version = read_pg_version_file(argv[argc - 1]);
@ -696,6 +735,8 @@ help(const char *progname)
" use algorithm for manifest checksums\n"));
printf(_(" --no-manifest suppress generation of backup manifest\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" --clone clone (reflink) instead of copying files\n"));
printf(_(" --copy-file-range copy using copy_file_range() syscall\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@ -936,6 +977,7 @@ process_directory_recursively(Oid tsoid,
checksum_type,
&checksum_length,
&checksum_payload,
opt->copy_method,
opt->debug,
opt->dry_run);
}
@ -993,7 +1035,8 @@ process_directory_recursively(Oid tsoid,
/* Actually copy the file. */
snprintf(ofullpath, MAXPGPATH, "%s/%s", ofulldir, de->d_name);
copy_file(ifullpath, ofullpath, &checksum_ctx, opt->dry_run);
copy_file(ifullpath, ofullpath, &checksum_ctx,
opt->copy_method, opt->dry_run);
/*
* If copy_file() performed a checksum calculation for us, then

View File

@ -89,6 +89,7 @@ reconstruct_from_incremental_file(char *input_filename,
pg_checksum_type checksum_type,
int *checksum_length,
uint8 **checksum_payload,
CopyMethod copy_method,
bool debug,
bool dry_run)
{
@ -319,7 +320,7 @@ reconstruct_from_incremental_file(char *input_filename,
*/
if (copy_source != NULL)
copy_file(copy_source->filename, output_filename,
&checksum_ctx, dry_run);
&checksum_ctx, copy_method, dry_run);
else
{
write_reconstructed_file(input_filename, output_filename,

View File

@ -27,6 +27,7 @@ extern void reconstruct_from_incremental_file(char *input_filename,
pg_checksum_type checksum_type,
int *checksum_length,
uint8 **checksum_payload,
CopyMethod copy_method,
bool debug,
bool dry_run);

View File

@ -480,6 +480,7 @@ CopyFromState
CopyFromStateData
CopyHeaderChoice
CopyInsertMethod
CopyMethod
CopyLogVerbosityChoice
CopyMultiInsertBuffer
CopyMultiInsertInfo