Fsync directory after creating or unlinking file.

If file was created/deleted just before powerloss it's possible that
file system will miss that. To prevent it, call fsync() where creating/
unlinkg file is critical.

Author: Michael Paquier
Reviewed-by: Ashutosh Bapat, Takayuki Tsunakawa, me
This commit is contained in:
Teodor Sigaev 2017-03-27 19:33:01 +03:00
parent 1f171a1803
commit 1b02be21f2
6 changed files with 78 additions and 13 deletions

View File

@ -577,6 +577,13 @@ ShutdownCLOG(void)
/* Flush dirty CLOG pages to disk */
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
SimpleLruFlush(ClogCtl, false);
/*
* fsync pg_xact to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_xact", true);
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
}
@ -589,6 +596,13 @@ CheckPointCLOG(void)
/* Flush dirty CLOG pages to disk */
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
SimpleLruFlush(ClogCtl, true);
/*
* fsync pg_xact to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_xact", true);
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
}

View File

@ -746,6 +746,12 @@ ShutdownCommitTs(void)
{
/* Flush dirty CommitTs pages to disk */
SimpleLruFlush(CommitTsCtl, false);
/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_commit_ts", true);
}
/*
@ -756,6 +762,12 @@ CheckPointCommitTs(void)
{
/* Flush dirty CommitTs pages to disk */
SimpleLruFlush(CommitTsCtl, true);
/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_commit_ts", true);
}
/*

View File

@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
}
LWLockRelease(TwoPhaseStateLock);
/*
* Flush unconditionally the parent directory to make any information
* durable on disk. Two-phase files could have been removed and those
* removals need to be made persistent as well as any files newly created
* previously since the last checkpoint.
*/
fsync_fname(TWOPHASE_DIR, true);
TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
if (log_checkpoints && serialized_xacts > 0)

View File

@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
if (!find_free)
{
/* Force installation: get rid of any pre-existing segment file */
unlink(path);
durable_unlink(path, DEBUG1);
}
else
{
@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
path)));
return;
}
rc = unlink(newpath);
rc = durable_unlink(newpath, LOG);
#else
rc = unlink(path);
rc = durable_unlink(path, LOG);
#endif
if (rc != 0)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not remove old transaction log file \"%s\": %m",
path)));
/* Message already logged by durable_unlink() */
return;
}
CheckpointStats.ckpt_segs_removed++;
@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE)));
if (unlink(BACKUP_LABEL_FILE) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
durable_unlink(BACKUP_LABEL_FILE, ERROR);
/*
* Remove tablespace_map file if present, it is created only if there
* are tablespaces.
*/
unlink(TABLESPACE_MAP);
durable_unlink(TABLESPACE_MAP, DEBUG1);
}
PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
}

View File

@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel)
return 0;
}
/*
* durable_unlink -- remove a file in a durable manner
*
* This routine ensures that, after returning, the effect of removing file
* persists in case of a crash. A crash while this routine is running will
* leave the system in no mixed state.
*
* It does so by using fsync on the parent directory of the file after the
* actual removal is done.
*
* Log errors with the severity specified by caller.
*
* Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not
* valid upon return.
*/
int
durable_unlink(const char *fname, int elevel)
{
if (unlink(fname) < 0)
{
ereport(elevel,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
fname)));
return -1;
}
/*
* To guarantee that the removal of the file is persistent, fsync
* its parent directory.
*/
if (fsync_parent_path(fname, elevel) != 0)
return -1;
return 0;
}
/*
* durable_link_or_rename -- rename a file in a durable manner.
*

View File

@ -119,6 +119,7 @@ extern int pg_fdatasync(int fd);
extern void pg_flush_data(int fd, off_t offset, off_t amount);
extern void fsync_fname(const char *fname, bool isdir);
extern int durable_rename(const char *oldfile, const char *newfile, int loglevel);
extern int durable_unlink(const char *fname, int loglevel);
extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
extern void SyncDataDirectory(void);