Repair a bunch of problems in md.c. This builds on Hiroshi's

insight that RelationFlushRelation ought to invoke smgrclose, and that the
way to make that work is to ensure that mdclose doesn't fail if the relation
is already closed (or unlinked, if we are looking at a DROP TABLE).  While
I was testing that, I was able to identify several problems that we had
with multiple-segment relations.  The system is now able to do initdb and
pass the regression tests with a very small segment size (I had it set to
64Kb per segment for testing).  I don't believe that ever worked before.
File descriptor leaks seem to be gone too.
I have partially addressed the concerns we had about mdtruncate(), too.
On a Win32 or NFS filesystem it is not possible to unlink a file that
another backend is holding open, so what md.c now does is to truncate
unwanted files to zero length before trying to unlink them.  The other
backends will be forced to close their open files by relation cache
invalidation --- but I think it would take considerable work to make
that happen before vacuum truncates the relation rather than after.
Leaving zero-length files lying around seems a usable compromise.
This commit is contained in:
Tom Lane 1999-09-02 02:57:50 +00:00
parent 4478d299e1
commit 68c323483c
3 changed files with 155 additions and 113 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.51 1999/07/17 20:17:48 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.52 1999/09/02 02:57:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,6 +32,15 @@
* In order to do that, we break relations up into chunks of < 2GBytes
* and store one chunk in each of several files that represent the relation.
* See the BLCKSZ and RELSEG_SIZE configuration constants in include/config.h.
*
* The file descriptor stored in the relation cache (see RelationGetFile())
* is actually an index into the Md_fdvec array. -1 indicates not open.
*
* When a relation is broken into multiple chunks, only the first chunk
* has its own entry in the Md_fdvec array; the remaining chunks have
* palloc'd MdfdVec objects that are chained onto the first chunk via the
* mdfd_chain links. All chunks except the last MUST have size exactly
* equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
*/
typedef struct _MdfdVec
@ -45,18 +54,19 @@ typedef struct _MdfdVec
#endif
} MdfdVec;
static int Nfds = 100;
static int Nfds = 100; /* initial/current size of Md_fdvec array */
static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
static int Md_Free = -1;
static int CurFd = 0;
static MemoryContext MdCxt;
static int Md_Free = -1; /* head of freelist of unused fdvec entries */
static int CurFd = 0; /* first never-used fdvec index */
static MemoryContext MdCxt; /* context for all my allocations */
#define MDFD_DIRTY (uint16) 0x01
#define MDFD_FREE (uint16) 0x02
/* routines declared here */
static int _mdfd_getrelnfd(Relation reln);
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag);
static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
static int _fdvec_alloc(void);
static void _fdvec_free(int);
static BlockNumber _mdnblocks(File file, Size blcksz);
@ -161,43 +171,39 @@ mdcreate(Relation reln)
int
mdunlink(Relation reln)
{
int nblocks;
int fd;
int i;
MdfdVec *v,
*ov;
MdfdVec *v;
MemoryContext oldcxt;
char fname[NAMEDATALEN];
char tname[NAMEDATALEN + 10]; /* leave room for overflow
* suffixes */
/*
* On Windows NT you can't unlink a file if it is open so we have * to
* do this.
* Force all segments of the relation to be opened, so that we
* won't miss deleting any of them.
*/
nblocks = mdnblocks(reln);
StrNCpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN);
if (FileNameUnlink(fname) < 0)
return SM_FAIL;
/* unlink all the overflow files for large relations */
for (i = 1;; i++)
{
sprintf(tname, "%s.%d", fname, i);
if (FileNameUnlink(tname) < 0)
break;
}
/* finally, clean out the mdfd vector */
/*
* Clean out the mdfd vector, letting fd.c unlink the physical files.
*
* NOTE: We truncate the file(s) before deleting 'em, because if other
* backends are holding the files open, the unlink will fail on some
* platforms (think Microsoft). Better a zero-size file gets left around
* than a big file. Those other backends will be forced to close the
* relation by cache invalidation, but that probably hasn't happened yet.
*/
fd = RelationGetFile(reln);
if (fd < 0) /* should not happen */
elog(ERROR, "mdunlink: mdnblocks didn't open relation");
Md_fdvec[fd].mdfd_flags = (uint16) 0;
oldcxt = MemoryContextSwitchTo(MdCxt);
#ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
{
MdfdVec *ov = v;
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd);
ov = v;
v = v->mdfd_chain;
if (ov != &Md_fdvec[fd])
pfree(ov);
@ -205,13 +211,16 @@ mdunlink(Relation reln)
Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
#else
v = &Md_fdvec[fd];
if (v != (MdfdVec *) NULL)
FileUnlink(v->mdfd_vfd);
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd);
#endif
MemoryContextSwitchTo(oldcxt);
_fdvec_free(fd);
/* be sure to mark relation closed */
reln->rd_fd = -1;
return SM_SUCCESS;
}
@ -229,7 +238,7 @@ mdextend(Relation reln, char *buffer)
MdfdVec *v;
nblocks = mdnblocks(reln);
v = _mdfd_getseg(reln, nblocks, O_CREAT);
v = _mdfd_getseg(reln, nblocks);
if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
return SM_FAIL;
@ -303,7 +312,7 @@ mdopen(Relation reln)
}
/*
* mdclose() -- Close the specified relation
* mdclose() -- Close the specified relation, if it isn't closed already.
*
* AND FREE fd vector! It may be re-used for other relation!
* reln should be flushed from cache after closing !..
@ -314,16 +323,19 @@ int
mdclose(Relation reln)
{
int fd;
MdfdVec *v,
*ov;
MdfdVec *v;
MemoryContext oldcxt;
fd = RelationGetFile(reln);
if (fd < 0)
return SM_SUCCESS; /* already closed, so no work */
oldcxt = MemoryContextSwitchTo(MdCxt);
#ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
{
MdfdVec *ov = v;
/* if not closed already */
if (v->mdfd_vfd >= 0)
{
@ -340,7 +352,6 @@ mdclose(Relation reln)
v->mdfd_flags &= ~MDFD_DIRTY;
}
/* Now free vector */
ov = v;
v = v->mdfd_chain;
if (ov != &Md_fdvec[fd])
pfree(ov);
@ -371,6 +382,9 @@ mdclose(Relation reln)
_fdvec_free(fd);
/* be sure to mark relation closed */
reln->rd_fd = -1;
return SM_SUCCESS;
}
@ -387,7 +401,7 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
int nbytes;
MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0);
v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@ -427,7 +441,7 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
long seekpos;
MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0);
v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@ -464,7 +478,7 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
long seekpos;
MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, 0);
v = _mdfd_getseg(reln, blocknum);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
@ -646,25 +660,27 @@ mdblindwrt(char *dbstr,
/*
* mdnblocks() -- Get the number of blocks stored in a relation.
*
* Returns # of blocks or -1 on error.
* Important side effect: all segments of the relation are opened
* and added to the mdfd_chain list. If this routine has not been
* called, then only segments up to the last one actually touched
* are present in the chain...
*
* Returns # of blocks, elog's on error.
*/
int
mdnblocks(Relation reln)
{
int fd;
MdfdVec *v;
#ifndef LET_OS_MANAGE_FILESIZE
int nblocks;
int segno;
#endif
fd = RelationGetFile(reln);
fd = _mdfd_getrelnfd(reln);
v = &Md_fdvec[fd];
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef DIAGNOSTIC
if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
elog(FATAL, "segment too big in getseg!");
#endif
segno = 0;
for (;;)
{
@ -702,54 +718,74 @@ mdnblocks(Relation reln)
int
mdtruncate(Relation reln, int nblocks)
{
int curnblk;
int fd;
MdfdVec *v;
#ifndef LET_OS_MANAGE_FILESIZE
int curnblk,
i,
oldsegno,
newsegno,
lastsegblocks;
MdfdVec **varray;
curnblk = mdnblocks(reln);
if (nblocks > curnblk)
return -1;
oldsegno = curnblk / RELSEG_SIZE;
newsegno = nblocks / RELSEG_SIZE;
MemoryContext oldcxt;
int priorblocks;
#endif
fd = RelationGetFile(reln);
/* NOTE: mdnblocks makes sure we have opened all existing segments,
* so that truncate/delete loop will get them all!
*/
curnblk = mdnblocks(reln);
if (nblocks < 0 || nblocks > curnblk)
return -1; /* bogus request */
if (nblocks == curnblk)
return nblocks; /* no work */
fd = _mdfd_getrelnfd(reln);
v = &Md_fdvec[fd];
#ifndef LET_OS_MANAGE_FILESIZE
varray = (MdfdVec **)palloc((oldsegno + 1) * sizeof(MdfdVec *));
for (i = 0; i <= oldsegno; i++)
oldcxt = MemoryContextSwitchTo(MdCxt);
priorblocks = 0;
while (v != (MdfdVec *) NULL)
{
if (!v)
elog(ERROR,"segment isn't open in mdtruncate!");
varray[i] = v;
v = v->mdfd_chain;
}
for (i = oldsegno; i > newsegno; i--)
{
v = varray[i];
if (FileTruncate(v->mdfd_vfd, 0) < 0)
MdfdVec *ov = v;
if (priorblocks > nblocks)
{
pfree(varray);
return -1;
/* This segment is no longer wanted at all (and has already been
* unlinked from the mdfd_chain).
* We truncate the file before deleting it because if other
* backends are holding the file open, the unlink will fail on
* some platforms. Better a zero-size file gets left around than
* a big file...
*/
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd);
v = v->mdfd_chain;
Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st segment */
pfree(ov);
}
v->mdfd_lstbcnt = 0;
else if (priorblocks + RELSEG_SIZE > nblocks)
{
/* This is the last segment we want to keep.
* Truncate the file to the right length, and clear chain link
* that points to any remaining segments (which we shall zap).
* NOTE: if nblocks is exactly a multiple K of RELSEG_SIZE,
* we will truncate the K+1st segment to 0 length but keep it.
* This is mainly so that the right thing happens if nblocks=0.
*/
int lastsegblocks = nblocks - priorblocks;
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
return -1;
v->mdfd_lstbcnt = lastsegblocks;
v = v->mdfd_chain;
ov->mdfd_chain = (MdfdVec *) NULL;
}
else
{
/* We still need this segment and 0 or more blocks beyond it,
* so nothing to do here.
*/
v = v->mdfd_chain;
}
priorblocks += RELSEG_SIZE;
}
/* Calculate the # of blocks in the last segment */
lastsegblocks = nblocks - (newsegno * RELSEG_SIZE);
v = varray[i];
pfree(varray);
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
return -1;
v->mdfd_lstbcnt = lastsegblocks;
MemoryContextSwitchTo(oldcxt);
#else
if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
return -1;
@ -814,11 +850,11 @@ mdabort()
{
#ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
v->mdfd_flags &= ~MDFD_DIRTY;
#else
v = &Md_fdvec[i];
if (v != (MdfdVec *) NULL)
v->mdfd_flags &= ~MDFD_DIRTY;
#endif
v->mdfd_flags &= ~MDFD_DIRTY;
}
return SM_SUCCESS;
@ -895,6 +931,7 @@ _fdvec_free(int fdvec)
{
Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE);
Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
Md_Free = fdvec;
@ -926,9 +963,9 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
/* open the file */
#ifndef __CYGWIN32__
fd = PathNameOpenFile(fullpath, O_RDWR | oflags, 0600);
fd = FileNameOpenFile(fullpath, O_RDWR | oflags, 0600);
#else
fd = PathNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600);
fd = FileNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600);
#endif
if (dofree)
@ -959,13 +996,12 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
return v;
}
static MdfdVec *
_mdfd_getseg(Relation reln, int blkno, int oflag)
/* Get the fd for the relation, opening it if it's not already open */
static int
_mdfd_getrelnfd(Relation reln)
{
MdfdVec *v;
int segno;
int fd;
int i;
fd = RelationGetFile(reln);
if (fd < 0)
@ -975,6 +1011,20 @@ _mdfd_getseg(Relation reln, int blkno, int oflag)
RelationGetRelationName(reln));
reln->rd_fd = fd;
}
return fd;
}
/* Find the segment of the relation holding the specified block */
static MdfdVec *
_mdfd_getseg(Relation reln, int blkno)
{
MdfdVec *v;
int segno;
int fd;
int i;
fd = _mdfd_getrelnfd(reln);
#ifndef LET_OS_MANAGE_FILESIZE
for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
@ -984,7 +1034,7 @@ _mdfd_getseg(Relation reln, int blkno, int oflag)
if (v->mdfd_chain == (MdfdVec *) NULL)
{
v->mdfd_chain = _mdfd_openseg(reln, i, oflag);
v->mdfd_chain = _mdfd_openseg(reln, i, O_CREAT);
if (v->mdfd_chain == (MdfdVec *) NULL)
elog(ERROR, "cannot open segment %d of relation %s",

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.28 1999/07/17 20:17:49 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.29 1999/09/02 02:57:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -191,13 +191,12 @@ smgropen(int16 which, Relation reln)
/*
* smgrclose() -- Close a relation.
*
* NOTE: mdclose frees fd vector! It may be re-used for other relation!
* reln should be flushed from cache after closing !..
* Currently, smgrclose is calling by
* relcache.c:RelationPurgeLocalRelation() only.
* It would be nice to have smgrfree(), but because of
* smgrclose is called from single place... - vadim 05/22/97
*
* NOTE: underlying manager should allow case where relation is
* already closed. Indeed relation may have been unlinked!
* This is currently called only from RelationFlushRelation() when
* the relation cache entry is about to be dropped; could be doing
* simple relation cache clear, or finishing up DROP TABLE.
*
* Returns SM_SUCCESS on success, aborts on failure.
*/
int

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.67 1999/07/17 20:18:02 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.68 1999/09/02 02:57:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1256,9 +1256,13 @@ RelationFlushRelation(Relation *relationPtr,
if (!onlyFlushReferenceCountZero ||
RelationHasReferenceCountZero(relation))
{
oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt);
/* make sure smgr and lower levels close the relation's files,
* if they weren't closed already
*/
smgrclose(DEFAULT_SMGR, relation);
RelationCacheDelete(relation);
FreeTupleDesc(relation->rd_att);
@ -1515,17 +1519,6 @@ RelationPurgeLocalRelation(bool xactCommitted)
else
smgrunlink(DEFAULT_SMGR, reln);
}
else if (!IsBootstrapProcessingMode() && !(reln->rd_isnoname))
/*
* RelationFlushRelation () below will flush relation
* information from the cache. We must call smgrclose to flush
* relation information from SMGR & FMGR, too. We assume that
* for temp relations smgrunlink is already called by
* heap_destroyr and we skip smgrclose for them. -
* vadim 05/22/97
*/
smgrclose(DEFAULT_SMGR, reln);
reln->rd_myxactonly = FALSE;