Introduce WaitEventSet API.

Commit ac1d794 ("Make idle backends exit if the postmaster dies.")
introduced a regression on, at least, large linux systems. Constantly
adding the same postmaster_alive_fds to the OSs internal datastructures
for implementing poll/select can cause significant contention; leading
to a performance regression of nearly 3x in one example.

This can be avoided by using e.g. linux' epoll, which avoids having to
add/remove file descriptors to the wait datastructures at a high rate.
Unfortunately the current latch interface makes it hard to allocate any
persistent per-backend resources.

Replace, with a backward compatibility layer, WaitLatchOrSocket with a
new WaitEventSet API. Users can allocate such a Set across multiple
calls, and add more than one file-descriptor to wait on. The latter has
been added because there's upcoming postgres features where that will be
helpful.

In addition to the previously existing poll(2), select(2),
WaitForMultipleObjects() implementations also provide an epoll_wait(2)
based implementation to address the aforementioned performance
problem. Epoll is only available on linux, but that is the most likely
OS for machines large enough (four sockets) to reproduce the problem.

To actually address the aforementioned regression, create and use a
long-lived WaitEventSet for FE/BE communication.  There are additional
places that would benefit from a long-lived set, but that's a task for
another day.

Thanks to Amit Kapila, who helped make the windows code I blindly wrote
actually work.

Reported-By: Dmitry Vasilyev Discussion:
CAB-SwXZh44_2ybvS5Z67p_CDz=XFn4hNAD=CnMEF+QqkXwFrGg@mail.gmail.com
20160114143931.GG10941@awork2.anarazel.de
This commit is contained in:
Andres Freund 2016-03-21 09:56:39 +01:00
parent 72e2d21c12
commit 98a64d0bd7
10 changed files with 1171 additions and 530 deletions

2
configure vendored
View File

@ -10193,7 +10193,7 @@ fi
## Header files
##
for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"

View File

@ -1183,7 +1183,7 @@ AC_SUBST(UUID_LIBS)
##
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
# On BSD, test for net/if.h will fail unless sys/socket.h
# is included first.

View File

@ -140,13 +140,13 @@ retry:
/* In blocking mode, wait until the socket is ready */
if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN))
{
int w;
WaitEvent event;
Assert(waitfor);
w = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor,
port->sock, 0);
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1);
/*
* If the postmaster has died, it's not safe to continue running,
@ -165,13 +165,13 @@ retry:
* cycles checking for this very rare condition, and this should cause
* us to exit quickly in most cases.)
*/
if (w & WL_POSTMASTER_DEATH)
if (event.events & WL_POSTMASTER_DEATH)
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to unexpected postmaster exit")));
/* Handle interrupt. */
if (w & WL_LATCH_SET)
if (event.events & WL_LATCH_SET)
{
ResetLatch(MyLatch);
ProcessClientReadInterrupt(true);
@ -241,22 +241,22 @@ retry:
if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN))
{
int w;
WaitEvent event;
Assert(waitfor);
w = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor,
port->sock, 0);
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1);
/* See comments in secure_read. */
if (w & WL_POSTMASTER_DEATH)
if (event.events & WL_POSTMASTER_DEATH)
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to unexpected postmaster exit")));
/* Handle interrupt. */
if (w & WL_LATCH_SET)
if (event.events & WL_LATCH_SET)
{
ResetLatch(MyLatch);
ProcessClientWriteInterrupt(true);

View File

@ -201,6 +201,11 @@ pq_init(void)
(errmsg("could not set socket to nonblocking mode: %m")));
#endif
FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3);
AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE, MyProcPort->sock,
NULL, NULL);
AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, -1, MyLatch, NULL);
AddWaitEventToSet(FeBeWaitSet, WL_POSTMASTER_DEATH, -1, NULL, NULL);
}
/* --------------------------------

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@
#include "access/htup_details.h"
#include "catalog/pg_authid.h"
#include "libpq/libpq.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
@ -247,6 +248,9 @@ SwitchToSharedLatch(void)
MyLatch = &MyProc->procLatch;
if (FeBeWaitSet)
ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch);
/*
* Set the shared latch as the local one might have been set. This
* shouldn't normally be necessary as code is supposed to check the
@ -262,6 +266,10 @@ SwitchBackToLocalLatch(void)
Assert(MyProc != NULL && MyLatch == &MyProc->procLatch);
MyLatch = &LocalLatchData;
if (FeBeWaitSet)
ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch);
SetLatch(MyLatch);
}

View File

@ -19,6 +19,7 @@
#include "lib/stringinfo.h"
#include "libpq/libpq-be.h"
#include "storage/latch.h"
typedef struct
@ -95,6 +96,8 @@ extern ssize_t secure_raw_write(Port *port, const void *ptr, size_t len);
extern bool ssl_loaded_verify_locations;
WaitEventSet *FeBeWaitSet;
/* GUCs */
extern char *SSLCipherSuites;
extern char *SSLECDHCurve;

View File

@ -530,6 +530,9 @@
/* Define to 1 if you have the syslog interface. */
#undef HAVE_SYSLOG
/* Define to 1 if you have the <sys/epoll.h> header file. */
#undef HAVE_SYS_EPOLL_H
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H

View File

@ -68,6 +68,12 @@
* use of any generic handler.
*
*
* WaitEventSets allow to wait for latches being set and additional events -
* postmaster dying and socket readiness of several sockets currently - at the
* same time. On many platforms using a long lived event set is more
* efficient than using WaitLatch or WaitLatchOrSocket.
*
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@ -95,13 +101,27 @@ typedef struct Latch
#endif
} Latch;
/* Bitmasks for events that may wake-up WaitLatch() clients */
/*
* Bitmasks for events that may wake-up WaitLatch(), WaitLatchOrSocket(), or
* WaitEventSetWait().
*/
#define WL_LATCH_SET (1 << 0)
#define WL_SOCKET_READABLE (1 << 1)
#define WL_SOCKET_WRITEABLE (1 << 2)
#define WL_TIMEOUT (1 << 3)
#define WL_TIMEOUT (1 << 3) /* not for WaitEventSetWait() */
#define WL_POSTMASTER_DEATH (1 << 4)
typedef struct WaitEvent
{
int pos; /* position in the event data structure */
uint32 events; /* triggered events */
pgsocket fd; /* socket fd associated with event */
void *user_data; /* pointer provided in AddWaitEventToSet */
} WaitEvent;
/* forward declaration to avoid exposing latch.c implementation details */
typedef struct WaitEventSet WaitEventSet;
/*
* prototypes for functions in latch.c
*/
@ -110,12 +130,19 @@ extern void InitLatch(volatile Latch *latch);
extern void InitSharedLatch(volatile Latch *latch);
extern void OwnLatch(volatile Latch *latch);
extern void DisownLatch(volatile Latch *latch);
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
pgsocket sock, long timeout);
extern void SetLatch(volatile Latch *latch);
extern void ResetLatch(volatile Latch *latch);
extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents);
extern void FreeWaitEventSet(WaitEventSet *set);
extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd,
Latch *latch, void *user_data);
extern void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch);
extern int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents);
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
pgsocket sock, long timeout);
/*
* Unix implementation uses SIGUSR1 for inter-process signaling.

View File

@ -2113,6 +2113,8 @@ WalSnd
WalSndCtlData
WalSndSendDataCallback
WalSndState
WaitEvent
WaitEventSet
WholeRowVarExprState
WindowAgg
WindowAggState