Implement TCP Keep-Alives across most Unix-like systems (#12782)

## TCP Keep-Alives

[TCP
Keep-Alives](https://datatracker.ietf.org/doc/html/rfc9293#name-tcp-keep-alives)
provides a way to detect whether a TCP connection is alive or dead,
which can be useful for reducing system resources by cleaning up dead
connections.

There is full support of TCP Keep-Alives on Linux and partial support on
macOS in `redis` at present.

This PR intends to complete the rest.

## Unix-like OS's support

`TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` are not included in
the POSIX standard for `setsockopts`, while these three socket options
are widely available on most Unix-like systems and Windows.

### References

- [AIX](https://www.ibm.com/support/pages/ibm-aix-tcp-keepalive-probes)
- [DragonflyBSD](https://man.dragonflybsd.org/?command=tcp&section=4)
- [FreeBSD](https://www.freebsd.org/cgi/man.cgi?query=tcp)
-
[HP-UX](https://docstore.mik.ua/manuals/hp-ux/en/B2355-60130/TCP.7P.html)
- [illumos](https://illumos.org/man/4P/tcp)
- [Linux](https://man7.org/linux/man-pages/man7/tcp.7.html)
- [NetBSD](https://man.netbsd.org/NetBSD-8.0/tcp.4)
-
[Windows](https://learn.microsoft.com/en-us/windows/win32/winsock/ipproto-tcp-socket-options)

### Mac OS

In earlier versions, macOS only supported setting `TCP_KEEPALIVE` (the
equivalent of `TCP_KEEPIDLE` on other platforms), but since macOS 10.8
it has supported `TCP_KEEPINTVL` and `TCP_KEEPCNT`.

Check out [this mailing
list](https://lists.apple.com/archives/macnetworkprog/2012/Jul/msg00005.html)
and [the source
code](https://github.com/apple/darwin-xnu/blob/main/bsd/netinet/tcp.h#L215-L230)
for more details.

### Solaris

Solaris claimed it supported the TCP-Alives mechanism, but
`TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on
Solaris until the latest version 11.4. Therefore, we need to simulate
the TCP-Alives mechanism on other platforms via
`TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`.

- [Solaris
11.3](https://docs.oracle.com/cd/E86824_01/html/E54777/tcp-7p.html)
- [Solaris
11.4](https://docs.oracle.com/cd/E88353_01/html/E37851/tcp-4p.html)

---------

Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
Andy Pan 2023-12-27 00:44:18 +08:00 committed by GitHub
parent 27a8e3b04e
commit 1aa633d61b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 117 additions and 32 deletions

View File

@ -130,57 +130,142 @@ int anetCloexec(int fd) {
return r;
}
/* Set TCP keep alive option to detect dead peers. The interval option
* is only used for Linux as we are using Linux-specific APIs to set
* the probe send time, interval, and count. */
/* Enable TCP keep-alive mechanism to detect dead peers,
* TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT will be set accordingly. */
int anetKeepAlive(char *err, int fd, int interval)
{
int val = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1)
int enabled = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, sizeof(enabled)))
{
anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
return ANET_ERR;
}
#ifdef __linux__
/* Default settings are more or less garbage, with the keepalive time
* set to 7200 by default on Linux. Modify settings to make the feature
* actually useful. */
int idle;
int intvl;
int cnt;
/* Send first probe after interval. */
val = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
return ANET_ERR;
}
/* There are platforms that are expected to support the full mechanism of TCP keep-alive,
* we want the compiler to emit warnings of unused variables if the preprocessor directives
* somehow fail, and other than those platforms, just omit these warnings if they happen.
*/
#if !(defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__illumos__) || defined(__linux__) || \
defined(__NetBSD__) || defined(__sun))
UNUSED(interval);
UNUSED(idle);
UNUSED(intvl);
UNUSED(cnt);
#endif
/* Send next probes after the specified interval. Note that we set the
* delay as interval / 3, as we send three probes before detecting
* an error (see the next setsockopt call). */
val = interval/3;
if (val == 0) val = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
/* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual
* compared to other Unix-like systems.
* Thus, we need to specialize it on Solaris. */
#ifdef __sun
/* There are two keep-alive mechanisms on Solaris:
* - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours.
* If the peer does not respond to the probe within eight minutes, the TCP connection is aborted.
* You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD
* in milliseconds or TCP_KEEPIDLE in seconds.
* The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds.
* The maximum is ten days, while the default is two hours. If you receive no response to the probe,
* you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection.
* The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and
* abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval.
* The default is eight minutes.
* - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set.
* The time between each consequent probes is set by TCP_KEEPINTVL in seconds.
* The minimum value is ten seconds. The maximum is ten days, while the default is two hours.
* The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response.
*/
idle = interval;
if (idle < 10) idle = 10; // kernel expects at least 10 seconds
if (idle > 10*24*60*60) idle = 10*24*60*60; // kernel expects at most 10 days
/* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris
* until version 11.4, but let's take a chance here. */
#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT)
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) {
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
return ANET_ERR;
}
intvl = idle/3;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) {
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
return ANET_ERR;
}
cnt = 3;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) {
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
#endif
/* Fall back to the first implementation of tcp-alive mechanism for older Solaris,
* simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`.
*/
idle *= 1000; // kernel expects milliseconds
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) {
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
return ANET_ERR;
}
/* Consider the socket in error state after three we send three ACK
* probes without getting a reply. */
val = 3;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
/* Note that the consequent probes will not be sent at equal intervals on Solaris,
* but will be sent using the exponential backoff algorithm. */
intvl = idle/3;
cnt = 3;
int time_to_abort = intvl * cnt;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) {
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
return ANET_ERR;
}
#elif defined(__APPLE__)
/* Set idle time with interval */
val = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) {
return ANET_OK;
#endif
#ifdef TCP_KEEPIDLE
/* Default settings are more or less garbage, with the keepalive time
* set to 7200 by default on Linux and other Unix-like systems.
* Modify settings to make the feature actually useful. */
/* Send first probe after interval. */
idle = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) {
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
return ANET_ERR;
}
#elif defined(TCP_KEEPALIVE)
/* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */
idle = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle))) {
anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno));
return ANET_ERR;
}
#else
((void) interval); /* Avoid unused var warning for non Linux systems. */
#endif
#ifdef TCP_KEEPINTVL
/* Send next probes after the specified interval. Note that we set the
* delay as interval / 3, as we send three probes before detecting
* an error (see the next setsockopt call). */
intvl = interval/3;
if (intvl == 0) intvl = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) {
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
return ANET_ERR;
}
#endif
#ifdef TCP_KEEPCNT
/* Consider the socket in error state after three we send three ACK
* probes without getting a reply. */
cnt = 3;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) {
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
return ANET_ERR;
}
#endif
return ANET_OK;