Prevent nodes with invalid IDs from being propagated through gossip (#12921)

There have been occasional instances of memory corruption (through code bugs or bit flips) leading to invalid node information being gossiped around. To prevent this invalid information from spreading, we verify that the node IDs in received gossip are in an acceptable format, and disregard any gossiped nodes with invalid IDs. This PR uses the existing verifyClusterNodeId function to check the validity of the gossiped node IDs and, if an invalid one is encountered, logs raw byte information to help debug the corruption.

---------

Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
This commit is contained in:
Brennan 2024-01-22 11:25:43 -08:00 committed by GitHub
parent 8d0156eb18
commit e12f2decc1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 43 additions and 0 deletions

View File

@ -2043,6 +2043,41 @@ static void getClientPortFromGossip(clusterMsgDataGossip *g, int *tls_port, int
}
}
/* Builds a printable dump of the raw bytes of a gossiped node ID (the
 * nodename field) followed by the 8 bytes that come right after it, for
 * debugging purposes. Every byte is rendered as "\xHH" with uppercase hex
 * digits.
 *
 * The returned string is allocated with zmalloc(); the caller is expected to
 * release it with zfree(). */
char *getCorruptedNodeIdByteString(clusterMsgDataGossip *gossip_msg) {
    static const char hex_digits[] = "0123456789ABCDEF";
    const int dump_len = CLUSTER_NAMELEN + 8;

    /* Four output characters per dumped byte, plus the null terminator. */
    char *dump = (char*) zmalloc((dump_len*4) + 1);
    const char *src = gossip_msg->nodename;

    /* The dumped range must stay inside the gossip entry itself. */
    serverAssert(src + dump_len <= (char*)gossip_msg + sizeof(clusterMsgDataGossip));

    char *out = dump;
    for (const char *p = src; p < src + dump_len; p++) {
        /* Work on the unsigned value so high-bit bytes map to 80..FF. */
        unsigned char byte = (unsigned char) *p;
        *out++ = '\\';
        *out++ = 'x';
        *out++ = hex_digits[byte >> 4];
        *out++ = hex_digits[byte & 0x0F];
    }
    *out = '\0';
    return dump;
}
/* Scans the 'count' gossip entries starting at 'g' and returns the number of
 * entries whose node ID is rejected by verifyClusterNodeId().
 *
 * For every invalid entry a warning is logged containing the ID as received
 * and a raw byte dump of that same entry's ID plus 8 trailing bytes, to help
 * debug the corruption. */
int verifyGossipSectionNodeIds(clusterMsgDataGossip *g, uint16_t count) {
    int invalid_ids = 0;
    for (int i = 0; i < count; i++) {
        const char *nodename = g[i].nodename;
        if (verifyClusterNodeId(nodename, CLUSTER_NAMELEN) != C_OK) {
            invalid_ids++;
            /* Dump the offending entry itself: passing 'g' here would always
             * dump the first gossip entry instead of the invalid one. */
            char *raw_node_id = getCorruptedNodeIdByteString(&g[i]);
            serverLog(LL_WARNING,
                      "Received gossip about a node with invalid ID %.40s. For debugging purposes, "
                      "the 48 bytes including the invalid ID and 8 trailing bytes are: %s",
                      nodename, raw_node_id);
            zfree(raw_node_id);
        }
    }
    return invalid_ids;
}
/* Process the gossip section of PING or PONG packets.
* Note that this function assumes that the packet is already sanity-checked
* by the caller, not in the content of the gossip section, but in the
@ -2052,6 +2087,14 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
clusterMsgDataGossip *g = (clusterMsgDataGossip*) hdr->data.ping.gossip;
clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
/* Abort if the gossip contains invalid node IDs to avoid adding incorrect information to
* the nodes dictionary. An invalid ID indicates memory corruption on the sender side. */
int invalid_ids = verifyGossipSectionNodeIds(g, count);
if (invalid_ids) {
serverLog(LL_WARNING, "Node %.40s (%s) gossiped %d nodes with invalid IDs.", sender->name, sender->human_nodename, invalid_ids);
return;
}
while(count--) {
uint16_t flags = ntohs(g->flags);
clusterNode *node;