From 2acbd39d4d59fb35712795503f9881a75141a83a Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 13 Jun 2024 01:27:31 +0000 Subject: [PATCH 01/45] Adds `clusterMsgPubsub` light header Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 244 +++++++++++++++++++++++++++++++++---------- src/cluster_legacy.h | 19 ++++ 2 files changed, 210 insertions(+), 53 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 0de6351e90..32e3c6dd36 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -111,6 +111,7 @@ int auxTlsPortSetter(clusterNode *n, void *value, int length); sds auxTlsPortGetter(clusterNode *n, sds s); int auxTlsPortPresent(clusterNode *n); static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); +static void clusterBuildPubsubMessageHdr(clusterMsgPubsub *hdr, int type, size_t msglen); void freeClusterLink(clusterLink *link); int verifyClusterNodeId(const char *name, int length); @@ -134,6 +135,7 @@ static inline int defaultClientPort(void) { (server.cluster->slots[slot] == NULL || bitmapTestBit(server.cluster->owner_not_claiming_slot, slot)) #define RCVBUF_INIT_LEN 1024 +#define RCVBUF_MIN_READ_LEN 16 #define RCVBUF_MAX_PREALLOC (1 << 20) /* 1MB */ /* Fixed timeout value for cluster operations (milliseconds) */ @@ -313,6 +315,12 @@ typedef struct { clusterMsg msg; } clusterMsgSendBlock; +typedef struct { + size_t totlen; /* Total length of this block including the message */ + int refcount; /* Number of cluster link send msg queues containing the message */ + clusterMsgPubsub msg; +} clusterMsgSendBlockPubsub; + /* ----------------------------------------------------------------------------- * Initialization * -------------------------------------------------------------------------- */ @@ -1142,6 +1150,16 @@ static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) return msgblock; } +static clusterMsgSendBlockPubsub *createPubsubClusterMsgSendBlock(int type, uint32_t msglen) { + uint32_t blocklen = msglen + sizeof(clusterMsgSendBlockPubsub) - sizeof(clusterMsgPubsub); + clusterMsgSendBlockPubsub *msgblock = zcalloc(blocklen); + msgblock->refcount = 1; + msgblock->totlen = blocklen; + server.stat_cluster_links_memory += blocklen; + clusterBuildPubsubMessageHdr(&msgblock->msg, type, msglen); + return msgblock; +} + static void clusterMsgSendBlockDecrRefCount(void *node) { clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)node; msgblock->refcount--; @@ -1152,6 +1170,16 @@ static void clusterMsgSendBlockDecrRefCount(void *node) { } } +static void clusterMsgSendBlockDecrRefCountPublish(void *node) { + clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)node; + msgblock->refcount--; + serverAssert(msgblock->refcount >= 0); + if (msgblock->refcount == 0) { + server.stat_cluster_links_memory -= msgblock->totlen; + zfree(msgblock); + } +} + clusterLink *createClusterLink(clusterNode *node) { clusterLink *link = zmalloc(sizeof(*link)); link->ctime = mstime(); @@ -2807,9 +2835,10 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataFail); } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + - ntohl(hdr->data.publish.msg.message_len); + clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; + explen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr_pubsub->data.publish.msg.channel_len) + + ntohl(hdr_pubsub->data.publish.msg.message_len); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); @@ -2849,20 +2878,44 @@ int clusterProcessPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); + clusterNode *sender; - uint16_t flags = ntohs(hdr->flags); - uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; - clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); + if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; + robj *channel, *message; + serverAssert(link->node && !nodeInHandshake(link->node)); + sender = link->node; + sender->data_received = now; + uint32_t channel_len, message_len; - if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { - sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || + (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { + channel_len = ntohl(hdr_pubsub->data.publish.msg.channel_len); + message_len = ntohl(hdr_pubsub->data.publish.msg.message_len); + channel = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data, channel_len); + message = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data + channel_len, message_len); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + decrRefCount(channel); + decrRefCount(message); + } + return 1; } + sender = getNodeFromLinkAndMsg(link, hdr); + /* Update the last time we saw any data from this node. We * use this in order to avoid detecting a timeout from a node that * is just sending a lot of data in the cluster bus, for instance * because of Pub/Sub. */ if (sender) sender->data_received = now; + uint16_t flags = ntohs(hdr->flags); + uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; + + if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { + sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; + } if (sender && !nodeInHandshake(sender)) { /* Update our currentEpoch if we see a newer epoch in the cluster. */ @@ -3239,24 +3292,6 @@ int clusterProcessPacket(clusterLink *link) { serverLog(LL_NOTICE, "Ignoring FAIL message from unknown node %.40s about %.40s", hdr->sender, hdr->data.fail.about.nodename); } - } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - if (!sender) return 1; /* We don't know that node. */ - - robj *channel, *message; - uint32_t channel_len, message_len; - - /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || - (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { - channel_len = ntohl(hdr->data.publish.msg.channel_len); - message_len = ntohl(hdr->data.publish.msg.message_len); - channel = createStringObject((char *)hdr->data.publish.msg.bulk_data, channel_len); - message = createStringObject((char *)hdr->data.publish.msg.bulk_data + channel_len, message_len); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); - decrRefCount(channel); - decrRefCount(message); - } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { if (!sender) return 1; /* We don't know that node. */ clusterSendFailoverAuthIfNeeded(sender, hdr); @@ -3375,6 +3410,47 @@ void clusterWriteHandler(connection *conn) { if (listLength(link->send_msg_queue) == 0) connSetWriteHandler(link->conn, NULL); } +/* Send the pubsub messages queued for the link. */ +void clusterPubsubWriteHandler(connection *conn) { + clusterLink *link = connGetPrivateData(conn); + ssize_t nwritten; + size_t totwritten = 0; + + while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) { + listNode *head = listFirst(link->send_msg_queue); + clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)head->value; + clusterMsgPubsub *msg = &msgblock->msg; + size_t msg_offset = link->head_msg_send_offset; + size_t msg_len = ntohl(msg->totlen); + + nwritten = connWrite(conn, (char *)msg + msg_offset, msg_len - msg_offset); + if (nwritten <= 0) { + serverLog(LL_DEBUG, "I/O error writing to node link: %s", + (nwritten == -1) ? connGetLastError(conn) : "short write"); + handleLinkIOError(link); + return; + } + if (msg_offset + nwritten < msg_len) { + /* If full message wasn't written, record the offset + * and continue sending from this point next time */ + link->head_msg_send_offset += nwritten; + return; + } + serverAssert((msg_offset + nwritten) == msg_len); + link->head_msg_send_offset = 0; + + /* Delete the node and update our memory tracking */ + uint32_t blocklen = msgblock->totlen; + listDelNode(link->send_msg_queue, head); + server.stat_cluster_links_memory -= sizeof(listNode); + link->send_msg_queue_mem -= sizeof(listNode) + blocklen; + + totwritten += nwritten; + } + + if (listLength(link->send_msg_queue) == 0) connSetWriteHandler(link->conn, NULL); +} + /* A connect handler that gets called when a connection to another node * gets established. */ @@ -3429,31 +3505,40 @@ void clusterReadHandler(connection *conn) { while (1) { /* Read as long as there is data to read. */ rcvbuflen = link->rcvbuf_len; - if (rcvbuflen < 8) { - /* First, obtain the first 8 bytes to get the full message - * length. */ - readlen = 8 - rcvbuflen; + if (rcvbuflen < RCVBUF_MIN_READ_LEN) { + /* First, obtain the first 16 bytes to get the full message + * length and type. */ + readlen = RCVBUF_MIN_READ_LEN - rcvbuflen; } else { /* Finally read the full message. */ hdr = (clusterMsg *)link->rcvbuf; - if (rcvbuflen == 8) { + uint16_t type = ntohs(hdr->type); + serverLog(LL_WARNING, "TYPE:%hu",type); + if (rcvbuflen == RCVBUF_MIN_READ_LEN) { + int is_msg_valid = 0; /* Perform some sanity check on the message signature * and length. */ if (memcmp(hdr->sig, "RCmb", 4) != 0 || ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) { - char ip[NET_IP_STR_LEN]; - int port; - if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { - serverLog(LL_WARNING, "Bad message length or signature received " - "on the Cluster bus."); - } else { - serverLog(LL_WARNING, - "Bad message length or signature received " - "on the Cluster bus from %s:%d", - ip, port); + /* The minimum length for PUBLISH and PUBLISHSHARD will be CLUSTERMSGPUBSUB_MIN_LEN + * as we are using the `clusterMsgPubsub` hdr. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && (ntohl(hdr->totlen) >= CLUSTERMSGPUBSUB_MIN_LEN)) is_msg_valid = 1; + if (!is_msg_valid) { + char ip[NET_IP_STR_LEN]; + int port; + if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { + serverLog(LL_WARNING, "Bad message length or signature received " + "on the Cluster bus."); + } else { + serverLog(LL_WARNING, + "Bad message length or signature received " + "on the Cluster bus from %s:%d", + ip, port); + } + handleLinkIOError(link); + return; } - handleLinkIOError(link); - return; } + is_msg_valid = 0; } readlen = ntohl(hdr->totlen) - rcvbuflen; if (readlen > sizeof(buf)) readlen = sizeof(buf); @@ -3486,7 +3571,7 @@ void clusterReadHandler(connection *conn) { } /* Total length obtained? Process this packet. */ - if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { + if (rcvbuflen >= RCVBUF_MIN_READ_LEN && rcvbuflen == ntohl(hdr->totlen)) { if (clusterProcessPacket(link)) { if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) { size_t prev_rcvbuf_alloc = link->rcvbuf_alloc; @@ -3526,6 +3611,25 @@ void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; } +void clusterSendPublishMessage(clusterLink *link, clusterMsgSendBlockPubsub *msgblock) { + if (!link) { + return; + } + if (listLength(link->send_msg_queue) == 0 && msgblock->msg.totlen != 0) + connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1); + + listAddNodeTail(link->send_msg_queue, msgblock); + msgblock->refcount++; + + /* Update memory tracking */ + link->send_msg_queue_mem += sizeof(listNode) + msgblock->totlen; + server.stat_cluster_links_memory += sizeof(listNode); + + /* Populate sent messages stats. */ + uint16_t type = ntohs(msgblock->msg.type); + if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; +} + /* Send a message to all the nodes that are part of the cluster having * a connected link. * @@ -3546,6 +3650,20 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictReleaseIterator(di); } +void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while ((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + clusterSendPublishMessage(node->link, msgblock); + } + dictReleaseIterator(di); +} + /* Build the message header. hdr must point to a buffer at least * sizeof(clusterMsg) in bytes. */ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { @@ -3609,6 +3727,26 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->totlen = htonl(msglen); } +static void clusterBuildPubsubMessageHdr(clusterMsgPubsub *hdr, int type, size_t msglen) { + + hdr->ver = htons(CLUSTER_PROTO_VER); + hdr->sig[0] = 'R'; + hdr->sig[1] = 'C'; + hdr->sig[2] = 'm'; + hdr->sig[3] = 'b'; + hdr->type = htons(type); + + /* Handle cluster-announce-[tls-|bus-]port. */ + int announced_tcp_port, announced_tls_port, announced_cport; + deriveAnnouncedPorts(&announced_tcp_port, &announced_tls_port, &announced_cport); + if (server.tls_cluster) { + hdr->port = htons(announced_tls_port); + } else { + hdr->port = htons(announced_tcp_port); + } + hdr->totlen = htonl(msglen); +} + /* Set the i-th entry of the gossip section in the message pointed by 'hdr' * to the info of the specified node 'n'. */ void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { @@ -3813,7 +3951,7 @@ void clusterBroadcastPong(int target) { * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false * positive in this context. */ VALKEY_NO_SANITIZE("bounds") -clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { +clusterMsgSendBlockPubsub *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { uint32_t channel_len, message_len; channel = getDecodedObject(channel); @@ -3821,11 +3959,11 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, channel_len = sdslen(channel->ptr); message_len = sdslen(message->ptr); - size_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + size_t msglen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); + clusterMsgSendBlockPubsub *msgblock = createPubsubClusterMsgSendBlock(type, msglen); - clusterMsg *hdr = &msgblock->msg; + clusterMsgPubsub *hdr = &msgblock->msg; hdr->data.publish.msg.channel_len = htonl(channel_len); hdr->data.publish.msg.message_len = htonl(message_len); memcpy(hdr->data.publish.msg.bulk_data, channel->ptr, sdslen(channel->ptr)); @@ -3931,12 +4069,12 @@ int clusterSendModuleMessageToTarget(const char *target, * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { - clusterMsgSendBlock *msgblock; + clusterMsgSendBlockPubsub *msgblock; if (!sharded) { msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); + clusterBroadcastPublishMessage(msgblock); + clusterMsgSendBlockDecrRefCountPublish(msgblock); return; } @@ -3949,9 +4087,9 @@ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { while ((ln = listNext(&li))) { clusterNode *node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - clusterSendMessage(node->link, msgblock); + clusterSendPublishMessage(node->link, msgblock); } - clusterMsgSendBlockDecrRefCount(msgblock); + clusterMsgSendBlockDecrRefCountPublish(msgblock); } /* ----------------------------------------------------------------------------- diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index cc02f30a8b..b304de580c 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -277,6 +277,25 @@ static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset"); master is up. */ #define CLUSTERMSG_FLAG0_EXT_DATA (1 << 2) /* Message contains extension data */ +typedef struct { + char sig[4]; /* Signature "RCmb" (Cluster message bus). */ + uint32_t totlen; /* Total length of this message */ + uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ + uint16_t port; /* Primary port number (TCP or TLS). */ + uint16_t type; /* Message type */ + union clusterMsgData data; +} clusterMsgPubsub; + + +static_assert(offsetof(clusterMsgPubsub, sig) == 0, "unexpected field offset"); +static_assert(offsetof(clusterMsgPubsub, totlen) == 4, "unexpected field offset"); +static_assert(offsetof(clusterMsgPubsub, ver) == 8, "unexpected field offset"); +static_assert(offsetof(clusterMsgPubsub, port) == 10, "unexpected field offset"); +static_assert(offsetof(clusterMsgPubsub, type) == 12, "unexpected field offset"); +static_assert(offsetof(clusterMsgPubsub, data) == 16, "unexpected field offset"); + +#define CLUSTERMSGPUBSUB_MIN_LEN (sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData)) + struct _clusterNode { mstime_t ctime; /* Node object creation time. */ char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */ From d265e2875603efc8ee228cde5d22146ef3e54af0 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Fri, 14 Jun 2024 20:47:51 +0000 Subject: [PATCH 02/45] Adds compatibility with older versions and cleanup Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 145 ++++++++++++++++++++++++++++++------------- src/cluster_legacy.h | 2 + 2 files changed, 104 insertions(+), 43 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 32e3c6dd36..1625d4c1c1 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -842,6 +842,7 @@ void clusterUpdateMyselfFlags(void) { int nofailover = server.cluster_slave_no_failover ? CLUSTER_NODE_NOFAILOVER : 0; myself->flags &= ~CLUSTER_NODE_NOFAILOVER; myself->flags |= nofailover; + myself->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; if (myself->flags != oldflags) { clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE); } @@ -1001,6 +1002,7 @@ void clusterInit(void) { * by the createClusterNode() function. */ myself = server.cluster->myself = createClusterNode(NULL, CLUSTER_NODE_MYSELF | CLUSTER_NODE_MASTER); serverLog(LL_NOTICE, "No cluster configuration found, I'm %.40s", myself->name); + myself->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; clusterAddNode(myself); clusterAddNodeToShard(myself->shard_id, myself); saveconf = 1; @@ -1170,8 +1172,9 @@ static void clusterMsgSendBlockDecrRefCount(void *node) { } } -static void clusterMsgSendBlockDecrRefCountPublish(void *node) { - clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)node; +static void clusterMsgSendBlockDecrRefCountPublish(void *node_light, void *node) { + clusterMsgSendBlockDecrRefCount(node); + clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)node_light; msgblock->refcount--; serverAssert(msgblock->refcount >= 0); if (msgblock->refcount == 0) { @@ -2776,6 +2779,27 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } +int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { + clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; + robj *channel, *message; + uint32_t channel_len, message_len; + + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || + (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { + channel_len = ntohl(hdr_pubsub->data.publish.msg.channel_len); + message_len = ntohl(hdr_pubsub->data.publish.msg.message_len); + channel = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data, channel_len); + message = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data + channel_len, message_len); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + decrRefCount(channel); + decrRefCount(message); + } + return 1; +} + + int clusterIsValidPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint32_t totlen = ntohl(hdr->totlen); @@ -2835,10 +2859,17 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataFail); } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; - explen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr_pubsub->data.publish.msg.channel_len) + - ntohl(hdr_pubsub->data.publish.msg.message_len); + clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); + if (sender && nodeSupportsPubsubMsgHdr(sender)) { + clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; + explen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr_pubsub->data.publish.msg.channel_len) + + ntohl(hdr_pubsub->data.publish.msg.message_len); + } else { + explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + + ntohl(hdr->data.publish.msg.message_len); + } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); @@ -2878,38 +2909,17 @@ int clusterProcessPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - clusterNode *sender; - - if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; - robj *channel, *message; - serverAssert(link->node && !nodeInHandshake(link->node)); - sender = link->node; - sender->data_received = now; - uint32_t channel_len, message_len; - - /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || - (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { - channel_len = ntohl(hdr_pubsub->data.publish.msg.channel_len); - message_len = ntohl(hdr_pubsub->data.publish.msg.message_len); - channel = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data, channel_len); - message = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data + channel_len, message_len); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); - decrRefCount(channel); - decrRefCount(message); - } - return 1; - } - - sender = getNodeFromLinkAndMsg(link, hdr); - + clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); /* Update the last time we saw any data from this node. We * use this in order to avoid detecting a timeout from a node that * is just sending a lot of data in the cluster bus, for instance * because of Pub/Sub. */ if (sender) sender->data_received = now; + + if (sender && (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && nodeSupportsPubsubMsgHdr(sender)) { + return pubsubProcessLightPacket(link, type); + } + uint16_t flags = ntohs(hdr->flags); uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; @@ -2917,6 +2927,10 @@ int clusterProcessPacket(clusterLink *link) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } + if (sender && (!nodeSupportsPubsubMsgHdr(link->node)) && (flags & CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED)) { + sender->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; + } + if (sender && !nodeInHandshake(sender)) { /* Update our currentEpoch if we see a newer epoch in the cluster. */ senderCurrentEpoch = ntohu64(hdr->currentEpoch); @@ -3292,6 +3306,24 @@ int clusterProcessPacket(clusterLink *link) { serverLog(LL_NOTICE, "Ignoring FAIL message from unknown node %.40s about %.40s", hdr->sender, hdr->data.fail.about.nodename); } + } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + if (!sender) return 1; /* We don't know that node. */ + + robj *channel, *message; + uint32_t channel_len, message_len; + + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || + (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { + channel_len = ntohl(hdr->data.publish.msg.channel_len); + message_len = ntohl(hdr->data.publish.msg.message_len); + channel = createStringObject((char *)hdr->data.publish.msg.bulk_data, channel_len); + message = createStringObject((char *)hdr->data.publish.msg.bulk_data + channel_len, message_len); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + decrRefCount(channel); + decrRefCount(message); + } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { if (!sender) return 1; /* We don't know that node. */ clusterSendFailoverAuthIfNeeded(sender, hdr); @@ -3513,7 +3545,6 @@ void clusterReadHandler(connection *conn) { /* Finally read the full message. */ hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); - serverLog(LL_WARNING, "TYPE:%hu",type); if (rcvbuflen == RCVBUF_MIN_READ_LEN) { int is_msg_valid = 0; /* Perform some sanity check on the message signature @@ -3538,7 +3569,6 @@ void clusterReadHandler(connection *conn) { return; } } - is_msg_valid = 0; } readlen = ntohl(hdr->totlen) - rcvbuflen; if (readlen > sizeof(buf)) readlen = sizeof(buf); @@ -3650,7 +3680,7 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictReleaseIterator(di); } -void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock) { +void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock_light, clusterMsgSendBlock *msgblock) { dictIterator *di; dictEntry *de; @@ -3659,7 +3689,8 @@ void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock) { clusterNode *node = dictGetVal(de); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - clusterSendPublishMessage(node->link, msgblock); + if (nodeSupportsPubsubMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); + else clusterSendMessage(node->link, msgblock); } dictReleaseIterator(di); } @@ -3951,7 +3982,31 @@ void clusterBroadcastPong(int target) { * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false * positive in this context. */ VALKEY_NO_SANITIZE("bounds") -clusterMsgSendBlockPubsub *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { +clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { + uint32_t channel_len, message_len; + + channel = getDecodedObject(channel); + message = getDecodedObject(message); + channel_len = sdslen(channel->ptr); + message_len = sdslen(message->ptr); + + size_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); + + clusterMsg *hdr = &msgblock->msg; + hdr->data.publish.msg.channel_len = htonl(channel_len); + hdr->data.publish.msg.message_len = htonl(message_len); + memcpy(hdr->data.publish.msg.bulk_data, channel->ptr, sdslen(channel->ptr)); + memcpy(hdr->data.publish.msg.bulk_data + sdslen(channel->ptr), message->ptr, sdslen(message->ptr)); + + decrRefCount(channel); + decrRefCount(message); + + return msgblock; +} + +clusterMsgSendBlockPubsub *clusterCreatePublishLightMsgBlock(robj *channel, robj *message, uint16_t type) { uint32_t channel_len, message_len; channel = getDecodedObject(channel); @@ -4069,12 +4124,14 @@ int clusterSendModuleMessageToTarget(const char *target, * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { - clusterMsgSendBlockPubsub *msgblock; + clusterMsgSendBlockPubsub *msgblock_light; + clusterMsgSendBlock *msgblock; if (!sharded) { + msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastPublishMessage(msgblock); - clusterMsgSendBlockDecrRefCountPublish(msgblock); + clusterBroadcastPublishMessage(msgblock_light, msgblock); + clusterMsgSendBlockDecrRefCountPublish(msgblock_light, msgblock); return; } @@ -4083,13 +4140,15 @@ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); serverAssert(nodes_for_slot != NULL); listRewind(nodes_for_slot, &li); + msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); while ((ln = listNext(&li))) { clusterNode *node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - clusterSendPublishMessage(node->link, msgblock); + if (nodeSupportsPubsubMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); + else clusterSendMessage(node->link, msgblock); } - clusterMsgSendBlockDecrRefCountPublish(msgblock); + clusterMsgSendBlockDecrRefCountPublish(msgblock_light, msgblock); } /* ----------------------------------------------------------------------------- diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index b304de580c..145455adcb 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -52,6 +52,7 @@ typedef struct clusterLink { #define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */ #define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */ #define CLUSTER_NODE_EXTENSIONS_SUPPORTED 1024 /* This node supports extensions. */ +#define CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED 2048 /* This node supports light pubsub message header. */ #define CLUSTER_NODE_NULL_NAME \ "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ "\000\000\000\000\000\000\000\000\000\000\000\000" @@ -64,6 +65,7 @@ typedef struct clusterLink { #define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL) #define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER) #define nodeSupportsExtensions(n) ((n)->flags & CLUSTER_NODE_EXTENSIONS_SUPPORTED) +#define nodeSupportsPubsubMsgHdr(n) ((n)->flags & CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED) /* This structure represent elements of node->fail_reports. */ typedef struct clusterNodeFailReport { From b2836d50184c64a50bc11261ba2861435840750a Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 1 Jul 2024 00:48:00 +0000 Subject: [PATCH 03/45] Adds Multiple Payload Feature with cluster Light header Signed-off-by: Roshan Khatri --- src/cluster.h | 2 +- src/cluster_legacy.c | 260 +++++++++++++++++++++++++------------------ src/cluster_legacy.h | 44 +++++--- src/commands.def | 8 +- src/pubsub.c | 23 +++- src/server.h | 2 + 6 files changed, 207 insertions(+), 132 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index f163e7f688..6699f93d7e 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -52,7 +52,7 @@ void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); void clusterUpdateMyselfHumanNodename(void); -void clusterPropagatePublish(robj *channel, robj *message, int sharded); +void clusterPropagatePublish(robj *channel, robj **message, int count, int sharded); unsigned long getClusterConnectionsCount(void); int isClusterHealthy(void); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index fe680a6d2e..9a1c29cd52 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -111,7 +111,7 @@ int auxTlsPortSetter(clusterNode *n, void *value, int length); sds auxTlsPortGetter(clusterNode *n, sds s); int auxTlsPortPresent(clusterNode *n); static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); -static void clusterBuildPubsubMessageHdr(clusterMsgPubsub *hdr, int type, size_t msglen); +static void clusterBuildPubsubMessageHdr(clusterMsgLight *hdr, int type, size_t msglen); void freeClusterLink(clusterLink *link); int verifyClusterNodeId(const char *name, int length); sds clusterEncodeOpenSlotsAuxField(int rdbflags); @@ -317,8 +317,8 @@ typedef struct { typedef struct { size_t totlen; /* Total length of this block including the message */ int refcount; /* Number of cluster link send msg queues containing the message */ - clusterMsgPubsub msg; -} clusterMsgSendBlockPubsub; + clusterMsgLight msg; +} clusterMsgSendBlockLight; /* ----------------------------------------------------------------------------- * Initialization @@ -841,7 +841,7 @@ void clusterUpdateMyselfFlags(void) { int nofailover = server.cluster_replica_no_failover ? CLUSTER_NODE_NOFAILOVER : 0; myself->flags &= ~CLUSTER_NODE_NOFAILOVER; myself->flags |= nofailover; - myself->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; + myself->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; if (myself->flags != oldflags) { clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE); } @@ -1001,7 +1001,7 @@ void clusterInit(void) { * by the createClusterNode() function. */ myself = server.cluster->myself = createClusterNode(NULL, CLUSTER_NODE_MYSELF | CLUSTER_NODE_PRIMARY); serverLog(LL_NOTICE, "No cluster configuration found, I'm %.40s", myself->name); - myself->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; + myself->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; clusterAddNode(myself); clusterAddNodeToShard(myself->shard_id, myself); saveconf = 1; @@ -1155,9 +1155,9 @@ static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) return msgblock; } -static clusterMsgSendBlockPubsub *createPubsubClusterMsgSendBlock(int type, uint32_t msglen) { - uint32_t blocklen = msglen + sizeof(clusterMsgSendBlockPubsub) - sizeof(clusterMsgPubsub); - clusterMsgSendBlockPubsub *msgblock = zcalloc(blocklen); +static clusterMsgSendBlockLight *createPubsubClusterMsgSendBlock(int type, uint32_t msglen) { + uint32_t blocklen = msglen + sizeof(clusterMsgSendBlockLight) - sizeof(clusterMsgLight); + clusterMsgSendBlockLight *msgblock = zcalloc(blocklen); msgblock->refcount = 1; msgblock->totlen = blocklen; server.stat_cluster_links_memory += blocklen; @@ -1175,9 +1175,8 @@ static void clusterMsgSendBlockDecrRefCount(void *node) { } } -static void clusterMsgSendBlockDecrRefCountPublish(void *node_light, void *node) { - clusterMsgSendBlockDecrRefCount(node); - clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)node_light; +static void clusterMsgSendBlockDecrRefCountPublish(void *light_block) { + clusterMsgSendBlockLight *msgblock = (clusterMsgSendBlockLight *)light_block; msgblock->refcount--; serverAssert(msgblock->refcount >= 0); if (msgblock->refcount == 0) { @@ -2779,22 +2778,55 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } +static clusterMsgDataPublishMessage *getInitialBulkData(clusterMsgDataPublishMulti *msg) { + clusterMsgDataPublishMessage *initial = (clusterMsgDataPublishMessage *)&(msg->bulk_data); + return initial; +} + +static clusterMsgDataPublishMessage *getNextCursorBulkData(clusterMsgDataPublishMessage *message_cursor) { + clusterMsgDataPublishMessage *next = (clusterMsgDataPublishMessage *)((char *)message_cursor->message_data + ntohl(message_cursor->message_len)); + return next; +} + +void writeDataToCursor(clusterMsgDataPublishMessage *cursor, robj *data) { + uint32_t data_len = sdslen(data->ptr); + cursor->message_len = htonl(data_len); + memcpy(cursor->message_data, data->ptr, data_len); + return; +} + +static robj *readBulkDataFromCursor(clusterMsgDataPublishMessage *cursor) { + uint32_t data_len; + data_len = ntohl(cursor->message_len); + robj *data = (createStringObject((char *)cursor->message_data, data_len)); + return data; +} + +static uint32_t getPublishMsgLength(clusterMsgDataPublishMessage *cursor) { + uint32_t msg_length = ntohl(cursor->message_len); + return msg_length; +} + int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { - clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; - robj *channel, *message; - uint32_t channel_len, message_len; + clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; + robj *channel; + uint64_t data_count; /* Don't bother creating useless objects if there are no * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || - (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { - channel_len = ntohl(hdr_pubsub->data.publish.msg.channel_len); - message_len = ntohl(hdr_pubsub->data.publish.msg.message_len); - channel = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data, channel_len); - message = createStringObject((char *)hdr_pubsub->data.publish.msg.bulk_data + channel_len, message_len); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || + (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { + data_count = ntohu64(hdr->data.publish.msg.data_count); + clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); + channel = readBulkDataFromCursor(cursor); + while(--data_count){ + cursor = getNextCursorBulkData(cursor); + if (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + serverLog(LL_NOTICE, "type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT"); + } + pubsubPublishMessage(channel, readBulkDataFromCursor(cursor), type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); + } decrRefCount(channel); - decrRefCount(message); } return 1; } @@ -2859,16 +2891,23 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataFail); } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + + ntohl(hdr->data.publish.msg.message_len); + } else if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); - if (sender && nodeSupportsPubsubMsgHdr(sender)) { - clusterMsgPubsub *hdr_pubsub = (clusterMsgPubsub *)link->rcvbuf; - explen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr_pubsub->data.publish.msg.channel_len) + - ntohl(hdr_pubsub->data.publish.msg.message_len); - } else { - explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + - ntohl(hdr->data.publish.msg.message_len); + if (sender && nodeSupportsLightMsgHdr(sender)) { + clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; + explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); + explen += sizeof(clusterMsgDataPublishMulti); + uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); + explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); + clusterMsgDataPublishMessage *msg_data = getInitialBulkData(&hdr_pubsub->data.publish.msg); + while(data_count--){ + uint32_t msglen = getPublishMsgLength(msg_data); + explen += msglen; + msg_data = getNextCursorBulkData(msg_data); + } } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { @@ -2916,7 +2955,7 @@ int clusterProcessPacket(clusterLink *link) { * because of Pub/Sub. */ if (sender) sender->data_received = now; - if (sender && (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && nodeSupportsPubsubMsgHdr(sender)) { + if (sender && (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && nodeSupportsLightMsgHdr(sender)) { return pubsubProcessLightPacket(link, type); } @@ -2927,8 +2966,8 @@ int clusterProcessPacket(clusterLink *link) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } - if (sender && (!nodeSupportsPubsubMsgHdr(link->node)) && (flags & CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED)) { - sender->flags |= CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED; + if (sender && (!nodeSupportsLightMsgHdr(link->node)) && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { + sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; } if (sender && !nodeInHandshake(sender)) { @@ -3411,47 +3450,6 @@ void clusterWriteHandler(connection *conn) { if (listLength(link->send_msg_queue) == 0) connSetWriteHandler(link->conn, NULL); } -/* Send the pubsub messages queued for the link. */ -void clusterPubsubWriteHandler(connection *conn) { - clusterLink *link = connGetPrivateData(conn); - ssize_t nwritten; - size_t totwritten = 0; - - while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) { - listNode *head = listFirst(link->send_msg_queue); - clusterMsgSendBlockPubsub *msgblock = (clusterMsgSendBlockPubsub *)head->value; - clusterMsgPubsub *msg = &msgblock->msg; - size_t msg_offset = link->head_msg_send_offset; - size_t msg_len = ntohl(msg->totlen); - - nwritten = connWrite(conn, (char *)msg + msg_offset, msg_len - msg_offset); - if (nwritten <= 0) { - serverLog(LL_DEBUG, "I/O error writing to node link: %s", - (nwritten == -1) ? connGetLastError(conn) : "short write"); - handleLinkIOError(link); - return; - } - if (msg_offset + nwritten < msg_len) { - /* If full message wasn't written, record the offset - * and continue sending from this point next time */ - link->head_msg_send_offset += nwritten; - return; - } - serverAssert((msg_offset + nwritten) == msg_len); - link->head_msg_send_offset = 0; - - /* Delete the node and update our memory tracking */ - uint32_t blocklen = msgblock->totlen; - listDelNode(link->send_msg_queue, head); - server.stat_cluster_links_memory -= sizeof(listNode); - link->send_msg_queue_mem -= sizeof(listNode) + blocklen; - - totwritten += nwritten; - } - - if (listLength(link->send_msg_queue) == 0) connSetWriteHandler(link->conn, NULL); -} - /* A connect handler that gets called when a connection to another node * gets established. */ @@ -3519,9 +3517,11 @@ void clusterReadHandler(connection *conn) { /* Perform some sanity check on the message signature * and length. */ if (memcmp(hdr->sig, "RCmb", 4) != 0 || ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) { - /* The minimum length for PUBLISH and PUBLISHSHARD will be CLUSTERMSGPUBSUB_MIN_LEN - * as we are using the `clusterMsgPubsub` hdr. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && (ntohl(hdr->totlen) >= CLUSTERMSGPUBSUB_MIN_LEN)) is_msg_valid = 1; + /* The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN + * as we are using the `clusterMsgLight` hdr. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && ((ntohl(hdr->totlen) >= CLUSTERMSG_LIGHT_MIN_LEN))) { + is_msg_valid = 1; + } if (!is_msg_valid) { char ip[NET_IP_STR_LEN]; int port; @@ -3610,7 +3610,7 @@ void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; } -void clusterSendPublishMessage(clusterLink *link, clusterMsgSendBlockPubsub *msgblock) { +void clusterSendPublishMessage(clusterLink *link, clusterMsgSendBlockLight *msgblock) { if (!link) { return; } @@ -3649,7 +3649,7 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictReleaseIterator(di); } -void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock_light, clusterMsgSendBlock *msgblock) { +void clusterBroadcastPublishMessage(clusterMsgSendBlock *msgblock) { dictIterator *di; dictEntry *de; @@ -3658,12 +3658,26 @@ void clusterBroadcastPublishMessage(clusterMsgSendBlockPubsub *msgblock_light, c clusterNode *node = dictGetVal(de); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsPubsubMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); + if (nodeSupportsLightMsgHdr(node)) continue; else clusterSendMessage(node->link, msgblock); } dictReleaseIterator(di); } +void clusterBroadcastPublishLightMessage(clusterMsgSendBlockLight *msgblock_light) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while ((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + if (nodeSupportsLightMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); + else continue; + } + dictReleaseIterator(di); +} /* Build the message header. hdr must point to a buffer at least * sizeof(clusterMsg) in bytes. */ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { @@ -3727,7 +3741,7 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->totlen = htonl(msglen); } -static void clusterBuildPubsubMessageHdr(clusterMsgPubsub *hdr, int type, size_t msglen) { +static void clusterBuildPubsubMessageHdr(clusterMsgLight *hdr, int type, size_t msglen) { hdr->ver = htons(CLUSTER_PROTO_VER); hdr->sig[0] = 'R'; @@ -3975,26 +3989,37 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, return msgblock; } -clusterMsgSendBlockPubsub *clusterCreatePublishLightMsgBlock(robj *channel, robj *message, uint16_t type) { +clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj **messages, int count, uint16_t type) { uint32_t channel_len, message_len; + int i; channel = getDecodedObject(channel); - message = getDecodedObject(message); channel_len = sdslen(channel->ptr); - message_len = sdslen(message->ptr); - - size_t msglen = sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData); - msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; - clusterMsgSendBlockPubsub *msgblock = createPubsubClusterMsgSendBlock(type, msglen); - - clusterMsgPubsub *hdr = &msgblock->msg; - hdr->data.publish.msg.channel_len = htonl(channel_len); - hdr->data.publish.msg.message_len = htonl(message_len); - memcpy(hdr->data.publish.msg.bulk_data, channel->ptr, sdslen(channel->ptr)); - memcpy(hdr->data.publish.msg.bulk_data + sdslen(channel->ptr), message->ptr, sdslen(message->ptr)); + uint32_t aggregated_msg_len = 0 ; + aggregated_msg_len += channel_len; + for (i = 0; i < count; i++) { + messages[i] = getDecodedObject(messages[i]); + message_len = sdslen(messages[i]->ptr); + aggregated_msg_len += message_len; + } + + size_t msglen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight) ; + msglen += sizeof(clusterMsgDataPublishMulti); + msglen += ((count+1) * (sizeof(clusterMsgDataPublishMessage) - 8)); + msglen += aggregated_msg_len; + clusterMsgSendBlockLight *msgblock = createPubsubClusterMsgSendBlock(type, msglen); + + clusterMsgLight *hdr = &msgblock->msg; + hdr->data.publish.msg.data_count = htonu64(count+1); + clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); + writeDataToCursor(cursor, channel); + for (i = 0; i < count; i++) { + cursor = getNextCursorBulkData(cursor); + writeDataToCursor(cursor, messages[i]); + decrRefCount(messages[i]); + } decrRefCount(channel); - decrRefCount(message); return msgblock; } @@ -4092,32 +4117,47 @@ int clusterSendModuleMessageToTarget(const char *target, * Otherwise: * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ -void clusterPropagatePublish(robj *channel, robj *message, int sharded) { - clusterMsgSendBlockPubsub *msgblock_light; +void clusterPropagatePublish(robj *channel, robj **message, int count, int sharded) { + clusterMsgSendBlockLight *msgblock_light; clusterMsgSendBlock *msgblock; - + int i; + msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { - msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); - msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastPublishMessage(msgblock_light, msgblock); - clusterMsgSendBlockDecrRefCountPublish(msgblock_light, msgblock); + clusterBroadcastPublishLightMessage(msgblock_light); + clusterMsgSendBlockDecrRefCountPublish(msgblock_light); + for (i = 0; i < count; i++) { + msgblock = clusterCreatePublishMsgBlock(channel, message[i], CLUSTERMSG_TYPE_PUBLISH); + clusterBroadcastPublishMessage(msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); + } return; } listIter li; listNode *ln; + clusterNode *node; list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); serverAssert(nodes_for_slot != NULL); listRewind(nodes_for_slot, &li); - msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); - msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); while ((ln = listNext(&li))) { - clusterNode *node = listNodeValue(ln); + node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsPubsubMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); - else clusterSendMessage(node->link, msgblock); + if (nodeSupportsLightMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); + else continue; + } + clusterMsgSendBlockDecrRefCountPublish(msgblock_light); + + for (i = 0; i < count; i++) { + listRewind(nodes_for_slot, &li); + msgblock = clusterCreatePublishMsgBlock(channel, message[i], CLUSTERMSG_TYPE_PUBLISHSHARD); + while ((ln = listNext(&li))) { + node = listNodeValue(ln); + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + if (nodeSupportsLightMsgHdr(node)) continue; + else clusterSendMessage(node->link, msgblock); + } + clusterMsgSendBlockDecrRefCount(msgblock); } - clusterMsgSendBlockDecrRefCountPublish(msgblock_light, msgblock); } /* ----------------------------------------------------------------------------- @@ -5658,6 +5698,8 @@ const char *clusterGetMessageTypeString(int type) { case CLUSTERMSG_TYPE_FAIL: return "fail"; case CLUSTERMSG_TYPE_PUBLISH: return "publish"; case CLUSTERMSG_TYPE_PUBLISHSHARD: return "publishshard"; + case CLUSTERMSG_TYPE_PUBLISH_LIGHT: return "publish-light"; + case CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT: return "publishshard-light"; case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; case CLUSTERMSG_TYPE_UPDATE: return "update"; diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 1a28c4004f..ea3347ba38 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -52,7 +52,7 @@ typedef struct clusterLink { #define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */ #define CLUSTER_NODE_NOFAILOVER (1 << 9) /* Replica will not try to failover. */ #define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */ -#define CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED (1 << 11) /* This node supports light pubsub message header. */ +#define CLUSTER_NODE_LIGHT_HDR_SUPPORTED (1 << 11) /* This node supports light pubsub message header. */ #define CLUSTER_NODE_NULL_NAME \ "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ "\000\000\000\000\000\000\000\000\000\000\000\000" @@ -65,7 +65,7 @@ typedef struct clusterLink { #define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL) #define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER) #define nodeSupportsExtensions(n) ((n)->flags & CLUSTER_NODE_EXTENSIONS_SUPPORTED) -#define nodeSupportsPubsubMsgHdr(n) ((n)->flags & CLUSTER_NODE_LIGHT_PUBSUB_SUPPORTED) +#define nodeSupportsLightMsgHdr(n) ((n)->flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED) /* This structure represent elements of node->fail_reports. */ typedef struct clusterNodeFailReport { @@ -92,7 +92,9 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ #define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */ #define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ -#define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ +#define CLUSTERMSG_TYPE_PUBLISH_LIGHT 11 /* Pub/Sub Publish propagation using light header*/ +#define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ +#define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this @@ -119,6 +121,16 @@ typedef struct { unsigned char bulk_data[8]; /* 8 bytes just as placeholder. */ } clusterMsgDataPublish; +typedef struct { + uint32_t message_len; + unsigned char message_data[8]; /* 8 bytes just as placeholder. */ +} clusterMsgDataPublishMessage; + +typedef struct { + uint64_t data_count; + clusterMsgDataPublishMessage *bulk_data; +} clusterMsgDataPublishMulti; + typedef struct { uint64_t configEpoch; /* Config epoch of the specified instance. */ char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */ @@ -208,6 +220,13 @@ union clusterMsgData { } module; }; +union clusterMsgDataLight { + /* PUBLISH */ + struct { + clusterMsgDataPublishMulti msg; + } publish; +}; + #define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */ typedef struct { @@ -285,18 +304,17 @@ typedef struct { uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ uint16_t port; /* Primary port number (TCP or TLS). */ uint16_t type; /* Message type */ - union clusterMsgData data; -} clusterMsgPubsub; - + union clusterMsgDataLight data; +} clusterMsgLight; -static_assert(offsetof(clusterMsgPubsub, sig) == 0, "unexpected field offset"); -static_assert(offsetof(clusterMsgPubsub, totlen) == 4, "unexpected field offset"); -static_assert(offsetof(clusterMsgPubsub, ver) == 8, "unexpected field offset"); -static_assert(offsetof(clusterMsgPubsub, port) == 10, "unexpected field offset"); -static_assert(offsetof(clusterMsgPubsub, type) == 12, "unexpected field offset"); -static_assert(offsetof(clusterMsgPubsub, data) == 16, "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, sig) == offsetof(clusterMsg, sig), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, totlen) == offsetof(clusterMsg, totlen),"unexpected field offset"); +static_assert(offsetof(clusterMsgLight, ver) == offsetof(clusterMsg, ver) , "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, port) == offsetof(clusterMsg, port) , "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type) , "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, data) == 16, "unexpected field offset"); -#define CLUSTERMSGPUBSUB_MIN_LEN (sizeof(clusterMsgPubsub) - sizeof(union clusterMsgData)) +#define CLUSTERMSG_LIGHT_MIN_LEN (sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight)) struct _clusterNode { mstime_t ctime; /* Node object creation time. */ diff --git a/src/commands.def b/src/commands.def index 06cdb4b87e..245cfd1712 100644 --- a/src/commands.def +++ b/src/commands.def @@ -4481,7 +4481,7 @@ struct COMMAND_ARG PSUBSCRIBE_Args[] = { /* PUBLISH argument table */ struct COMMAND_ARG PUBLISH_Args[] = { {MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, }; /********** PUBSUB CHANNELS ********************/ @@ -4678,7 +4678,7 @@ keySpec SPUBLISH_Keyspecs[1] = { /* SPUBLISH argument table */ struct COMMAND_ARG SPUBLISH_Args[] = { {MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, }; /********** SSUBSCRIBE ********************/ @@ -10777,10 +10777,10 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("rpushx","Appends an element to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSHX_History,1,RPUSHX_Tips,0,rpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSHX_Keyspecs,1,NULL,2),.args=RPUSHX_Args}, /* pubsub */ {MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, -{MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, +{MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,-3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, {MAKE_CMD("pubsub","A container for Pub/Sub commands.","Depends on subcommand.","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_History,0,PUBSUB_Tips,0,NULL,-2,0,0,PUBSUB_Keyspecs,0,NULL,0),.subcommands=PUBSUB_Subcommands}, {MAKE_CMD("punsubscribe","Stops listening to messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUNSUBSCRIBE_History,0,PUNSUBSCRIBE_Tips,0,punsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PUNSUBSCRIBE_Keyspecs,0,NULL,1),.args=PUNSUBSCRIBE_Args}, -{MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, +{MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,-3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, {MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, {MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, {MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args}, diff --git a/src/pubsub.c b/src/pubsub.c index a7e3e283ed..7fa2747a0f 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -532,6 +532,15 @@ int pubsubPublishMessage(robj *channel, robj *message, int sharded) { return pubsubPublishMessageInternal(channel, message, sharded ? pubSubShardType : pubSubType); } +/* Publish messages to all the subscribers. */ +int pubsubPublishMessages(robj *channel, robj **messages, int count, int sharded) { + int total_receivers = 0; + for (int i = 0; i < count; i++) { + total_receivers += pubsubPublishMessage(channel, messages[i], sharded); + } + return total_receivers; +} + /*----------------------------------------------------------------------------- * Pubsub commands implementation *----------------------------------------------------------------------------*/ @@ -603,12 +612,16 @@ void punsubscribeCommand(client *c) { /* This function wraps pubsubPublishMessage and also propagates the message to cluster. * Used by the commands PUBLISH/SPUBLISH and their respective module APIs.*/ -int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded) { - int receivers = pubsubPublishMessage(channel, message, sharded); - if (server.cluster_enabled) clusterPropagatePublish(channel, message, sharded); +int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded) { + int receivers = pubsubPublishMessages(channel, messages, count, sharded); + if (server.cluster_enabled) clusterPropagatePublish(channel, messages, count, sharded); return receivers; } +int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded) { + return pubsubPublishMessagesAndPropagateToCluster(channel, &message, 1, sharded); +} + /* PUBLISH */ void publishCommand(client *c) { if (server.sentinel_mode) { @@ -616,7 +629,7 @@ void publishCommand(client *c) { return; } - int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1], c->argv[2], 0); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2],c->argc-2, 0); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } @@ -702,7 +715,7 @@ void channelList(client *c, sds pat, kvstore *pubsub_channels) { /* SPUBLISH */ void spublishCommand(client *c) { - int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1], c->argv[2], 1); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc-2, 1); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } diff --git a/src/server.h b/src/server.h index ae2d23b99f..bfb725e25f 100644 --- a/src/server.h +++ b/src/server.h @@ -3242,7 +3242,9 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify); void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot); int pubsubUnsubscribeAllPatterns(client *c, int notify); int pubsubPublishMessage(robj *channel, robj *message, int sharded); +int pubsubPublishMessages(robj *channel, robj **message, int count, int sharded); int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded); +int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **message, int count, int sharded); void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk); int serverPubsubSubscriptionCount(void); int serverPubsubShardSubscriptionCount(void); From d36be204a5567784d9cc2e5fc68dde3b03d39240 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 1 Jul 2024 01:12:59 +0000 Subject: [PATCH 04/45] Fix mac-os-build Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 9a1c29cd52..a7cb74ee2c 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2895,19 +2895,16 @@ int clusterIsValidPacket(clusterLink *link) { explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + ntohl(hdr->data.publish.msg.message_len); } else if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { - clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); - if (sender && nodeSupportsLightMsgHdr(sender)) { - clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; - explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); - explen += sizeof(clusterMsgDataPublishMulti); - uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); - explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); - clusterMsgDataPublishMessage *msg_data = getInitialBulkData(&hdr_pubsub->data.publish.msg); - while(data_count--){ - uint32_t msglen = getPublishMsgLength(msg_data); - explen += msglen; - msg_data = getNextCursorBulkData(msg_data); - } + clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; + explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); + explen += sizeof(clusterMsgDataPublishMulti); + uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); + explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); + clusterMsgDataPublishMessage *msg_data = getInitialBulkData(&hdr_pubsub->data.publish.msg); + while(data_count--){ + uint32_t msglen = getPublishMsgLength(msg_data); + explen += msglen; + msg_data = getNextCursorBulkData(msg_data); } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { From 1ebd796b2bd1c090e8fb6677c954b02ad40c8931 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 1 Jul 2024 01:27:47 +0000 Subject: [PATCH 05/45] Fix format Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 64 +++++++++++++++++++++++++------------------- src/cluster_legacy.h | 18 ++++++------- src/pubsub.c | 4 +-- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index a7cb74ee2c..921cc39ab7 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2784,7 +2784,8 @@ static clusterMsgDataPublishMessage *getInitialBulkData(clusterMsgDataPublishMul } static clusterMsgDataPublishMessage *getNextCursorBulkData(clusterMsgDataPublishMessage *message_cursor) { - clusterMsgDataPublishMessage *next = (clusterMsgDataPublishMessage *)((char *)message_cursor->message_data + ntohl(message_cursor->message_len)); + clusterMsgDataPublishMessage *next = + (clusterMsgDataPublishMessage *)((char *)message_cursor->message_data + ntohl(message_cursor->message_len)); return next; } @@ -2813,17 +2814,14 @@ int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { uint64_t data_count; /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ + * Pub/Sub subscribers. */ if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { data_count = ntohu64(hdr->data.publish.msg.data_count); clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); channel = readBulkDataFromCursor(cursor); - while(--data_count){ + while (--data_count) { cursor = getNextCursorBulkData(cursor); - if (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { - serverLog(LL_NOTICE, "type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT"); - } pubsubPublishMessage(channel, readBulkDataFromCursor(cursor), type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); } decrRefCount(channel); @@ -2901,7 +2899,7 @@ int clusterIsValidPacket(clusterLink *link) { uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); clusterMsgDataPublishMessage *msg_data = getInitialBulkData(&hdr_pubsub->data.publish.msg); - while(data_count--){ + while (data_count--) { uint32_t msglen = getPublishMsgLength(msg_data); explen += msglen; msg_data = getNextCursorBulkData(msg_data); @@ -2952,7 +2950,8 @@ int clusterProcessPacket(clusterLink *link) { * because of Pub/Sub. */ if (sender) sender->data_received = now; - if (sender && (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && nodeSupportsLightMsgHdr(sender)) { + if (sender && (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && + nodeSupportsLightMsgHdr(sender)) { return pubsubProcessLightPacket(link, type); } @@ -3514,9 +3513,10 @@ void clusterReadHandler(connection *conn) { /* Perform some sanity check on the message signature * and length. */ if (memcmp(hdr->sig, "RCmb", 4) != 0 || ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) { - /* The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN + /* The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN * as we are using the `clusterMsgLight` hdr. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && ((ntohl(hdr->totlen) >= CLUSTERMSG_LIGHT_MIN_LEN))) { + if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && + ((ntohl(hdr->totlen) >= CLUSTERMSG_LIGHT_MIN_LEN))) { is_msg_valid = 1; } if (!is_msg_valid) { @@ -3524,12 +3524,12 @@ void clusterReadHandler(connection *conn) { int port; if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { serverLog(LL_WARNING, "Bad message length or signature received " - "on the Cluster bus."); + "on the Cluster bus."); } else { serverLog(LL_WARNING, - "Bad message length or signature received " - "on the Cluster bus from %s:%d", - ip, port); + "Bad message length or signature received " + "on the Cluster bus from %s:%d", + ip, port); } handleLinkIOError(link); return; @@ -3655,8 +3655,10 @@ void clusterBroadcastPublishMessage(clusterMsgSendBlock *msgblock) { clusterNode *node = dictGetVal(de); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) continue; - else clusterSendMessage(node->link, msgblock); + if (nodeSupportsLightMsgHdr(node)) + continue; + else + clusterSendMessage(node->link, msgblock); } dictReleaseIterator(di); } @@ -3670,8 +3672,10 @@ void clusterBroadcastPublishLightMessage(clusterMsgSendBlockLight *msgblock_ligh clusterNode *node = dictGetVal(de); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); - else continue; + if (nodeSupportsLightMsgHdr(node)) + clusterSendPublishMessage(node->link, msgblock_light); + else + continue; } dictReleaseIterator(di); } @@ -3739,7 +3743,6 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { } static void clusterBuildPubsubMessageHdr(clusterMsgLight *hdr, int type, size_t msglen) { - hdr->ver = htons(CLUSTER_PROTO_VER); hdr->sig[0] = 'R'; hdr->sig[1] = 'C'; @@ -3993,7 +3996,7 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj channel = getDecodedObject(channel); channel_len = sdslen(channel->ptr); - uint32_t aggregated_msg_len = 0 ; + uint32_t aggregated_msg_len = 0; aggregated_msg_len += channel_len; for (i = 0; i < count; i++) { messages[i] = getDecodedObject(messages[i]); @@ -4001,14 +4004,14 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj aggregated_msg_len += message_len; } - size_t msglen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight) ; + size_t msglen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); msglen += sizeof(clusterMsgDataPublishMulti); - msglen += ((count+1) * (sizeof(clusterMsgDataPublishMessage) - 8)); + msglen += ((count + 1) * (sizeof(clusterMsgDataPublishMessage) - 8)); msglen += aggregated_msg_len; clusterMsgSendBlockLight *msgblock = createPubsubClusterMsgSendBlock(type, msglen); clusterMsgLight *hdr = &msgblock->msg; - hdr->data.publish.msg.data_count = htonu64(count+1); + hdr->data.publish.msg.data_count = htonu64(count + 1); clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); writeDataToCursor(cursor, channel); for (i = 0; i < count; i++) { @@ -4118,7 +4121,8 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard clusterMsgSendBlockLight *msgblock_light; clusterMsgSendBlock *msgblock; int i; - msgblock_light = clusterCreatePublishLightMsgBlock(channel, message, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); + msgblock_light = clusterCreatePublishLightMsgBlock( + channel, message, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { clusterBroadcastPublishLightMessage(msgblock_light); clusterMsgSendBlockDecrRefCountPublish(msgblock_light); @@ -4139,8 +4143,10 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard while ((ln = listNext(&li))) { node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) clusterSendPublishMessage(node->link, msgblock_light); - else continue; + if (nodeSupportsLightMsgHdr(node)) + clusterSendPublishMessage(node->link, msgblock_light); + else + continue; } clusterMsgSendBlockDecrRefCountPublish(msgblock_light); @@ -4150,8 +4156,10 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard while ((ln = listNext(&li))) { node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) continue; - else clusterSendMessage(node->link, msgblock); + if (nodeSupportsLightMsgHdr(node)) + continue; + else + clusterSendMessage(node->link, msgblock); } clusterMsgSendBlockDecrRefCount(msgblock); } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index ea3347ba38..66102b296d 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -299,19 +299,19 @@ static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset"); #define CLUSTERMSG_FLAG0_EXT_DATA (1 << 2) /* Message contains extension data */ typedef struct { - char sig[4]; /* Signature "RCmb" (Cluster message bus). */ - uint32_t totlen; /* Total length of this message */ - uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ - uint16_t port; /* Primary port number (TCP or TLS). */ - uint16_t type; /* Message type */ + char sig[4]; /* Signature "RCmb" (Cluster message bus). */ + uint32_t totlen; /* Total length of this message */ + uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ + uint16_t port; /* Primary port number (TCP or TLS). */ + uint16_t type; /* Message type */ union clusterMsgDataLight data; } clusterMsgLight; static_assert(offsetof(clusterMsgLight, sig) == offsetof(clusterMsg, sig), "unexpected field offset"); -static_assert(offsetof(clusterMsgLight, totlen) == offsetof(clusterMsg, totlen),"unexpected field offset"); -static_assert(offsetof(clusterMsgLight, ver) == offsetof(clusterMsg, ver) , "unexpected field offset"); -static_assert(offsetof(clusterMsgLight, port) == offsetof(clusterMsg, port) , "unexpected field offset"); -static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type) , "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, totlen) == offsetof(clusterMsg, totlen), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, ver) == offsetof(clusterMsg, ver), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, port) == offsetof(clusterMsg, port), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, data) == 16, "unexpected field offset"); #define CLUSTERMSG_LIGHT_MIN_LEN (sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight)) diff --git a/src/pubsub.c b/src/pubsub.c index 7fa2747a0f..7507df2a27 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -629,7 +629,7 @@ void publishCommand(client *c) { return; } - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2],c->argc-2, 0); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc - 2, 0); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } @@ -715,7 +715,7 @@ void channelList(client *c, sds pat, kvstore *pubsub_channels) { /* SPUBLISH */ void spublishCommand(client *c) { - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc-2, 1); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc - 2, 1); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } From 5886313a72afe828edb43040ad7eeaa5fbe59357 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 1 Jul 2024 01:59:03 +0000 Subject: [PATCH 06/45] update commands' json files Signed-off-by: Roshan Khatri --- src/commands/publish.json | 5 +++-- src/commands/spublish.json | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/commands/publish.json b/src/commands/publish.json index 0dade72064..c7f10c1406 100644 --- a/src/commands/publish.json +++ b/src/commands/publish.json @@ -4,7 +4,7 @@ "complexity": "O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).", "group": "pubsub", "since": "2.0.0", - "arity": 3, + "arity": -3, "function": "publishCommand", "command_flags": [ "PUBSUB", @@ -21,7 +21,8 @@ }, { "name": "message", - "type": "string" + "type": "string", + "multiple": true } ], "reply_schema": { diff --git a/src/commands/spublish.json b/src/commands/spublish.json index da1360de7c..33b14d58fc 100644 --- a/src/commands/spublish.json +++ b/src/commands/spublish.json @@ -4,7 +4,7 @@ "complexity": "O(N) where N is the number of clients subscribed to the receiving shard channel.", "group": "pubsub", "since": "7.0.0", - "arity": 3, + "arity": -3, "function": "spublishCommand", "command_flags": [ "PUBSUB", @@ -20,7 +20,8 @@ }, { "name": "message", - "type": "string" + "type": "string", + "multiple": true } ], "key_specs": [ From 9e76b6857bb98d3244f799bd42c449f563a18370 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 2 Jul 2024 02:31:25 +0000 Subject: [PATCH 07/45] Code refactor and fix memory leak Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 103 ++++++++++++++++--------------------------- src/cluster_legacy.h | 6 +-- 2 files changed, 42 insertions(+), 67 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 921cc39ab7..e9525862f0 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -111,7 +111,6 @@ int auxTlsPortSetter(clusterNode *n, void *value, int length); sds auxTlsPortGetter(clusterNode *n, sds s); int auxTlsPortPresent(clusterNode *n); static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); -static void clusterBuildPubsubMessageHdr(clusterMsgLight *hdr, int type, size_t msglen); void freeClusterLink(clusterLink *link); int verifyClusterNodeId(const char *name, int length); sds clusterEncodeOpenSlotsAuxField(int rdbflags); @@ -1145,24 +1144,19 @@ void clusterReset(int hard) { /* ----------------------------------------------------------------------------- * CLUSTER communication link * -------------------------------------------------------------------------- */ -static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) { - uint32_t blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg); +static void *createClusterMsgSendBlock(int type, uint32_t msglen) { + uint32_t blocklen; + if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + blocklen = msglen + sizeof(clusterMsgSendBlockLight) - sizeof(clusterMsgLight); + } else { + blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg); + } clusterMsgSendBlock *msgblock = zcalloc(blocklen); msgblock->refcount = 1; msgblock->totlen = blocklen; server.stat_cluster_links_memory += blocklen; clusterBuildMessageHdr(&msgblock->msg, type, msglen); - return msgblock; -} - -static clusterMsgSendBlockLight *createPubsubClusterMsgSendBlock(int type, uint32_t msglen) { - uint32_t blocklen = msglen + sizeof(clusterMsgSendBlockLight) - sizeof(clusterMsgLight); - clusterMsgSendBlockLight *msgblock = zcalloc(blocklen); - msgblock->refcount = 1; - msgblock->totlen = blocklen; - server.stat_cluster_links_memory += blocklen; - clusterBuildPubsubMessageHdr(&msgblock->msg, type, msglen); - return msgblock; + return (void *)msgblock; } static void clusterMsgSendBlockDecrRefCount(void *node) { @@ -1175,16 +1169,6 @@ static void clusterMsgSendBlockDecrRefCount(void *node) { } } -static void clusterMsgSendBlockDecrRefCountPublish(void *light_block) { - clusterMsgSendBlockLight *msgblock = (clusterMsgSendBlockLight *)light_block; - msgblock->refcount--; - serverAssert(msgblock->refcount >= 0); - if (msgblock->refcount == 0) { - server.stat_cluster_links_memory -= msgblock->totlen; - zfree(msgblock); - } -} - clusterLink *createClusterLink(clusterNode *node) { clusterLink *link = zmalloc(sizeof(*link)); link->ctime = mstime(); @@ -2810,7 +2794,7 @@ static uint32_t getPublishMsgLength(clusterMsgDataPublishMessage *cursor) { int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; - robj *channel; + robj *channel, *message; uint64_t data_count; /* Don't bother creating useless objects if there are no @@ -2822,7 +2806,9 @@ int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { channel = readBulkDataFromCursor(cursor); while (--data_count) { cursor = getNextCursorBulkData(cursor); - pubsubPublishMessage(channel, readBulkDataFromCursor(cursor), type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); + message = readBulkDataFromCursor(cursor); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); + decrRefCount(message); } decrRefCount(channel); } @@ -3682,6 +3668,20 @@ void clusterBroadcastPublishLightMessage(clusterMsgSendBlockLight *msgblock_ligh /* Build the message header. hdr must point to a buffer at least * sizeof(clusterMsg) in bytes. */ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { + hdr->ver = htons(CLUSTER_PROTO_VER); + hdr->sig[0] = 'R'; + hdr->sig[1] = 'C'; + hdr->sig[2] = 'm'; + hdr->sig[3] = 'b'; + hdr->type = htons(type); + hdr->totlen = htonl(msglen); + + if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + clusterMsgLight *hdr_light = (clusterMsgLight *)(void *)hdr; + hdr_light->notused1 = 0; + return; + } + uint64_t offset; clusterNode *primary; @@ -3691,12 +3691,6 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { * in charge for this slots. */ primary = (nodeIsReplica(myself) && myself->replicaof) ? myself->replicaof : myself; - hdr->ver = htons(CLUSTER_PROTO_VER); - hdr->sig[0] = 'R'; - hdr->sig[1] = 'C'; - hdr->sig[2] = 'm'; - hdr->sig[3] = 'b'; - hdr->type = htons(type); memcpy(hdr->sender, myself->name, CLUSTER_NAMELEN); /* If cluster-announce-ip option is enabled, force the receivers of our @@ -3738,27 +3732,6 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { /* Set the message flags. */ if (clusterNodeIsPrimary(myself) && server.cluster->mf_end) hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED; - - hdr->totlen = htonl(msglen); -} - -static void clusterBuildPubsubMessageHdr(clusterMsgLight *hdr, int type, size_t msglen) { - hdr->ver = htons(CLUSTER_PROTO_VER); - hdr->sig[0] = 'R'; - hdr->sig[1] = 'C'; - hdr->sig[2] = 'm'; - hdr->sig[3] = 'b'; - hdr->type = htons(type); - - /* Handle cluster-announce-[tls-|bus-]port. */ - int announced_tcp_port, announced_tls_port, announced_cport; - deriveAnnouncedPorts(&announced_tcp_port, &announced_tls_port, &announced_cport); - if (server.tls_cluster) { - hdr->port = htons(announced_tls_port); - } else { - hdr->port = htons(announced_tcp_port); - } - hdr->totlen = htonl(msglen); } /* Set the i-th entry of the gossip section in the message pointed by 'hdr' @@ -3841,7 +3814,7 @@ void clusterSendPing(clusterLink *link, int type) { /* Note: clusterBuildMessageHdr() expects the buffer to be always at least * sizeof(clusterMsg) or more. */ if (estlen < (int)sizeof(clusterMsg)) estlen = sizeof(clusterMsg); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, estlen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(type, estlen); clusterMsg *hdr = &msgblock->msg; if (!link->inbound && type == CLUSTERMSG_TYPE_PING) link->node->ping_sent = mstime(); @@ -3975,7 +3948,7 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, size_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(type, msglen); clusterMsg *hdr = &msgblock->msg; hdr->data.publish.msg.channel_len = htonl(channel_len); @@ -4008,7 +3981,7 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj msglen += sizeof(clusterMsgDataPublishMulti); msglen += ((count + 1) * (sizeof(clusterMsgDataPublishMessage) - 8)); msglen += aggregated_msg_len; - clusterMsgSendBlockLight *msgblock = createPubsubClusterMsgSendBlock(type, msglen); + clusterMsgSendBlockLight *msgblock = (clusterMsgSendBlockLight *)createClusterMsgSendBlock(type, msglen); clusterMsgLight *hdr = &msgblock->msg; hdr->data.publish.msg.data_count = htonu64(count + 1); @@ -4031,7 +4004,7 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj * nodes to do the same ASAP. */ void clusterSendFail(char *nodename) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + sizeof(clusterMsgDataFail); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); clusterMsg *hdr = &msgblock->msg; memcpy(hdr->data.fail.about.nodename, nodename, CLUSTER_NAMELEN); @@ -4047,7 +4020,7 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) { if (link == NULL) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + sizeof(clusterMsgDataUpdate); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); clusterMsg *hdr = &msgblock->msg; memcpy(hdr->data.update.nodecfg.nodename, node->name, CLUSTER_NAMELEN); @@ -4069,7 +4042,7 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) { void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type, const char *payload, uint32_t len) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); msglen += sizeof(clusterMsgModule) - 3 + len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); clusterMsg *hdr = &msgblock->msg; hdr->data.module.msg.module_id = module_id; /* Already endian adjusted. */ @@ -4125,7 +4098,7 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard channel, message, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { clusterBroadcastPublishLightMessage(msgblock_light); - clusterMsgSendBlockDecrRefCountPublish(msgblock_light); + clusterMsgSendBlockDecrRefCount(msgblock_light); for (i = 0; i < count; i++) { msgblock = clusterCreatePublishMsgBlock(channel, message[i], CLUSTERMSG_TYPE_PUBLISH); clusterBroadcastPublishMessage(msgblock); @@ -4148,7 +4121,7 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard else continue; } - clusterMsgSendBlockDecrRefCountPublish(msgblock_light); + clusterMsgSendBlockDecrRefCount(msgblock_light); for (i = 0; i < count; i++) { listRewind(nodes_for_slot, &li); @@ -4177,7 +4150,8 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard * but only the primaries are supposed to reply to our query. */ void clusterRequestFailoverAuth(void) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); + clusterMsgSendBlock *msgblock = + (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); clusterMsg *hdr = &msgblock->msg; /* If this is a manual failover, set the CLUSTERMSG_FLAG0_FORCEACK bit @@ -4193,7 +4167,8 @@ void clusterSendFailoverAuth(clusterNode *node) { if (!node->link) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); + clusterMsgSendBlock *msgblock = + (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); clusterSendMessage(node->link, msgblock); clusterMsgSendBlockDecrRefCount(msgblock); @@ -4204,7 +4179,7 @@ void clusterSendMFStart(clusterNode *node) { if (!node->link) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); clusterSendMessage(node->link, msgblock); clusterMsgSendBlockDecrRefCount(msgblock); diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 66102b296d..002c02e6d9 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -302,15 +302,15 @@ typedef struct { char sig[4]; /* Signature "RCmb" (Cluster message bus). */ uint32_t totlen; /* Total length of this message */ uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ - uint16_t port; /* Primary port number (TCP or TLS). */ - uint16_t type; /* Message type */ + uint16_t notused1; + uint16_t type; /* Message type */ union clusterMsgDataLight data; } clusterMsgLight; static_assert(offsetof(clusterMsgLight, sig) == offsetof(clusterMsg, sig), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, totlen) == offsetof(clusterMsg, totlen), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, ver) == offsetof(clusterMsg, ver), "unexpected field offset"); -static_assert(offsetof(clusterMsgLight, port) == offsetof(clusterMsg, port), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, notused1) == offsetof(clusterMsg, port), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, data) == 16, "unexpected field offset"); From e11faba37b4d1010509f411c12bd5ac231e2fd21 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 2 Jul 2024 02:31:41 +0000 Subject: [PATCH 08/45] Revert "update commands' json files" This reverts commit 5886313a72afe828edb43040ad7eeaa5fbe59357. Signed-off-by: Roshan Khatri --- src/commands/publish.json | 5 ++--- src/commands/spublish.json | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/commands/publish.json b/src/commands/publish.json index c7f10c1406..0dade72064 100644 --- a/src/commands/publish.json +++ b/src/commands/publish.json @@ -4,7 +4,7 @@ "complexity": "O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).", "group": "pubsub", "since": "2.0.0", - "arity": -3, + "arity": 3, "function": "publishCommand", "command_flags": [ "PUBSUB", @@ -21,8 +21,7 @@ }, { "name": "message", - "type": "string", - "multiple": true + "type": "string" } ], "reply_schema": { diff --git a/src/commands/spublish.json b/src/commands/spublish.json index 33b14d58fc..da1360de7c 100644 --- a/src/commands/spublish.json +++ b/src/commands/spublish.json @@ -4,7 +4,7 @@ "complexity": "O(N) where N is the number of clients subscribed to the receiving shard channel.", "group": "pubsub", "since": "7.0.0", - "arity": -3, + "arity": 3, "function": "spublishCommand", "command_flags": [ "PUBSUB", @@ -20,8 +20,7 @@ }, { "name": "message", - "type": "string", - "multiple": true + "type": "string" } ], "key_specs": [ From e756bc99c994038eaeabbfb4ed22162141182ce3 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 2 Jul 2024 02:33:53 +0000 Subject: [PATCH 09/45] revert changes in publish command. Signed-off-by: Roshan Khatri --- src/commands.def | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/commands.def b/src/commands.def index 245cfd1712..06cdb4b87e 100644 --- a/src/commands.def +++ b/src/commands.def @@ -4481,7 +4481,7 @@ struct COMMAND_ARG PSUBSCRIBE_Args[] = { /* PUBLISH argument table */ struct COMMAND_ARG PUBLISH_Args[] = { {MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** PUBSUB CHANNELS ********************/ @@ -4678,7 +4678,7 @@ keySpec SPUBLISH_Keyspecs[1] = { /* SPUBLISH argument table */ struct COMMAND_ARG SPUBLISH_Args[] = { {MAKE_ARG("shardchannel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +{MAKE_ARG("message",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SSUBSCRIBE ********************/ @@ -10777,10 +10777,10 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("rpushx","Appends an element to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSHX_History,1,RPUSHX_Tips,0,rpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSHX_Keyspecs,1,NULL,2),.args=RPUSHX_Args}, /* pubsub */ {MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, -{MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,-3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, +{MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, {MAKE_CMD("pubsub","A container for Pub/Sub commands.","Depends on subcommand.","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_History,0,PUBSUB_Tips,0,NULL,-2,0,0,PUBSUB_Keyspecs,0,NULL,0),.subcommands=PUBSUB_Subcommands}, {MAKE_CMD("punsubscribe","Stops listening to messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUNSUBSCRIBE_History,0,PUNSUBSCRIBE_Tips,0,punsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PUNSUBSCRIBE_Keyspecs,0,NULL,1),.args=PUNSUBSCRIBE_Args}, -{MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,-3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, +{MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, {MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, {MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, {MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args}, From 5373938d88a58d4ac4461746109623d1891f5245 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 2 Jul 2024 21:21:33 +0000 Subject: [PATCH 10/45] address some feedback Signed-off-by: Roshan Khatri --- src/cluster.h | 2 +- src/cluster_legacy.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index 6699f93d7e..a1d617b695 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -52,7 +52,7 @@ void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); void clusterUpdateMyselfHumanNodename(void); -void clusterPropagatePublish(robj *channel, robj **message, int count, int sharded); +void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded); unsigned long getClusterConnectionsCount(void); int isClusterHealthy(void); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index e9525862f0..d490b95c7f 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -139,6 +139,8 @@ static inline int defaultClientPort(void) { #define RCVBUF_MIN_READ_LEN 16 #define RCVBUF_MAX_PREALLOC (1 << 20) /* 1MB */ +#define PLACEHOLDER_LEN 8 + /* Fixed timeout value for cluster operations (milliseconds) */ #define CLUSTER_OPERATION_TIMEOUT 2000 @@ -3979,7 +3981,8 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj size_t msglen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); msglen += sizeof(clusterMsgDataPublishMulti); - msglen += ((count + 1) * (sizeof(clusterMsgDataPublishMessage) - 8)); + msglen += ((count + 1) * + (sizeof(clusterMsgDataPublishMessage) - (sizeof(((clusterMsgDataPublishMessage *)0)->message_data)))); msglen += aggregated_msg_len; clusterMsgSendBlockLight *msgblock = (clusterMsgSendBlockLight *)createClusterMsgSendBlock(type, msglen); @@ -4090,17 +4093,17 @@ int clusterSendModuleMessageToTarget(const char *target, * Otherwise: * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ -void clusterPropagatePublish(robj *channel, robj **message, int count, int sharded) { +void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded) { clusterMsgSendBlockLight *msgblock_light; clusterMsgSendBlock *msgblock; int i; msgblock_light = clusterCreatePublishLightMsgBlock( - channel, message, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); + channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { clusterBroadcastPublishLightMessage(msgblock_light); clusterMsgSendBlockDecrRefCount(msgblock_light); for (i = 0; i < count; i++) { - msgblock = clusterCreatePublishMsgBlock(channel, message[i], CLUSTERMSG_TYPE_PUBLISH); + msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); clusterBroadcastPublishMessage(msgblock); clusterMsgSendBlockDecrRefCount(msgblock); } @@ -4125,7 +4128,7 @@ void clusterPropagatePublish(robj *channel, robj **message, int count, int shard for (i = 0; i < count; i++) { listRewind(nodes_for_slot, &li); - msgblock = clusterCreatePublishMsgBlock(channel, message[i], CLUSTERMSG_TYPE_PUBLISHSHARD); + msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISHSHARD); while ((ln = listNext(&li))) { node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; From 9e42ada3bd01f67e3f32ec177b209bb7f50ca784 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Wed, 3 Jul 2024 01:44:55 +0000 Subject: [PATCH 11/45] more changes1 Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 70 ++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 51 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index d490b95c7f..550f9e8504 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3595,23 +3595,16 @@ void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; } -void clusterSendPublishMessage(clusterLink *link, clusterMsgSendBlockLight *msgblock) { - if (!link) { - return; +/* Helper function to send message to node depending on + * the header type supported by the node. */ +void clusterSendPublishMessage(clusterNode *node, int type, clusterMsgSendBlock *msgblock) { + if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) & !nodeSupportsLightMsgHdr(node)) { + clusterSendMessage(node->link, msgblock); + } else if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) & + nodeSupportsLightMsgHdr(node)) { + clusterSendMessage(node->link, msgblock); } - if (listLength(link->send_msg_queue) == 0 && msgblock->msg.totlen != 0) - connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1); - - listAddNodeTail(link->send_msg_queue, msgblock); - msgblock->refcount++; - - /* Update memory tracking */ - link->send_msg_queue_mem += sizeof(listNode) + msgblock->totlen; - server.stat_cluster_links_memory += sizeof(listNode); - - /* Populate sent messages stats. */ - uint16_t type = ntohs(msgblock->msg.type); - if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; + return; } /* Send a message to all the nodes that are part of the cluster having @@ -3624,49 +3617,24 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictIterator *di; dictEntry *de; - di = dictGetSafeIterator(server.cluster->nodes); - while ((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - clusterSendMessage(node->link, msgblock); - } - dictReleaseIterator(di); -} - -void clusterBroadcastPublishMessage(clusterMsgSendBlock *msgblock) { - dictIterator *di; - dictEntry *de; + uint16_t type = ntohs(msgblock->msg.type); + int is_msg_type_publish = (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD || + type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); di = dictGetSafeIterator(server.cluster->nodes); while ((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) - continue; - else + if (!is_msg_type_publish) { clusterSendMessage(node->link, msgblock); + } else { + clusterSendPublishMessage(node, type, msgblock); + } } dictReleaseIterator(di); } -void clusterBroadcastPublishLightMessage(clusterMsgSendBlockLight *msgblock_light) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while ((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) - clusterSendPublishMessage(node->link, msgblock_light); - else - continue; - } - dictReleaseIterator(di); -} /* Build the message header. hdr must point to a buffer at least * sizeof(clusterMsg) in bytes. */ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { @@ -4100,11 +4068,11 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { - clusterBroadcastPublishLightMessage(msgblock_light); + clusterBroadcastMessage((clusterMsgSendBlock *)msgblock_light); clusterMsgSendBlockDecrRefCount(msgblock_light); for (i = 0; i < count; i++) { msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastPublishMessage(msgblock); + clusterBroadcastMessage(msgblock); clusterMsgSendBlockDecrRefCount(msgblock); } return; @@ -4120,7 +4088,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; if (nodeSupportsLightMsgHdr(node)) - clusterSendPublishMessage(node->link, msgblock_light); + clusterSendMessage(node->link, (clusterMsgSendBlock *)msgblock_light); else continue; } From e634d6aa2babce6e300f6ed07a341d0b077ac97f Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Wed, 3 Jul 2024 02:52:54 +0000 Subject: [PATCH 12/45] minor change Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 550f9e8504..6d3b3ed2d3 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3598,9 +3598,9 @@ void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { /* Helper function to send message to node depending on * the header type supported by the node. */ void clusterSendPublishMessage(clusterNode *node, int type, clusterMsgSendBlock *msgblock) { - if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) & !nodeSupportsLightMsgHdr(node)) { + if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && !nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock); - } else if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) & + } else if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock); } From a1ef510c079a565dac6e3bd72c238424d783d29b Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 9 Jul 2024 21:42:21 +0000 Subject: [PATCH 13/45] addressing feedbacks Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 121 +++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 68 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6d3b3ed2d3..1a30105817 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -312,15 +312,12 @@ int auxTlsPortPresent(clusterNode *n) { typedef struct { size_t totlen; /* Total length of this block including the message */ int refcount; /* Number of cluster link send msg queues containing the message */ - clusterMsg msg; + union { + clusterMsg msg; + clusterMsgLight light_msg; + }; } clusterMsgSendBlock; -typedef struct { - size_t totlen; /* Total length of this block including the message */ - int refcount; /* Number of cluster link send msg queues containing the message */ - clusterMsgLight msg; -} clusterMsgSendBlockLight; - /* ----------------------------------------------------------------------------- * Initialization * -------------------------------------------------------------------------- */ @@ -1147,18 +1144,13 @@ void clusterReset(int hard) { * CLUSTER communication link * -------------------------------------------------------------------------- */ static void *createClusterMsgSendBlock(int type, uint32_t msglen) { - uint32_t blocklen; - if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { - blocklen = msglen + sizeof(clusterMsgSendBlockLight) - sizeof(clusterMsgLight); - } else { - blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg); - } + uint32_t blocklen = msglen + offsetof(clusterMsgSendBlock, msg); clusterMsgSendBlock *msgblock = zcalloc(blocklen); msgblock->refcount = 1; msgblock->totlen = blocklen; server.stat_cluster_links_memory += blocklen; clusterBuildMessageHdr(&msgblock->msg, type, msglen); - return (void *)msgblock; + return msgblock; } static void clusterMsgSendBlockDecrRefCount(void *node) { @@ -2764,25 +2756,25 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } -static clusterMsgDataPublishMessage *getInitialBulkData(clusterMsgDataPublishMulti *msg) { +static clusterMsgDataPublishMessage *getPublishMessageFirst(clusterMsgDataPublishMulti *msg) { clusterMsgDataPublishMessage *initial = (clusterMsgDataPublishMessage *)&(msg->bulk_data); return initial; } -static clusterMsgDataPublishMessage *getNextCursorBulkData(clusterMsgDataPublishMessage *message_cursor) { +static clusterMsgDataPublishMessage *getPublishMessageNext(clusterMsgDataPublishMessage *message_cursor) { clusterMsgDataPublishMessage *next = (clusterMsgDataPublishMessage *)((char *)message_cursor->message_data + ntohl(message_cursor->message_len)); return next; } -void writeDataToCursor(clusterMsgDataPublishMessage *cursor, robj *data) { +void writePublishMessageToCursor(clusterMsgDataPublishMessage *cursor, robj *data) { uint32_t data_len = sdslen(data->ptr); cursor->message_len = htonl(data_len); memcpy(cursor->message_data, data->ptr, data_len); return; } -static robj *readBulkDataFromCursor(clusterMsgDataPublishMessage *cursor) { +static robj *readPublishMessageFromCursor(clusterMsgDataPublishMessage *cursor) { uint32_t data_len; data_len = ntohl(cursor->message_len); robj *data = (createStringObject((char *)cursor->message_data, data_len)); @@ -2804,11 +2796,11 @@ int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { data_count = ntohu64(hdr->data.publish.msg.data_count); - clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); - channel = readBulkDataFromCursor(cursor); + clusterMsgDataPublishMessage *cursor = getPublishMessageFirst(&hdr->data.publish.msg); + channel = readPublishMessageFromCursor(cursor); while (--data_count) { - cursor = getNextCursorBulkData(cursor); - message = readBulkDataFromCursor(cursor); + cursor = getPublishMessageNext(cursor); + message = readPublishMessageFromCursor(cursor); pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); decrRefCount(message); } @@ -2886,11 +2878,11 @@ int clusterIsValidPacket(clusterLink *link) { explen += sizeof(clusterMsgDataPublishMulti); uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); - clusterMsgDataPublishMessage *msg_data = getInitialBulkData(&hdr_pubsub->data.publish.msg); + clusterMsgDataPublishMessage *msg_data = getPublishMessageFirst(&hdr_pubsub->data.publish.msg); while (data_count--) { uint32_t msglen = getPublishMsgLength(msg_data); explen += msglen; - msg_data = getNextCursorBulkData(msg_data); + msg_data = getPublishMessageNext(msg_data); } } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { @@ -3496,32 +3488,28 @@ void clusterReadHandler(connection *conn) { /* Finally read the full message. */ hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); + int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); if (rcvbuflen == RCVBUF_MIN_READ_LEN) { - int is_msg_valid = 0; /* Perform some sanity check on the message signature - * and length. */ - if (memcmp(hdr->sig, "RCmb", 4) != 0 || ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) { - /* The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN - * as we are using the `clusterMsgLight` hdr. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && - ((ntohl(hdr->totlen) >= CLUSTERMSG_LIGHT_MIN_LEN))) { - is_msg_valid = 1; - } - if (!is_msg_valid) { - char ip[NET_IP_STR_LEN]; - int port; - if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { - serverLog(LL_WARNING, "Bad message length or signature received " - "on the Cluster bus."); - } else { - serverLog(LL_WARNING, - "Bad message length or signature received " - "on the Cluster bus from %s:%d", - ip, port); - } - handleLinkIOError(link); - return; + * and length. + * The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN + * as we are using the `clusterMsgLight` hdr. */ + if (memcmp(hdr->sig, "RCmb", 4) != 0 || + (!is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)) || + (is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_LIGHT_MIN_LEN))) { + char ip[NET_IP_STR_LEN]; + int port; + if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { + serverLog(LL_WARNING, "Bad message length or signature received " + "on the Cluster bus."); + } else { + serverLog(LL_WARNING, + "Bad message length or signature received " + "on the Cluster bus from %s:%d", + ip, port); } + handleLinkIOError(link); + return; } } readlen = ntohl(hdr->totlen) - rcvbuflen; @@ -3784,7 +3772,7 @@ void clusterSendPing(clusterLink *link, int type) { /* Note: clusterBuildMessageHdr() expects the buffer to be always at least * sizeof(clusterMsg) or more. */ if (estlen < (int)sizeof(clusterMsg)) estlen = sizeof(clusterMsg); - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(type, estlen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, estlen); clusterMsg *hdr = &msgblock->msg; if (!link->inbound && type == CLUSTERMSG_TYPE_PING) link->node->ping_sent = mstime(); @@ -3918,7 +3906,7 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, size_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(type, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); clusterMsg *hdr = &msgblock->msg; hdr->data.publish.msg.channel_len = htonl(channel_len); @@ -3932,7 +3920,7 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, return msgblock; } -clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj **messages, int count, uint16_t type) { +clusterMsgSendBlock *clusterCreatePublishLightMsgBlock(robj *channel, robj **messages, int count, uint16_t type) { uint32_t channel_len, message_len; int i; @@ -3952,15 +3940,15 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj msglen += ((count + 1) * (sizeof(clusterMsgDataPublishMessage) - (sizeof(((clusterMsgDataPublishMessage *)0)->message_data)))); msglen += aggregated_msg_len; - clusterMsgSendBlockLight *msgblock = (clusterMsgSendBlockLight *)createClusterMsgSendBlock(type, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); - clusterMsgLight *hdr = &msgblock->msg; + clusterMsgLight *hdr = &msgblock->light_msg; hdr->data.publish.msg.data_count = htonu64(count + 1); - clusterMsgDataPublishMessage *cursor = getInitialBulkData(&hdr->data.publish.msg); - writeDataToCursor(cursor, channel); + clusterMsgDataPublishMessage *cursor = getPublishMessageFirst(&hdr->data.publish.msg); + writePublishMessageToCursor(cursor, channel); for (i = 0; i < count; i++) { - cursor = getNextCursorBulkData(cursor); - writeDataToCursor(cursor, messages[i]); + cursor = getPublishMessageNext(cursor); + writePublishMessageToCursor(cursor, messages[i]); decrRefCount(messages[i]); } decrRefCount(channel); @@ -3975,7 +3963,7 @@ clusterMsgSendBlockLight *clusterCreatePublishLightMsgBlock(robj *channel, robj * nodes to do the same ASAP. */ void clusterSendFail(char *nodename) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + sizeof(clusterMsgDataFail); - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); clusterMsg *hdr = &msgblock->msg; memcpy(hdr->data.fail.about.nodename, nodename, CLUSTER_NAMELEN); @@ -3991,7 +3979,7 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) { if (link == NULL) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + sizeof(clusterMsgDataUpdate); - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); clusterMsg *hdr = &msgblock->msg; memcpy(hdr->data.update.nodecfg.nodename, node->name, CLUSTER_NAMELEN); @@ -4013,7 +4001,7 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) { void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type, const char *payload, uint32_t len) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); msglen += sizeof(clusterMsgModule) - 3 + len; - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); clusterMsg *hdr = &msgblock->msg; hdr->data.module.msg.module_id = module_id; /* Already endian adjusted. */ @@ -4062,13 +4050,12 @@ int clusterSendModuleMessageToTarget(const char *target, * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded) { - clusterMsgSendBlockLight *msgblock_light; - clusterMsgSendBlock *msgblock; + clusterMsgSendBlock *msgblock, *msgblock_light; int i; msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); if (!sharded) { - clusterBroadcastMessage((clusterMsgSendBlock *)msgblock_light); + clusterBroadcastMessage(msgblock_light); clusterMsgSendBlockDecrRefCount(msgblock_light); for (i = 0; i < count; i++) { msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); @@ -4088,7 +4075,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; if (nodeSupportsLightMsgHdr(node)) - clusterSendMessage(node->link, (clusterMsgSendBlock *)msgblock_light); + clusterSendMessage(node->link, msgblock_light); else continue; } @@ -4121,8 +4108,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar * but only the primaries are supposed to reply to our query. */ void clusterRequestFailoverAuth(void) { uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = - (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); clusterMsg *hdr = &msgblock->msg; /* If this is a manual failover, set the CLUSTERMSG_FLAG0_FORCEACK bit @@ -4138,8 +4124,7 @@ void clusterSendFailoverAuth(clusterNode *node) { if (!node->link) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = - (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); clusterSendMessage(node->link, msgblock); clusterMsgSendBlockDecrRefCount(msgblock); @@ -4150,7 +4135,7 @@ void clusterSendMFStart(clusterNode *node) { if (!node->link) return; uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock *)createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); clusterSendMessage(node->link, msgblock); clusterMsgSendBlockDecrRefCount(msgblock); From 6f7126e17b346f0d125daf7fc6f785199869d81e Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 9 Jul 2024 21:48:27 +0000 Subject: [PATCH 14/45] correct formatting. Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 1a30105817..a31142afd5 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3494,8 +3494,7 @@ void clusterReadHandler(connection *conn) { * and length. * The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN * as we are using the `clusterMsgLight` hdr. */ - if (memcmp(hdr->sig, "RCmb", 4) != 0 || - (!is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)) || + if (memcmp(hdr->sig, "RCmb", 4) != 0 || (!is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)) || (is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_LIGHT_MIN_LEN))) { char ip[NET_IP_STR_LEN]; int port; From fc4dd81f20ea03ceebad0f8e4d366da3baa90fc1 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Wed, 10 Jul 2024 21:34:34 +0000 Subject: [PATCH 15/45] minor change Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index a31142afd5..bf83b977a4 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -999,7 +999,7 @@ void clusterInit(void) { * by the createClusterNode() function. */ myself = server.cluster->myself = createClusterNode(NULL, CLUSTER_NODE_MYSELF | CLUSTER_NODE_PRIMARY); serverLog(LL_NOTICE, "No cluster configuration found, I'm %.40s", myself->name); - myself->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + clusterUpdateMyselfFlags(); clusterAddNode(myself); clusterAddNodeToShard(myself->shard_id, myself); saveconf = 1; From 3dff6c4cb33bfacc9c9d93822e37fcb72ac54aab Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 11 Jul 2024 05:38:06 +0000 Subject: [PATCH 16/45] Add flag for creating only light header for hoomogeneous cluster Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 57 ++++++++++++++++++++++++++++++-------------- src/cluster_legacy.h | 1 + 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index bf83b977a4..9a9d485bd7 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -974,6 +974,7 @@ void clusterInit(void) { server.cluster->failover_auth_epoch = 0; server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; server.cluster->lastVoteEpoch = 0; + server.cluster->is_light_hdr_supported = 1; /* Initialize stats */ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { @@ -3607,11 +3608,12 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { uint16_t type = ntohs(msgblock->msg.type); int is_msg_type_publish = (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD || type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); + int nodes_not_supporting_light_header = 0; di = dictGetSafeIterator(server.cluster->nodes); while ((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); - + if (!nodeSupportsLightMsgHdr(node)) nodes_not_supporting_light_header++; if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; if (!is_msg_type_publish) { clusterSendMessage(node->link, msgblock); @@ -3620,6 +3622,12 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { } } dictReleaseIterator(di); + + if (nodes_not_supporting_light_header) { + server.cluster->is_light_hdr_supported = 0; + } else { + server.cluster->is_light_hdr_supported = 1; + } } /* Build the message header. hdr must point to a buffer at least @@ -4056,10 +4064,12 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar if (!sharded) { clusterBroadcastMessage(msgblock_light); clusterMsgSendBlockDecrRefCount(msgblock_light); - for (i = 0; i < count; i++) { - msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); + if (!server.cluster->is_light_hdr_supported) { + for (i = 0; i < count; i++) { + msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); + clusterBroadcastMessage(msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); + } } return; } @@ -4070,28 +4080,39 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); serverAssert(nodes_for_slot != NULL); listRewind(nodes_for_slot, &li); + int nodes_not_supporting_light_header = 0; while ((ln = listNext(&li))) { node = listNodeValue(ln); if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) + if (nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock_light); - else + } else { + nodes_not_supporting_light_header++; continue; + } } clusterMsgSendBlockDecrRefCount(msgblock_light); - for (i = 0; i < count; i++) { - listRewind(nodes_for_slot, &li); - msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISHSHARD); - while ((ln = listNext(&li))) { - node = listNodeValue(ln); - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) - continue; - else - clusterSendMessage(node->link, msgblock); + if (nodes_not_supporting_light_header) { + server.cluster->is_light_hdr_supported = 0; + } else { + server.cluster->is_light_hdr_supported = 1; + } + + if (!server.cluster->is_light_hdr_supported) { + for (i = 0; i < count; i++) { + listRewind(nodes_for_slot, &li); + msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISHSHARD); + while ((ln = listNext(&li))) { + node = listNodeValue(ln); + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + if (nodeSupportsLightMsgHdr(node)) + continue; + else + clusterSendMessage(node->link, msgblock); + } + clusterMsgSendBlockDecrRefCount(msgblock); } - clusterMsgSendBlockDecrRefCount(msgblock); } } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 002c02e6d9..e962cc94f2 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -365,6 +365,7 @@ struct clusterState { clusterNode *migrating_slots_to[CLUSTER_SLOTS]; clusterNode *importing_slots_from[CLUSTER_SLOTS]; clusterNode *slots[CLUSTER_SLOTS]; + int is_light_hdr_supported; /* The following fields are used to take the replica state on elections. */ mstime_t failover_auth_time; /* Time of previous or next election. */ int failover_auth_count; /* Number of votes received so far. */ From aeeceee26edb2d863114c16162ef44c5c18f9d34 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 07:06:32 +0000 Subject: [PATCH 17/45] Add a new Iterator, address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 198 ++++++++++++++++++++++++------------------- 1 file changed, 110 insertions(+), 88 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 1e138d61d4..5dc273dd94 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -151,8 +151,6 @@ static inline int defaultClientPort(void) { #define RCVBUF_MIN_READ_LEN 16 #define RCVBUF_MAX_PREALLOC (1 << 20) /* 1MB */ -#define PLACEHOLDER_LEN 8 - /* Fixed timeout value for cluster operations (milliseconds) */ #define CLUSTER_OPERATION_TIMEOUT 2000 @@ -189,6 +187,48 @@ dictType clusterSdsToListType = { NULL /* allow to expand */ }; +typedef struct { + enum { ITER_DICT, ITER_LIST } type; + union { + dictIterator di; + listIter li; + }; +} DictOrListIterator; + +void iterInitForDict(DictOrListIterator *iter, dict *d) { + iter->type = ITER_DICT; + dictInitSafeIterator(&iter->di, d); +} + +void iterInitForList(DictOrListIterator *iter, list *l) { + iter->type = ITER_LIST; + listRewind(l, &iter->li); +} + +void *iterNext(DictOrListIterator *iter) { + switch (iter->type) { + case ITER_DICT: { + // Get the next entry in the dictionary + dictEntry *de = dictNext(&iter->di); + // Return the value associated with the entry, or NULL if no more entries + return de ? dictGetVal(de) : NULL; + } + case ITER_LIST: { + // Get the next node in the list + listNode *ln = listNext(&iter->li); + // Return the value associated with the node, or NULL if no more nodes + return ln ? listNodeValue(ln) : NULL; + } + default: serverPanic("bad type"); return NULL; // This line is unreachable but added to avoid compiler warnings + } +} + +void iterReset(DictOrListIterator *iter) { + if (iter->type == ITER_DICT) { + dictResetIterator(&iter->di); + } +} + /* Aux fields were introduced in Redis OSS 7.2 to support the persistence * of various important node properties, such as shard id, in nodes.conf. * Aux fields take an explicit format of name=value pairs and have no @@ -2824,7 +2864,7 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } -static clusterMsgDataPublishMessage *getPublishMessageFirst(clusterMsgDataPublishMulti *msg) { +static clusterMsgDataPublishMessage *getFirstPublishMessage(clusterMsgDataPublishMulti *msg) { clusterMsgDataPublishMessage *initial = (clusterMsgDataPublishMessage *)&(msg->bulk_data); return initial; } @@ -2849,7 +2889,7 @@ static robj *readPublishMessageFromCursor(clusterMsgDataPublishMessage *cursor) return data; } -static uint32_t getPublishMsgLength(clusterMsgDataPublishMessage *cursor) { +static uint32_t getPublishMessageLength(clusterMsgDataPublishMessage *cursor) { uint32_t msg_length = ntohl(cursor->message_len); return msg_length; } @@ -2864,9 +2904,10 @@ int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { data_count = ntohu64(hdr->data.publish.msg.data_count); - clusterMsgDataPublishMessage *cursor = getPublishMessageFirst(&hdr->data.publish.msg); + clusterMsgDataPublishMessage *cursor = getFirstPublishMessage(&hdr->data.publish.msg); channel = readPublishMessageFromCursor(cursor); - while (--data_count) { + data_count -= 1; + while (data_count--) { cursor = getPublishMessageNext(cursor); message = readPublishMessageFromCursor(cursor); pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); @@ -2946,9 +2987,9 @@ int clusterIsValidPacket(clusterLink *link) { explen += sizeof(clusterMsgDataPublishMulti); uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); - clusterMsgDataPublishMessage *msg_data = getPublishMessageFirst(&hdr_pubsub->data.publish.msg); + clusterMsgDataPublishMessage *msg_data = getFirstPublishMessage(&hdr_pubsub->data.publish.msg); while (data_count--) { - uint32_t msglen = getPublishMsgLength(msg_data); + uint32_t msglen = getPublishMessageLength(msg_data); explen += msglen; msg_data = getPublishMessageNext(msg_data); } @@ -3019,8 +3060,13 @@ int clusterProcessPacket(clusterLink *link) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } - if (sender && (!nodeSupportsLightMsgHdr(link->node)) && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { - sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + /* Checks if the node supports light message hdr */ + if (sender) { + if (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED) { + sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + } else { + server.cluster->is_light_hdr_supported = 0; + } } if (sender && !nodeInHandshake(sender)) { @@ -3579,9 +3625,7 @@ void clusterReadHandler(connection *conn) { int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); if (rcvbuflen == RCVBUF_MIN_READ_LEN) { /* Perform some sanity check on the message signature - * and length. - * The minimum length for PUBLISH and PUBLISHSHARD will be clusterMsgLight_MIN_LEN - * as we are using the `clusterMsgLight` hdr. */ + * and length. */ if (memcmp(hdr->sig, "RCmb", 4) != 0 || (!is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)) || (is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_LIGHT_MIN_LEN))) { char ip[NET_IP_STR_LEN]; @@ -3670,18 +3714,6 @@ void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_sent[type]++; } -/* Helper function to send message to node depending on - * the header type supported by the node. */ -void clusterSendPublishMessage(clusterNode *node, int type, clusterMsgSendBlock *msgblock) { - if ((type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) && !nodeSupportsLightMsgHdr(node)) { - clusterSendMessage(node->link, msgblock); - } else if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && - nodeSupportsLightMsgHdr(node)) { - clusterSendMessage(node->link, msgblock); - } - return; -} - /* Send a message to all the nodes that are part of the cluster having * a connected link. * @@ -3692,28 +3724,29 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictIterator *di; dictEntry *de; - uint16_t type = ntohs(msgblock->msg.type); - int is_msg_type_publish = (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD || - type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); - int nodes_not_supporting_light_header = 0; - di = dictGetSafeIterator(server.cluster->nodes); while ((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); - if (!nodeSupportsLightMsgHdr(node)) nodes_not_supporting_light_header++; + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (!is_msg_type_publish) { - clusterSendMessage(node->link, msgblock); - } else { - clusterSendPublishMessage(node, type, msgblock); - } + clusterSendMessage(node->link, msgblock); } dictReleaseIterator(di); +} - if (nodes_not_supporting_light_header) { - server.cluster->is_light_hdr_supported = 0; - } else { - server.cluster->is_light_hdr_supported = 1; +/* Send a message to all the nodes in the shard */ +void clusterShardBroadcastMessage(clusterMsgSendBlock *msgblock) { + listIter li; + listNode *ln; + + list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); + serverAssert(nodes_for_slot != NULL); + + listRewind(nodes_for_slot, &li); + while ((ln = listNext(&li))) { + clusterNode *node = listNodeValue(ln); + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + clusterSendMessage(node->link, msgblock); } } @@ -4038,7 +4071,7 @@ clusterMsgSendBlock *clusterCreatePublishLightMsgBlock(robj *channel, robj **mes clusterMsgLight *hdr = &msgblock->light_msg; hdr->data.publish.msg.data_count = htonu64(count + 1); - clusterMsgDataPublishMessage *cursor = getPublishMessageFirst(&hdr->data.publish.msg); + clusterMsgDataPublishMessage *cursor = getFirstPublishMessage(&hdr->data.publish.msg); writePublishMessageToCursor(cursor, channel); for (i = 0; i < count; i++) { cursor = getPublishMessageNext(cursor); @@ -4144,63 +4177,52 @@ int clusterSendModuleMessageToTarget(const char *target, * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded) { + /* Currently publish command does not support multiple payload. */ + serverAssert(count == 1); + clusterMsgSendBlock *msgblock, *msgblock_light; - int i; + int nodes_not_supporting_light_header = 0; + msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); - if (!sharded) { - clusterBroadcastMessage(msgblock_light); - clusterMsgSendBlockDecrRefCount(msgblock_light); - if (!server.cluster->is_light_hdr_supported) { - for (i = 0; i < count; i++) { - msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); - } - } - return; - } - listIter li; - listNode *ln; - clusterNode *node; - list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); - serverAssert(nodes_for_slot != NULL); - listRewind(nodes_for_slot, &li); - int nodes_not_supporting_light_header = 0; - while ((ln = listNext(&li))) { - node = listNodeValue(ln); - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) { - clusterSendMessage(node->link, msgblock_light); + if (server.cluster->is_light_hdr_supported) { + /* If the cluster is homogeneous broadcast to all nodes */ + if (sharded) { + clusterShardBroadcastMessage(msgblock_light); } else { - nodes_not_supporting_light_header++; - continue; + clusterBroadcastMessage(msgblock_light); } - } - clusterMsgSendBlockDecrRefCount(msgblock_light); - - if (nodes_not_supporting_light_header) { - server.cluster->is_light_hdr_supported = 0; } else { - server.cluster->is_light_hdr_supported = 1; - } - - if (!server.cluster->is_light_hdr_supported) { - for (i = 0; i < count; i++) { - listRewind(nodes_for_slot, &li); - msgblock = clusterCreatePublishMsgBlock(channel, messages[i], CLUSTERMSG_TYPE_PUBLISHSHARD); - while ((ln = listNext(&li))) { - node = listNodeValue(ln); - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (nodeSupportsLightMsgHdr(node)) - continue; - else - clusterSendMessage(node->link, msgblock); + msgblock = clusterCreatePublishMsgBlock(channel, messages[0], + sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + DictOrListIterator iter; + if (sharded) { + list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); + serverAssert(nodes_for_slot != NULL); + iterInitForList(&iter, nodes_for_slot); + } else { + iterInitForDict(&iter, server.cluster->nodes); + } + clusterNode *node; + while ((node = iterNext(&iter)) != NULL) { + /* If the cluster is not marked as homogeneous send respective message blocks to all nodes. */ + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + if (!nodeSupportsLightMsgHdr(node)) { + clusterSendMessage(node->link, msgblock); + nodes_not_supporting_light_header++; + continue; } - clusterMsgSendBlockDecrRefCount(msgblock); + clusterSendMessage(node->link, msgblock_light); } + iterReset(&iter); + /* If the cluster is not heterogeneous anymore, update the is_light_hdr_supported to 1 */ + if (!nodes_not_supporting_light_header) { + server.cluster->is_light_hdr_supported = 1; + } + clusterMsgSendBlockDecrRefCount(msgblock); } + clusterMsgSendBlockDecrRefCount(msgblock_light); } /* ----------------------------------------------------------------------------- From a578f1eeb9b97d99a6d9bd794e74a29de62d5864 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 19:22:08 +0000 Subject: [PATCH 18/45] address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 10 +++++----- src/cluster_legacy.h | 2 +- src/server.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 5dc273dd94..fbf83a1040 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1085,7 +1085,7 @@ void clusterInit(void) { server.cluster->failover_auth_epoch = 0; server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; server.cluster->lastVoteEpoch = 0; - server.cluster->is_light_hdr_supported = 1; + server.cluster->all_nodes_are_known_to_support_light_hdr = 1; /* Initialize stats */ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { @@ -3065,7 +3065,7 @@ int clusterProcessPacket(clusterLink *link) { if (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED) { sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; } else { - server.cluster->is_light_hdr_supported = 0; + server.cluster->all_nodes_are_known_to_support_light_hdr = 0; } } @@ -4186,7 +4186,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); - if (server.cluster->is_light_hdr_supported) { + if (server.cluster->all_nodes_are_known_to_support_light_hdr) { /* If the cluster is homogeneous broadcast to all nodes */ if (sharded) { clusterShardBroadcastMessage(msgblock_light); @@ -4216,9 +4216,9 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar clusterSendMessage(node->link, msgblock_light); } iterReset(&iter); - /* If the cluster is not heterogeneous anymore, update the is_light_hdr_supported to 1 */ + /* If the cluster is not heterogeneous anymore, update the all_nodes_are_known_to_support_light_hdr to 1 */ if (!nodes_not_supporting_light_header) { - server.cluster->is_light_hdr_supported = 1; + server.cluster->all_nodes_are_known_to_support_light_hdr = 1; } clusterMsgSendBlockDecrRefCount(msgblock); } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index eb1c139a30..adb4bb75fa 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -379,7 +379,7 @@ struct clusterState { clusterNode *migrating_slots_to[CLUSTER_SLOTS]; clusterNode *importing_slots_from[CLUSTER_SLOTS]; clusterNode *slots[CLUSTER_SLOTS]; - int is_light_hdr_supported; + int all_nodes_are_known_to_support_light_hdr; /* The following fields are used to take the replica state on elections. */ mstime_t failover_auth_time; /* Time of previous or next election. */ int failover_auth_count; /* Number of votes received so far. */ diff --git a/src/server.h b/src/server.h index cb4a89481a..cc072379cc 100644 --- a/src/server.h +++ b/src/server.h @@ -3289,9 +3289,9 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify); void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot); int pubsubUnsubscribeAllPatterns(client *c, int notify); int pubsubPublishMessage(robj *channel, robj *message, int sharded); -int pubsubPublishMessages(robj *channel, robj **message, int count, int sharded); +int pubsubPublishMessages(robj *channel, robj **messages, int count, int sharded); int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded); -int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **message, int count, int sharded); +int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded); void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk); int serverPubsubSubscriptionCount(void); int serverPubsubShardSubscriptionCount(void); From 511694e90b5ec05a23d10962ce10a05cdef962cb Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 21:21:14 +0000 Subject: [PATCH 19/45] Address feeaback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 113 ++++++++++++++++--------------------------- src/cluster_legacy.h | 22 ++++----- src/pubsub.c | 4 +- 3 files changed, 56 insertions(+), 83 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index fbf83a1040..d181344557 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -193,37 +193,38 @@ typedef struct { dictIterator di; listIter li; }; -} DictOrListIterator; +} ClusterNodeIterator; -void iterInitForDict(DictOrListIterator *iter, dict *d) { +void clusterNodeIterInitAllNodes(ClusterNodeIterator *iter, dict *d) { iter->type = ITER_DICT; dictInitSafeIterator(&iter->di, d); } -void iterInitForList(DictOrListIterator *iter, list *l) { +void clusterNodeIterInitMyShard(ClusterNodeIterator *iter, list *l) { iter->type = ITER_LIST; listRewind(l, &iter->li); } -void *iterNext(DictOrListIterator *iter) { +clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { switch (iter->type) { case ITER_DICT: { - // Get the next entry in the dictionary + /* Get the next entry in the dictionary */ dictEntry *de = dictNext(&iter->di); - // Return the value associated with the entry, or NULL if no more entries + /* Return the value associated with the entry, or NULL if no more entries */ return de ? dictGetVal(de) : NULL; } case ITER_LIST: { - // Get the next node in the list + /* Get the next node in the list */ listNode *ln = listNext(&iter->li); - // Return the value associated with the node, or NULL if no more nodes + /* Return the value associated with the node, or NULL if no more nodes */ return ln ? listNodeValue(ln) : NULL; } - default: serverPanic("bad type"); return NULL; // This line is unreachable but added to avoid compiler warnings + /* This line is unreachable but added to avoid compiler warnings */ + default: serverPanic("bad type"); return NULL; } } -void iterReset(DictOrListIterator *iter) { +void clusterNodeIterReset(ClusterNodeIterator *iter) { if (iter->type == ITER_DICT) { dictResetIterator(&iter->di); } @@ -1085,7 +1086,6 @@ void clusterInit(void) { server.cluster->failover_auth_epoch = 0; server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; server.cluster->lastVoteEpoch = 0; - server.cluster->all_nodes_are_known_to_support_light_hdr = 1; /* Initialize stats */ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { @@ -3061,12 +3061,8 @@ int clusterProcessPacket(clusterLink *link) { } /* Checks if the node supports light message hdr */ - if (sender) { - if (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED) { + if (sender && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; - } else { - server.cluster->all_nodes_are_known_to_support_light_hdr = 0; - } } if (sender && !nodeInHandshake(sender)) { @@ -3602,6 +3598,16 @@ void clusterLinkConnectHandler(connection *conn) { serverLog(LL_DEBUG, "Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); } +static inline int isHeaderValid(clusterMsg *hdr) { + if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; + uint16_t type = ntohs(hdr->type); + int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); + uint32_t totlen = ntohl(hdr->totlen); + uint32_t minlen = is_msg_light ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; + if (totlen < minlen) return 0; + return 1; +} + /* Read data. Try to read the first field of the header first to check the * full length of the packet. When a whole packet is in memory this function * will call the function to process the packet. And so forth. */ @@ -3621,13 +3627,10 @@ void clusterReadHandler(connection *conn) { } else { /* Finally read the full message. */ hdr = (clusterMsg *)link->rcvbuf; - uint16_t type = ntohs(hdr->type); - int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); if (rcvbuflen == RCVBUF_MIN_READ_LEN) { /* Perform some sanity check on the message signature * and length. */ - if (memcmp(hdr->sig, "RCmb", 4) != 0 || (!is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)) || - (is_msg_light && (ntohl(hdr->totlen) < CLUSTERMSG_LIGHT_MIN_LEN))) { + if (isHeaderValid(hdr)) { char ip[NET_IP_STR_LEN]; int port; if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { @@ -3734,22 +3737,6 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictReleaseIterator(di); } -/* Send a message to all the nodes in the shard */ -void clusterShardBroadcastMessage(clusterMsgSendBlock *msgblock) { - listIter li; - listNode *ln; - - list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); - serverAssert(nodes_for_slot != NULL); - - listRewind(nodes_for_slot, &li); - while ((ln = listNext(&li))) { - clusterNode *node = listNodeValue(ln); - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - clusterSendMessage(node->link, msgblock); - } -} - /* Build the message header. hdr must point to a buffer at least * sizeof(clusterMsg) in bytes. */ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { @@ -3762,7 +3749,7 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->totlen = htonl(msglen); if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { - clusterMsgLight *hdr_light = (clusterMsgLight *)(void *)hdr; + clusterMsgLight *hdr_light = (clusterMsgLight *)hdr; hdr_light->notused1 = 0; return; } @@ -4181,47 +4168,33 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar serverAssert(count == 1); clusterMsgSendBlock *msgblock, *msgblock_light; - int nodes_not_supporting_light_header = 0; msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); - if (server.cluster->all_nodes_are_known_to_support_light_hdr) { - /* If the cluster is homogeneous broadcast to all nodes */ - if (sharded) { - clusterShardBroadcastMessage(msgblock_light); - } else { - clusterBroadcastMessage(msgblock_light); - } + ClusterNodeIterator iter; + if (sharded) { + list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); + serverAssert(nodes_for_slot != NULL); + clusterNodeIterInitMyShard(&iter, nodes_for_slot); } else { - msgblock = clusterCreatePublishMsgBlock(channel, messages[0], - sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); - DictOrListIterator iter; - if (sharded) { - list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); - serverAssert(nodes_for_slot != NULL); - iterInitForList(&iter, nodes_for_slot); - } else { - iterInitForDict(&iter, server.cluster->nodes); - } - clusterNode *node; - while ((node = iterNext(&iter)) != NULL) { - /* If the cluster is not marked as homogeneous send respective message blocks to all nodes. */ - if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - if (!nodeSupportsLightMsgHdr(node)) { - clusterSendMessage(node->link, msgblock); - nodes_not_supporting_light_header++; - continue; - } + clusterNodeIterInitAllNodes(&iter, server.cluster->nodes); + } + + clusterNode *node; + while ((node = clusterNodeIterNext(&iter)) != NULL) { + if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; + /* If the cluster is not marked as homogeneous send respective message blocks to all nodes. */ + if (nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock_light); + } else { + msgblock = clusterCreatePublishMsgBlock(channel, messages[0], + sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + clusterSendMessage(node->link, msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); } - iterReset(&iter); - /* If the cluster is not heterogeneous anymore, update the all_nodes_are_known_to_support_light_hdr to 1 */ - if (!nodes_not_supporting_light_header) { - server.cluster->all_nodes_are_known_to_support_light_hdr = 1; - } - clusterMsgSendBlockDecrRefCount(msgblock); } + clusterNodeIterReset(&iter); clusterMsgSendBlockDecrRefCount(msgblock_light); } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index adb4bb75fa..2b83fc89fc 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -121,16 +121,6 @@ typedef struct { unsigned char bulk_data[8]; /* 8 bytes just as placeholder. */ } clusterMsgDataPublish; -typedef struct { - uint32_t message_len; - unsigned char message_data[8]; /* 8 bytes just as placeholder. */ -} clusterMsgDataPublishMessage; - -typedef struct { - uint64_t data_count; - clusterMsgDataPublishMessage *bulk_data; -} clusterMsgDataPublishMulti; - typedef struct { uint64_t configEpoch; /* Config epoch of the specified instance. */ char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */ @@ -232,6 +222,17 @@ union clusterMsgData { } module; }; +/* Light message data and it's supported message types. */ +typedef struct { + uint32_t message_len; + unsigned char message_data[8]; /* 8 bytes just as placeholder. */ +} clusterMsgDataPublishMessage; + +typedef struct { + uint64_t data_count; + clusterMsgDataPublishMessage *bulk_data; +} clusterMsgDataPublishMulti; + union clusterMsgDataLight { /* PUBLISH */ struct { @@ -379,7 +380,6 @@ struct clusterState { clusterNode *migrating_slots_to[CLUSTER_SLOTS]; clusterNode *importing_slots_from[CLUSTER_SLOTS]; clusterNode *slots[CLUSTER_SLOTS]; - int all_nodes_are_known_to_support_light_hdr; /* The following fields are used to take the replica state on elections. */ mstime_t failover_auth_time; /* Time of previous or next election. */ int failover_auth_count; /* Number of votes received so far. */ diff --git a/src/pubsub.c b/src/pubsub.c index 168b0f1d08..eab2485cae 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -629,7 +629,7 @@ void publishCommand(client *c) { return; } - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc - 2, 0); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 0); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } @@ -715,7 +715,7 @@ void channelList(client *c, sds pat, kvstore *pubsub_channels) { /* SPUBLISH */ void spublishCommand(client *c) { - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], c->argc - 2, 1); + int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 1); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } From f4da9355678a0d4068b7daa1d41fd95d78d367e5 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 21:45:22 +0000 Subject: [PATCH 20/45] minor Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index d181344557..a8d4c79c14 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3062,7 +3062,7 @@ int clusterProcessPacket(clusterLink *link) { /* Checks if the node supports light message hdr */ if (sender && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { - sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; } if (sender && !nodeInHandshake(sender)) { @@ -3630,7 +3630,7 @@ void clusterReadHandler(connection *conn) { if (rcvbuflen == RCVBUF_MIN_READ_LEN) { /* Perform some sanity check on the message signature * and length. */ - if (isHeaderValid(hdr)) { + if (!isHeaderValid(hdr)) { char ip[NET_IP_STR_LEN]; int port; if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { @@ -4189,7 +4189,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar clusterSendMessage(node->link, msgblock_light); } else { msgblock = clusterCreatePublishMsgBlock(channel, messages[0], - sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); clusterSendMessage(node->link, msgblock); clusterMsgSendBlockDecrRefCount(msgblock); } From 88b1271d4d7f8682e7f96f08e445c29f138c06a1 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 22:28:21 +0000 Subject: [PATCH 21/45] address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 61799035d3..5d407bf10b 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -195,14 +195,16 @@ typedef struct { }; } ClusterNodeIterator; -void clusterNodeIterInitAllNodes(ClusterNodeIterator *iter, dict *d) { +void clusterNodeIterInitAllNodes(ClusterNodeIterator *iter) { iter->type = ITER_DICT; - dictInitSafeIterator(&iter->di, d); + dictInitSafeIterator(&iter->di, server.cluster->nodes); } -void clusterNodeIterInitMyShard(ClusterNodeIterator *iter, list *l) { +void clusterNodeIterInitMyShard(ClusterNodeIterator *iter) { + list *nodes = clusterGetNodesInMyShard(server.cluster->myself); + serverAssert(nodes != NULL); iter->type = ITER_LIST; - listRewind(l, &iter->li); + listRewind(nodes, &iter->li); } clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { @@ -4167,14 +4169,13 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar msgblock_light = clusterCreatePublishLightMsgBlock( channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); - + /* We will only create msgblock with normal hdr if there are any nodes that do not support light hdr */ + msgblock = NULL; ClusterNodeIterator iter; if (sharded) { - list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); - serverAssert(nodes_for_slot != NULL); - clusterNodeIterInitMyShard(&iter, nodes_for_slot); + clusterNodeIterInitMyShard(&iter); } else { - clusterNodeIterInitAllNodes(&iter, server.cluster->nodes); + clusterNodeIterInitAllNodes(&iter); } clusterNode *node; @@ -4184,13 +4185,15 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar if (nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock_light); } else { - msgblock = clusterCreatePublishMsgBlock(channel, messages[0], + if (msgblock == NULL) { + msgblock = clusterCreatePublishMsgBlock(channel, messages[0], sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + } clusterSendMessage(node->link, msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); } } clusterNodeIterReset(&iter); + if (msgblock != NULL) clusterMsgSendBlockDecrRefCount(msgblock); clusterMsgSendBlockDecrRefCount(msgblock_light); } From 396fdb66187ec347ca6496ea644de49d3de3f217 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 22:34:10 +0000 Subject: [PATCH 22/45] fixes tests Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 5d407bf10b..597d5bb363 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3041,20 +3041,10 @@ int clusterProcessPacket(clusterLink *link) { } clusterMsg *hdr = (clusterMsg *)link->rcvbuf; + clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - uint16_t flags = ntohs(hdr->flags); - uint64_t sender_claimed_current_epoch = 0, sender_claimed_config_epoch = 0; - clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); - int sender_claims_to_be_primary = !memcmp(hdr->replicaof, CLUSTER_NODE_NULL_NAME, CLUSTER_NAMELEN); - int sender_last_reported_as_replica = sender && nodeIsReplica(sender); - int sender_last_reported_as_primary = sender && nodeIsPrimary(sender); - - if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { - sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; - } - /* Update the last time we saw any data from this node. We * use this in order to avoid detecting a timeout from a node that * is just sending a lot of data in the cluster bus, for instance @@ -3066,6 +3056,16 @@ int clusterProcessPacket(clusterLink *link) { return pubsubProcessLightPacket(link, type); } + uint16_t flags = ntohs(hdr->flags); + uint64_t sender_claimed_current_epoch = 0, sender_claimed_config_epoch = 0; + int sender_claims_to_be_primary = !memcmp(hdr->replicaof, CLUSTER_NODE_NULL_NAME, CLUSTER_NAMELEN); + int sender_last_reported_as_replica = sender && nodeIsReplica(sender); + int sender_last_reported_as_primary = sender && nodeIsPrimary(sender); + + if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { + sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; + } + if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; From 79dddd691c32697070edb112ae305511cab9dd4d Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 22:49:41 +0000 Subject: [PATCH 23/45] correct format Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 597d5bb363..df5d704dc8 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4186,8 +4186,8 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar clusterSendMessage(node->link, msgblock_light); } else { if (msgblock == NULL) { - msgblock = clusterCreatePublishMsgBlock(channel, messages[0], - sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + msgblock = clusterCreatePublishMsgBlock( + channel, messages[0], sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); } clusterSendMessage(node->link, msgblock); } From a11ae95cdfcaeb7633a05e4566a56c1b909d274d Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 23:10:27 +0000 Subject: [PATCH 24/45] remove unsafe call to getNodeFromLinkAndMsg Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index df5d704dc8..1eaa24d4fa 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3039,23 +3039,29 @@ int clusterProcessPacket(clusterLink *link) { } return 1; } - + clusterNode *sender; clusterMsg *hdr = (clusterMsg *)link->rcvbuf; - clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); + if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + if (!link->node || nodeInHandshake(link->node)) { + freeClusterLink(link); + serverLog(LL_WARNING, "Closing link for node that sent a lightweight message of type %hu as its first message on the link", type); + return 0; + } + sender = link->node; + sender->data_received = now; + return pubsubProcessLightPacket(link, type); + } + + sender = getNodeFromLinkAndMsg(link, hdr); /* Update the last time we saw any data from this node. We * use this in order to avoid detecting a timeout from a node that * is just sending a lot of data in the cluster bus, for instance * because of Pub/Sub. */ if (sender) sender->data_received = now; - if (sender && (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) && - nodeSupportsLightMsgHdr(sender)) { - return pubsubProcessLightPacket(link, type); - } - uint16_t flags = ntohs(hdr->flags); uint64_t sender_claimed_current_epoch = 0, sender_claimed_config_epoch = 0; int sender_claims_to_be_primary = !memcmp(hdr->replicaof, CLUSTER_NODE_NULL_NAME, CLUSTER_NAMELEN); @@ -3066,7 +3072,6 @@ int clusterProcessPacket(clusterLink *link) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } - if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } From 0637ca6f92ea2f0a77e0c842d7915e0775cac237 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 23:15:56 +0000 Subject: [PATCH 25/45] correct format Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 1eaa24d4fa..ecf9502e15 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3047,8 +3047,11 @@ int clusterProcessPacket(clusterLink *link) { if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); - serverLog(LL_WARNING, "Closing link for node that sent a lightweight message of type %hu as its first message on the link", type); - return 0; + serverLog( + LL_WARNING, + "Closing link for node that sent a lightweight message of type %hu as its first message on the link", + type); + return 0; } sender = link->node; sender->data_received = now; From 3d2c38e9088dc864a2be6ff25deaaffb74c33b53 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 23:29:38 +0000 Subject: [PATCH 26/45] correct format Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index ecf9502e15..e1a32a1cf5 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3051,7 +3051,7 @@ int clusterProcessPacket(clusterLink *link) { LL_WARNING, "Closing link for node that sent a lightweight message of type %hu as its first message on the link", type); - return 0; + return 0; } sender = link->node; sender->data_received = now; From ff31e963682acdec16328015ebcdf9a37f2ead7f Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 16 Jul 2024 23:56:46 +0000 Subject: [PATCH 27/45] reduce some code delta Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index e1a32a1cf5..322b899aa8 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3039,7 +3039,7 @@ int clusterProcessPacket(clusterLink *link) { } return 1; } - clusterNode *sender; + clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); @@ -3053,20 +3053,14 @@ int clusterProcessPacket(clusterLink *link) { type); return 0; } - sender = link->node; + clusterNode *sender = link->node; sender->data_received = now; return pubsubProcessLightPacket(link, type); } - sender = getNodeFromLinkAndMsg(link, hdr); - /* Update the last time we saw any data from this node. We - * use this in order to avoid detecting a timeout from a node that - * is just sending a lot of data in the cluster bus, for instance - * because of Pub/Sub. */ - if (sender) sender->data_received = now; - uint16_t flags = ntohs(hdr->flags); uint64_t sender_claimed_current_epoch = 0, sender_claimed_config_epoch = 0; + clusterNode *sender = getNodeFromLinkAndMsg(link, hdr); int sender_claims_to_be_primary = !memcmp(hdr->replicaof, CLUSTER_NODE_NULL_NAME, CLUSTER_NAMELEN); int sender_last_reported_as_replica = sender && nodeIsReplica(sender); int sender_last_reported_as_primary = sender && nodeIsPrimary(sender); @@ -3075,15 +3069,17 @@ int clusterProcessPacket(clusterLink *link) { sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; } - if (sender && (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA)) { - sender->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED; - } - /* Checks if the node supports light message hdr */ if (sender && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; } + /* Update the last time we saw any data from this node. We + * use this in order to avoid detecting a timeout from a node that + * is just sending a lot of data in the cluster bus, for instance + * because of Pub/Sub. */ + if (sender) sender->data_received = now; + if (sender && !nodeInHandshake(sender)) { /* Update our currentEpoch if we see a newer epoch in the cluster. */ sender_claimed_current_epoch = ntohu64(hdr->currentEpoch); From e9066b1db021fd685181d516cf2d9c11a6654e11 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Wed, 17 Jul 2024 17:52:41 +0000 Subject: [PATCH 28/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 322b899aa8..6bab37243c 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1259,7 +1259,7 @@ void clusterReset(int hard) { /* ----------------------------------------------------------------------------- * CLUSTER communication link * -------------------------------------------------------------------------- */ -static void *createClusterMsgSendBlock(int type, uint32_t msglen) { +static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) { uint32_t blocklen = msglen + offsetof(clusterMsgSendBlock, msg); clusterMsgSendBlock *msgblock = zcalloc(blocklen); msgblock->refcount = 1; @@ -3070,8 +3070,12 @@ int clusterProcessPacket(clusterLink *link) { } /* Checks if the node supports light message hdr */ - if (sender && (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED)) { - sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + if (sender) { + if (flags & CLUSTER_NODE_LIGHT_HDR_SUPPORTED) { + sender->flags |= CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + } else { + sender->flags &= ~CLUSTER_NODE_LIGHT_HDR_SUPPORTED; + } } /* Update the last time we saw any data from this node. We @@ -3600,7 +3604,7 @@ void clusterLinkConnectHandler(connection *conn) { serverLog(LL_DEBUG, "Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); } -static inline int isHeaderValid(clusterMsg *hdr) { +static inline int isSigAndMsgLenValid(clusterMsg *hdr) { if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; uint16_t type = ntohs(hdr->type); int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); @@ -3632,7 +3636,7 @@ void clusterReadHandler(connection *conn) { if (rcvbuflen == RCVBUF_MIN_READ_LEN) { /* Perform some sanity check on the message signature * and length. */ - if (!isHeaderValid(hdr)) { + if (!isSigAndMsgLenValid(hdr)) { char ip[NET_IP_STR_LEN]; int port; if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { @@ -4185,7 +4189,6 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar clusterNode *node; while ((node = clusterNodeIterNext(&iter)) != NULL) { if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_HANDSHAKE)) continue; - /* If the cluster is not marked as homogeneous send respective message blocks to all nodes. */ if (nodeSupportsLightMsgHdr(node)) { clusterSendMessage(node->link, msgblock_light); } else { From 562e874b354cac30a37d24f476a813e84e9d44ba Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 18 Jul 2024 03:02:32 +0000 Subject: [PATCH 29/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 17 ++++++++++------- src/cluster_legacy.h | 3 +++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6bab37243c..6f799d0e0e 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -222,7 +222,10 @@ clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { return ln ? listNodeValue(ln) : NULL; } /* This line is unreachable but added to avoid compiler warnings */ - default: serverPanic("bad type"); return NULL; + default: { + serverPanic("bad type"); + return NULL; + } } } @@ -2983,12 +2986,13 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + ntohl(hdr->data.publish.msg.message_len); - } else if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + } else if (IS_PUBSUB_LIGHT_MESSAGE) { clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); explen += sizeof(clusterMsgDataPublishMulti); uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); - explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - 8)); + explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - + (sizeof(((clusterMsgDataPublishMessage *)0)->message_data)))); clusterMsgDataPublishMessage *msg_data = getFirstPublishMessage(&hdr_pubsub->data.publish.msg); while (data_count--) { uint32_t msglen = getPublishMessageLength(msg_data); @@ -3044,7 +3048,7 @@ int clusterProcessPacket(clusterLink *link) { uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + if (IS_PUBSUB_LIGHT_MESSAGE) { if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3607,9 +3611,8 @@ void clusterLinkConnectHandler(connection *conn) { static inline int isSigAndMsgLenValid(clusterMsg *hdr) { if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; uint16_t type = ntohs(hdr->type); - int is_msg_light = (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); uint32_t totlen = ntohl(hdr->totlen); - uint32_t minlen = is_msg_light ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; + uint32_t minlen = IS_PUBSUB_LIGHT_MESSAGE ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; if (totlen < minlen) return 0; return 1; } @@ -3754,7 +3757,7 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->type = htons(type); hdr->totlen = htonl(msglen); - if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { + if (IS_PUBSUB_LIGHT_MESSAGE) { clusterMsgLight *hdr_light = (clusterMsgLight *)hdr; hdr_light->notused1 = 0; return; diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 2b83fc89fc..8ff38a87e7 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,6 +96,9 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ #define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ +/* Macros for condition which are repeadedly used. */ +#define IS_PUBSUB_LIGHT_MESSAGE (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) + /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this * address for all the next messages. */ From c6f9bb5103d4cb99860e18af3554822249d125cd Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 18 Jul 2024 03:07:05 +0000 Subject: [PATCH 30/45] spell check Signed-off-by: Roshan Khatri --- src/cluster_legacy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 8ff38a87e7..8b192a5419 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,7 +96,7 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ #define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ -/* Macros for condition which are repeadedly used. */ +/* Macros for condition which are repeatedly used. */ #define IS_PUBSUB_LIGHT_MESSAGE (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) /* Initially we don't know our "name", but we'll find it once we connect From aac78940217bd68f0368a93e32fd226d81a2804a Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 18 Jul 2024 03:17:48 +0000 Subject: [PATCH 31/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 8 ++++---- src/cluster_legacy.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6f799d0e0e..d352ccc35d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2986,7 +2986,7 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + ntohl(hdr->data.publish.msg.message_len); - } else if (IS_PUBSUB_LIGHT_MESSAGE) { + } else if (IS_PUBSUB_LIGHT_MESSAGE(type)) { clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); explen += sizeof(clusterMsgDataPublishMulti); @@ -3048,7 +3048,7 @@ int clusterProcessPacket(clusterLink *link) { uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - if (IS_PUBSUB_LIGHT_MESSAGE) { + if (IS_PUBSUB_LIGHT_MESSAGE(type)) { if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3612,7 +3612,7 @@ static inline int isSigAndMsgLenValid(clusterMsg *hdr) { if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; uint16_t type = ntohs(hdr->type); uint32_t totlen = ntohl(hdr->totlen); - uint32_t minlen = IS_PUBSUB_LIGHT_MESSAGE ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; + uint32_t minlen = IS_PUBSUB_LIGHT_MESSAGE(type) ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; if (totlen < minlen) return 0; return 1; } @@ -3757,7 +3757,7 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->type = htons(type); hdr->totlen = htonl(msglen); - if (IS_PUBSUB_LIGHT_MESSAGE) { + if (IS_PUBSUB_LIGHT_MESSAGE(type)) { clusterMsgLight *hdr_light = (clusterMsgLight *)hdr; hdr_light->notused1 = 0; return; diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 8b192a5419..7634cb608e 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,8 +96,8 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ #define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ -/* Macros for condition which are repeatedly used. */ -#define IS_PUBSUB_LIGHT_MESSAGE (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) +#define IS_PUBSUB_LIGHT_MESSAGE(type) \ + (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this From f0db157ec504d801f4f02547e2584b536764ad64 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 18 Jul 2024 19:18:56 +0000 Subject: [PATCH 32/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 2 +- src/pubsub.c | 12 ++++++------ src/server.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index d352ccc35d..53545dc8fc 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3052,7 +3052,7 @@ int clusterProcessPacket(clusterLink *link) { if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( - LL_WARNING, + LL_NOTICE, "Closing link for node that sent a lightweight message of type %hu as its first message on the link", type); return 0; diff --git a/src/pubsub.c b/src/pubsub.c index eab2485cae..27496c7706 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -533,7 +533,7 @@ int pubsubPublishMessage(robj *channel, robj *message, int sharded) { } /* Publish messages to all the subscribers. */ -int pubsubPublishMessages(robj *channel, robj **messages, int count, int sharded) { +int pubsubPublishMultiMessages(robj *channel, robj **messages, int count, int sharded) { int total_receivers = 0; for (int i = 0; i < count; i++) { total_receivers += pubsubPublishMessage(channel, messages[i], sharded); @@ -612,14 +612,14 @@ void punsubscribeCommand(client *c) { /* This function wraps pubsubPublishMessage and also propagates the message to cluster. * Used by the commands PUBLISH/SPUBLISH and their respective module APIs.*/ -int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded) { - int receivers = pubsubPublishMessages(channel, messages, count, sharded); +int pubsubPublishMultiMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded) { + int receivers = pubsubPublishMultiMessages(channel, messages, count, sharded); if (server.cluster_enabled) clusterPropagatePublish(channel, messages, count, sharded); return receivers; } int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded) { - return pubsubPublishMessagesAndPropagateToCluster(channel, &message, 1, sharded); + return pubsubPublishMultiMessagesAndPropagateToCluster(channel, &message, 1, sharded); } /* PUBLISH */ @@ -629,7 +629,7 @@ void publishCommand(client *c) { return; } - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 0); + int receivers = pubsubPublishMultiMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 0); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } @@ -715,7 +715,7 @@ void channelList(client *c, sds pat, kvstore *pubsub_channels) { /* SPUBLISH */ void spublishCommand(client *c) { - int receivers = pubsubPublishMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 1); + int receivers = pubsubPublishMultiMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 1); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } diff --git a/src/server.h b/src/server.h index cc072379cc..ae3e67ccc2 100644 --- a/src/server.h +++ b/src/server.h @@ -3289,9 +3289,9 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify); void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot); int pubsubUnsubscribeAllPatterns(client *c, int notify); int pubsubPublishMessage(robj *channel, robj *message, int sharded); -int pubsubPublishMessages(robj *channel, robj **messages, int count, int sharded); +int pubsubPublishMultiMessages(robj *channel, robj **messages, int count, int sharded); int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded); -int pubsubPublishMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded); +int pubsubPublishMultiMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded); void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk); int serverPubsubSubscriptionCount(void); int serverPubsubShardSubscriptionCount(void); From 666132f744f5a729aa94b4688b832294fd901ce8 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Thu, 18 Jul 2024 20:10:37 +0000 Subject: [PATCH 33/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 17 +++++++++-------- src/cluster_legacy.h | 5 +++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 53545dc8fc..de280357ac 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2899,15 +2899,15 @@ static uint32_t getPublishMessageLength(clusterMsgDataPublishMessage *cursor) { return msg_length; } -int pubsubProcessLightPacket(clusterLink *link, uint16_t type) { +int processLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; - robj *channel, *message; - uint64_t data_count; /* Don't bother creating useless objects if there are no * Pub/Sub subscribers. */ if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { + robj *channel, *message; + uint64_t data_count; data_count = ntohu64(hdr->data.publish.msg.data_count); clusterMsgDataPublishMessage *cursor = getFirstPublishMessage(&hdr->data.publish.msg); channel = readPublishMessageFromCursor(cursor); @@ -2986,7 +2986,7 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + ntohl(hdr->data.publish.msg.message_len); - } else if (IS_PUBSUB_LIGHT_MESSAGE(type)) { + } else if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); explen += sizeof(clusterMsgDataPublishMulti); @@ -3048,7 +3048,7 @@ int clusterProcessPacket(clusterLink *link) { uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - if (IS_PUBSUB_LIGHT_MESSAGE(type)) { + if (IS_LIGHT_MESSAGE(type)) { if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3059,7 +3059,7 @@ int clusterProcessPacket(clusterLink *link) { } clusterNode *sender = link->node; sender->data_received = now; - return pubsubProcessLightPacket(link, type); + return processLightPacket(link, type); } uint16_t flags = ntohs(hdr->flags); @@ -3612,7 +3612,7 @@ static inline int isSigAndMsgLenValid(clusterMsg *hdr) { if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; uint16_t type = ntohs(hdr->type); uint32_t totlen = ntohl(hdr->totlen); - uint32_t minlen = IS_PUBSUB_LIGHT_MESSAGE(type) ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; + uint32_t minlen = IS_LIGHT_MESSAGE(type) ? CLUSTERMSG_LIGHT_MIN_LEN : CLUSTERMSG_MIN_LEN; if (totlen < minlen) return 0; return 1; } @@ -3757,9 +3757,10 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { hdr->type = htons(type); hdr->totlen = htonl(msglen); - if (IS_PUBSUB_LIGHT_MESSAGE(type)) { + if (IS_LIGHT_MESSAGE(type)) { clusterMsgLight *hdr_light = (clusterMsgLight *)hdr; hdr_light->notused1 = 0; + hdr_light->notused2 = 0; return; } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 7634cb608e..2b41f6239a 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,8 +96,7 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ #define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ -#define IS_PUBSUB_LIGHT_MESSAGE(type) \ - (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) +#define IS_LIGHT_MESSAGE(type) (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this @@ -320,6 +319,7 @@ typedef struct { uint16_t ver; /* Protocol version, currently set to CLUSTER_PROTO_VER. */ uint16_t notused1; uint16_t type; /* Message type */ + uint16_t notused2; union clusterMsgDataLight data; } clusterMsgLight; @@ -328,6 +328,7 @@ static_assert(offsetof(clusterMsgLight, totlen) == offsetof(clusterMsg, totlen), static_assert(offsetof(clusterMsgLight, ver) == offsetof(clusterMsg, ver), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, notused1) == offsetof(clusterMsg, port), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type), "unexpected field offset"); +static_assert(offsetof(clusterMsgLight, notused2) == offsetof(clusterMsg, count), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, data) == 16, "unexpected field offset"); #define CLUSTERMSG_LIGHT_MIN_LEN (sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight)) From 0a91a05e0ae9ac22b247132227377ae3d8963808 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Sun, 21 Jul 2024 23:46:17 +0000 Subject: [PATCH 34/45] Keeps only light header and removes multi code This commit only adds the new format, removes the PUBLISH_LIGHT message types and sets MSB for any light msg type. Signed-off-by: Roshan Khatri --- src/cluster.h | 2 +- src/cluster_legacy.c | 193 ++++++++++++++----------------------------- src/cluster_legacy.h | 30 ++----- src/pubsub.c | 23 ++---- 4 files changed, 72 insertions(+), 176 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index 6f93563650..b52a647d47 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -54,7 +54,7 @@ void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); void clusterUpdateMyselfHumanNodename(void); -void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded); +void clusterPropagatePublish(robj *channel, robj *messages, int sharded); unsigned long getClusterConnectionsCount(void); int isClusterHealthy(void); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index de280357ac..02073e1d31 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2869,58 +2869,30 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } -static clusterMsgDataPublishMessage *getFirstPublishMessage(clusterMsgDataPublishMulti *msg) { - clusterMsgDataPublishMessage *initial = (clusterMsgDataPublishMessage *)&(msg->bulk_data); - return initial; -} - -static clusterMsgDataPublishMessage *getPublishMessageNext(clusterMsgDataPublishMessage *message_cursor) { - clusterMsgDataPublishMessage *next = - (clusterMsgDataPublishMessage *)((char *)message_cursor->message_data + ntohl(message_cursor->message_len)); - return next; -} - -void writePublishMessageToCursor(clusterMsgDataPublishMessage *cursor, robj *data) { - uint32_t data_len = sdslen(data->ptr); - cursor->message_len = htonl(data_len); - memcpy(cursor->message_data, data->ptr, data_len); - return; -} - -static robj *readPublishMessageFromCursor(clusterMsgDataPublishMessage *cursor) { - uint32_t data_len; - data_len = ntohl(cursor->message_len); - robj *data = (createStringObject((char *)cursor->message_data, data_len)); - return data; -} +void processPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { + robj *channel, *message; + uint32_t channel_len, message_len; -static uint32_t getPublishMessageLength(clusterMsgDataPublishMessage *cursor) { - uint32_t msg_length = ntohl(cursor->message_len); - return msg_length; + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || + (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { + channel_len = ntohl(publish_data->channel_len); + message_len = ntohl(publish_data->message_len); + channel = createStringObject((char *)publish_data->bulk_data, channel_len); + message = createStringObject((char *)publish_data->bulk_data + channel_len, message_len); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + decrRefCount(channel); + decrRefCount(message); + } } -int processLightPacket(clusterLink *link, uint16_t type) { +void processLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; - /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH_LIGHT && serverPubsubSubscriptionCount() > 0) || - (type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT && serverPubsubShardSubscriptionCount() > 0)) { - robj *channel, *message; - uint64_t data_count; - data_count = ntohu64(hdr->data.publish.msg.data_count); - clusterMsgDataPublishMessage *cursor = getFirstPublishMessage(&hdr->data.publish.msg); - channel = readPublishMessageFromCursor(cursor); - data_count -= 1; - while (data_count--) { - cursor = getPublishMessageNext(cursor); - message = readPublishMessageFromCursor(cursor); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT); - decrRefCount(message); - } - decrRefCount(channel); + if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + processPublishPacket(&hdr->data.publish.msg, type); } - return 1; } @@ -2928,6 +2900,10 @@ int clusterIsValidPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint32_t totlen = ntohl(hdr->totlen); uint16_t type = ntohs(hdr->type); + int is_light = IS_LIGHT_MESSAGE(type); + if (is_light) { + type &= ~CLUSTERMSG_MSB; + } if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_received[type]++; @@ -2983,22 +2959,18 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataFail); } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(hdr->data.publish.msg.channel_len) + - ntohl(hdr->data.publish.msg.message_len); - } else if (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) { - clusterMsgLight *hdr_pubsub = (clusterMsgLight *)link->rcvbuf; - explen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); - explen += sizeof(clusterMsgDataPublishMulti); - uint64_t data_count = ntohu64(hdr_pubsub->data.publish.msg.data_count); - explen += ((data_count) * (sizeof(clusterMsgDataPublishMessage) - - (sizeof(((clusterMsgDataPublishMessage *)0)->message_data)))); - clusterMsgDataPublishMessage *msg_data = getFirstPublishMessage(&hdr_pubsub->data.publish.msg); - while (data_count--) { - uint32_t msglen = getPublishMessageLength(msg_data); - explen += msglen; - msg_data = getPublishMessageNext(msg_data); + clusterMsgDataPublish *publish_data; + if (is_light) { + clusterMsgLight *hdr_light = (clusterMsgLight *)link->rcvbuf; + publish_data = &hdr_light->data.publish.msg; + explen = sizeof(clusterMsgLight); + } else { + publish_data = &hdr->data.publish.msg; + explen = sizeof(clusterMsg); } + explen -= sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(publish_data->channel_len) + + ntohl(publish_data->message_len); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); @@ -3047,8 +3019,10 @@ int clusterProcessPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); + int is_light = IS_LIGHT_MESSAGE(type); - if (IS_LIGHT_MESSAGE(type)) { + if (is_light) { + type &= ~CLUSTERMSG_MSB; if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3059,7 +3033,8 @@ int clusterProcessPacket(clusterLink *link) { } clusterNode *sender = link->node; sender->data_received = now; - return processLightPacket(link, type); + processLightPacket(link, type); + return 1; } uint16_t flags = ntohs(hdr->flags); @@ -3428,22 +3403,7 @@ int clusterProcessPacket(clusterLink *link) { } } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { if (!sender) return 1; /* We don't know that node. */ - - robj *channel, *message; - uint32_t channel_len, message_len; - - /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || - (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { - channel_len = ntohl(hdr->data.publish.msg.channel_len); - message_len = ntohl(hdr->data.publish.msg.message_len); - channel = createStringObject((char *)hdr->data.publish.msg.bulk_data, channel_len); - message = createStringObject((char *)hdr->data.publish.msg.bulk_data + channel_len, message_len); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); - decrRefCount(channel); - decrRefCount(message); - } + processPublishPacket(&hdr->data.publish.msg, type); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { if (!sender) return 1; /* We don't know that node. */ clusterSendFailoverAuthIfNeeded(sender, hdr); @@ -4020,62 +3980,35 @@ void clusterBroadcastPong(int target) { * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false * positive in this context. */ VALKEY_NO_SANITIZE("bounds") -clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { +clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, int is_light, int is_sharded) { uint32_t channel_len, message_len; + uint16_t type = is_sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH; + type = is_light? (type | CLUSTERMSG_MSB) : type; channel = getDecodedObject(channel); message = getDecodedObject(message); channel_len = sdslen(channel->ptr); message_len = sdslen(message->ptr); - size_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + size_t msglen = is_light? sizeof(clusterMsgLight) : sizeof(clusterMsg); + msglen -= sizeof(union clusterMsgData); msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); - - clusterMsg *hdr = &msgblock->msg; - hdr->data.publish.msg.channel_len = htonl(channel_len); - hdr->data.publish.msg.message_len = htonl(message_len); - memcpy(hdr->data.publish.msg.bulk_data, channel->ptr, sdslen(channel->ptr)); - memcpy(hdr->data.publish.msg.bulk_data + sdslen(channel->ptr), message->ptr, sdslen(message->ptr)); - - decrRefCount(channel); - decrRefCount(message); - - return msgblock; -} - -clusterMsgSendBlock *clusterCreatePublishLightMsgBlock(robj *channel, robj **messages, int count, uint16_t type) { - uint32_t channel_len, message_len; - int i; - - channel = getDecodedObject(channel); - channel_len = sdslen(channel->ptr); - - uint32_t aggregated_msg_len = 0; - aggregated_msg_len += channel_len; - for (i = 0; i < count; i++) { - messages[i] = getDecodedObject(messages[i]); - message_len = sdslen(messages[i]->ptr); - aggregated_msg_len += message_len; + clusterMsgDataPublish *hdr_data_msg; + if (is_light) { + clusterMsgLight *hdr_light = &msgblock->light_msg; + hdr_data_msg = &hdr_light->data.publish.msg; + } else { + clusterMsg *hdr = &msgblock->msg; + hdr_data_msg = &hdr->data.publish.msg; } + hdr_data_msg->channel_len = htonl(channel_len); + hdr_data_msg->message_len = htonl(message_len); + memcpy(hdr_data_msg->bulk_data, channel->ptr, sdslen(channel->ptr)); + memcpy(hdr_data_msg->bulk_data + sdslen(channel->ptr), message->ptr, sdslen(message->ptr)); - size_t msglen = sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight); - msglen += sizeof(clusterMsgDataPublishMulti); - msglen += ((count + 1) * - (sizeof(clusterMsgDataPublishMessage) - (sizeof(((clusterMsgDataPublishMessage *)0)->message_data)))); - msglen += aggregated_msg_len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); - - clusterMsgLight *hdr = &msgblock->light_msg; - hdr->data.publish.msg.data_count = htonu64(count + 1); - clusterMsgDataPublishMessage *cursor = getFirstPublishMessage(&hdr->data.publish.msg); - writePublishMessageToCursor(cursor, channel); - for (i = 0; i < count; i++) { - cursor = getPublishMessageNext(cursor); - writePublishMessageToCursor(cursor, messages[i]); - decrRefCount(messages[i]); - } decrRefCount(channel); + decrRefCount(message); return msgblock; } @@ -4173,14 +4106,10 @@ int clusterSendModuleMessageToTarget(const char *target, * Otherwise: * Publish this message across the slot (primary/replica). * -------------------------------------------------------------------------- */ -void clusterPropagatePublish(robj *channel, robj **messages, int count, int sharded) { - /* Currently publish command does not support multiple payload. */ - serverAssert(count == 1); - +void clusterPropagatePublish(robj *channel, robj *message, int sharded) { clusterMsgSendBlock *msgblock, *msgblock_light; - - msgblock_light = clusterCreatePublishLightMsgBlock( - channel, messages, count, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT : CLUSTERMSG_TYPE_PUBLISH_LIGHT); + msgblock_light = clusterCreatePublishMsgBlock( + channel, message, 1, sharded); /* We will only create msgblock with normal hdr if there are any nodes that do not support light hdr */ msgblock = NULL; ClusterNodeIterator iter; @@ -4198,7 +4127,7 @@ void clusterPropagatePublish(robj *channel, robj **messages, int count, int shar } else { if (msgblock == NULL) { msgblock = clusterCreatePublishMsgBlock( - channel, messages[0], sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + channel, message, 0, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); } clusterSendMessage(node->link, msgblock); } @@ -5750,8 +5679,6 @@ const char *clusterGetMessageTypeString(int type) { case CLUSTERMSG_TYPE_FAIL: return "fail"; case CLUSTERMSG_TYPE_PUBLISH: return "publish"; case CLUSTERMSG_TYPE_PUBLISHSHARD: return "publishshard"; - case CLUSTERMSG_TYPE_PUBLISH_LIGHT: return "publish-light"; - case CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT: return "publishshard-light"; case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; case CLUSTERMSG_TYPE_UPDATE: return "update"; diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 2b41f6239a..98eb8ef4fe 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -92,11 +92,11 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ #define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */ #define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ -#define CLUSTERMSG_TYPE_PUBLISH_LIGHT 11 /* Pub/Sub Publish propagation using light header*/ -#define CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT 12 /* Pub/Sub Publish shard propagation using light header*/ -#define CLUSTERMSG_TYPE_COUNT 13 /* Total number of message types. */ +#define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ +#define CLUSTERMSG_MSB 0x8000 -#define IS_LIGHT_MESSAGE(type) (type == CLUSTERMSG_TYPE_PUBLISH_LIGHT || type == CLUSTERMSG_TYPE_PUBLISHSHARD_LIGHT) +/* For the message with light header, we will set the MSB.*/ +#define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_MSB) /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this @@ -224,24 +224,6 @@ union clusterMsgData { } module; }; -/* Light message data and it's supported message types. */ -typedef struct { - uint32_t message_len; - unsigned char message_data[8]; /* 8 bytes just as placeholder. */ -} clusterMsgDataPublishMessage; - -typedef struct { - uint64_t data_count; - clusterMsgDataPublishMessage *bulk_data; -} clusterMsgDataPublishMulti; - -union clusterMsgDataLight { - /* PUBLISH */ - struct { - clusterMsgDataPublishMulti msg; - } publish; -}; - #define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */ typedef struct { @@ -320,7 +302,7 @@ typedef struct { uint16_t notused1; uint16_t type; /* Message type */ uint16_t notused2; - union clusterMsgDataLight data; + union clusterMsgData data; } clusterMsgLight; static_assert(offsetof(clusterMsgLight, sig) == offsetof(clusterMsg, sig), "unexpected field offset"); @@ -331,7 +313,7 @@ static_assert(offsetof(clusterMsgLight, type) == offsetof(clusterMsg, type), "un static_assert(offsetof(clusterMsgLight, notused2) == offsetof(clusterMsg, count), "unexpected field offset"); static_assert(offsetof(clusterMsgLight, data) == 16, "unexpected field offset"); -#define CLUSTERMSG_LIGHT_MIN_LEN (sizeof(clusterMsgLight) - sizeof(union clusterMsgDataLight)) +#define CLUSTERMSG_LIGHT_MIN_LEN (sizeof(clusterMsgLight) - sizeof(union clusterMsgData)) struct _clusterNode { mstime_t ctime; /* Node object creation time. */ diff --git a/src/pubsub.c b/src/pubsub.c index 27496c7706..b79b532bf8 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -532,15 +532,6 @@ int pubsubPublishMessage(robj *channel, robj *message, int sharded) { return pubsubPublishMessageInternal(channel, message, sharded ? pubSubShardType : pubSubType); } -/* Publish messages to all the subscribers. */ -int pubsubPublishMultiMessages(robj *channel, robj **messages, int count, int sharded) { - int total_receivers = 0; - for (int i = 0; i < count; i++) { - total_receivers += pubsubPublishMessage(channel, messages[i], sharded); - } - return total_receivers; -} - /*----------------------------------------------------------------------------- * Pubsub commands implementation *----------------------------------------------------------------------------*/ @@ -612,14 +603,10 @@ void punsubscribeCommand(client *c) { /* This function wraps pubsubPublishMessage and also propagates the message to cluster. * Used by the commands PUBLISH/SPUBLISH and their respective module APIs.*/ -int pubsubPublishMultiMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded) { - int receivers = pubsubPublishMultiMessages(channel, messages, count, sharded); - if (server.cluster_enabled) clusterPropagatePublish(channel, messages, count, sharded); - return receivers; -} - int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded) { - return pubsubPublishMultiMessagesAndPropagateToCluster(channel, &message, 1, sharded); + int receivers = pubsubPublishMessage(channel, message, sharded); + if (server.cluster_enabled) clusterPropagatePublish(channel, message, sharded); + return receivers; } /* PUBLISH */ @@ -629,7 +616,7 @@ void publishCommand(client *c) { return; } - int receivers = pubsubPublishMultiMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 0); + int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1], c->argv[2], 0); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } @@ -715,7 +702,7 @@ void channelList(client *c, sds pat, kvstore *pubsub_channels) { /* SPUBLISH */ void spublishCommand(client *c) { - int receivers = pubsubPublishMultiMessagesAndPropagateToCluster(c->argv[1], &c->argv[2], 1, 1); + int receivers = pubsubPublishMessageAndPropagateToCluster(c->argv[1], c->argv[2], 1); if (!server.cluster_enabled) forceCommandPropagation(c, PROPAGATE_REPL); addReplyLongLong(c, receivers); } From b7c430c23efb8b33dcd6009ef089e8a28a95c912 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Sun, 21 Jul 2024 23:57:41 +0000 Subject: [PATCH 35/45] Minor code and format change. Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 02073e1d31..b6fd04c74d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2874,7 +2874,7 @@ void processPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { uint32_t channel_len, message_len; /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ + * Pub/Sub subscribers. */ if ((type == CLUSTERMSG_TYPE_PUBLISH && serverPubsubSubscriptionCount() > 0) || (type == CLUSTERMSG_TYPE_PUBLISHSHARD && serverPubsubShardSubscriptionCount() > 0)) { channel_len = ntohl(publish_data->channel_len); @@ -2969,8 +2969,8 @@ int clusterIsValidPacket(clusterLink *link) { explen = sizeof(clusterMsg); } explen -= sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - 8 + ntohl(publish_data->channel_len) + - ntohl(publish_data->message_len); + explen += + sizeof(clusterMsgDataPublish) - 8 + ntohl(publish_data->channel_len) + ntohl(publish_data->message_len); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) { explen = sizeof(clusterMsg) - sizeof(union clusterMsgData); @@ -3983,14 +3983,20 @@ VALKEY_NO_SANITIZE("bounds") clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, int is_light, int is_sharded) { uint32_t channel_len, message_len; uint16_t type = is_sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH; - type = is_light? (type | CLUSTERMSG_MSB) : type; channel = getDecodedObject(channel); message = getDecodedObject(message); channel_len = sdslen(channel->ptr); message_len = sdslen(message->ptr); + size_t msglen; - size_t msglen = is_light? sizeof(clusterMsgLight) : sizeof(clusterMsg); + if (is_light) { + /* We set the MSB for message that needs to sent using light header */ + type |= CLUSTERMSG_MSB; + msglen = sizeof(clusterMsgLight); + } else { + msglen = sizeof(clusterMsg); + } msglen -= sizeof(union clusterMsgData); msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); @@ -4108,8 +4114,7 @@ int clusterSendModuleMessageToTarget(const char *target, * -------------------------------------------------------------------------- */ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { clusterMsgSendBlock *msgblock, *msgblock_light; - msgblock_light = clusterCreatePublishMsgBlock( - channel, message, 1, sharded); + msgblock_light = clusterCreatePublishMsgBlock(channel, message, 1, sharded); /* We will only create msgblock with normal hdr if there are any nodes that do not support light hdr */ msgblock = NULL; ClusterNodeIterator iter; From 4dbc07d34c710882c230a0b0bbb7b6b013d06f02 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 22 Jul 2024 17:34:59 +0000 Subject: [PATCH 36/45] Address some feedback Signed-off-by: Roshan Khatri --- src/cluster.h | 2 +- src/cluster_legacy.c | 17 ++++++++++++++--- src/cluster_legacy.h | 5 +++-- src/server.h | 2 -- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index b52a647d47..d841381088 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -54,7 +54,7 @@ void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); void clusterUpdateMyselfHumanNodename(void); -void clusterPropagatePublish(robj *channel, robj *messages, int sharded); +void clusterPropagatePublish(robj *channel, robj *message, int sharded); unsigned long getClusterConnectionsCount(void); int isClusterHealthy(void); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index b6fd04c74d..43fcc84a73 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2895,6 +2895,16 @@ void processLightPacket(clusterLink *link, uint16_t type) { } } +inline int messageTypeSupportsLightHdr(uint16_t type) { + switch (type) { + case CLUSTERMSG_TYPE_PUBLISH: return 1; + case CLUSTERMSG_TYPE_PUBLISHSHARD: return 1; + } + serverLog(LL_NOTICE, "--- Packet of type %s does not support light cluster header", + clusterGetMessageTypeString(type)); + return 0; +} + int clusterIsValidPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; @@ -2902,7 +2912,8 @@ int clusterIsValidPacket(clusterLink *link) { uint16_t type = ntohs(hdr->type); int is_light = IS_LIGHT_MESSAGE(type); if (is_light) { - type &= ~CLUSTERMSG_MSB; + type &= ~CLUSTERMSG_LIGHT; + serverAssert(messageTypeSupportsLightHdr(type)); } if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_received[type]++; @@ -3022,7 +3033,7 @@ int clusterProcessPacket(clusterLink *link) { int is_light = IS_LIGHT_MESSAGE(type); if (is_light) { - type &= ~CLUSTERMSG_MSB; + type &= ~CLUSTERMSG_LIGHT; if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3992,7 +4003,7 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, if (is_light) { /* We set the MSB for message that needs to sent using light header */ - type |= CLUSTERMSG_MSB; + type |= CLUSTERMSG_LIGHT; msglen = sizeof(clusterMsgLight); } else { msglen = sizeof(clusterMsg); diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 98eb8ef4fe..c28339e2e4 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -93,10 +93,11 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */ #define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ #define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ -#define CLUSTERMSG_MSB 0x8000 + +#define CLUSTERMSG_LIGHT 0x8000 /* For the message with light header, we will set the MSB.*/ -#define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_MSB) +#define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_LIGHT) /* Initially we don't know our "name", but we'll find it once we connect * to the first node, using the getsockname() function. Then we'll use this diff --git a/src/server.h b/src/server.h index ae3e67ccc2..f76824784e 100644 --- a/src/server.h +++ b/src/server.h @@ -3289,9 +3289,7 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify); void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot); int pubsubUnsubscribeAllPatterns(client *c, int notify); int pubsubPublishMessage(robj *channel, robj *message, int sharded); -int pubsubPublishMultiMessages(robj *channel, robj **messages, int count, int sharded); int pubsubPublishMessageAndPropagateToCluster(robj *channel, robj *message, int sharded); -int pubsubPublishMultiMessagesAndPropagateToCluster(robj *channel, robj **messages, int count, int sharded); void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk); int serverPubsubSubscriptionCount(void); int serverPubsubShardSubscriptionCount(void); From f9bd8ffdb6121279a750bcb318b568b8b2be0192 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 22 Jul 2024 17:46:46 +0000 Subject: [PATCH 37/45] Add comments for define Signed-off-by: Roshan Khatri --- src/cluster_legacy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index c28339e2e4..1121091003 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -94,9 +94,9 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ #define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ -#define CLUSTERMSG_LIGHT 0x8000 +#define CLUSTERMSG_LIGHT 0x8000 /* Modifier bit for message types that support light header */ -/* For the message with light header, we will set the MSB.*/ +/* We check for the modifier bit to determine if the message is sent using light header.*/ #define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_LIGHT) /* Initially we don't know our "name", but we'll find it once we connect From 64ba73661fbc11d2273408b19be2019068e7ca53 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Mon, 22 Jul 2024 22:41:24 +0000 Subject: [PATCH 38/45] Minor change Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 43fcc84a73..bcde3b68dd 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4142,8 +4142,7 @@ void clusterPropagatePublish(robj *channel, robj *message, int sharded) { clusterSendMessage(node->link, msgblock_light); } else { if (msgblock == NULL) { - msgblock = clusterCreatePublishMsgBlock( - channel, message, 0, sharded ? CLUSTERMSG_TYPE_PUBLISHSHARD : CLUSTERMSG_TYPE_PUBLISH); + msgblock = clusterCreatePublishMsgBlock(channel, message, 0, sharded); } clusterSendMessage(node->link, msgblock); } From 12ba92773ad672db86dced60c7477e0527c1e311 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 03:39:05 +0000 Subject: [PATCH 39/45] Address Feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 57 +++++++++++++++++++++++++------------------- src/cluster_legacy.h | 2 ++ 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index bcde3b68dd..6431ab62ef 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -116,6 +116,7 @@ int auxTcpPortPresent(clusterNode *n); int auxTlsPortSetter(clusterNode *n, void *value, size_t length); sds auxTlsPortGetter(clusterNode *n, sds s); int auxTlsPortPresent(clusterNode *n); +static void clusterBuildMessageHdrLight(clusterMsgLight *hdr, int type, size_t msglen); static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); void freeClusterLink(clusterLink *link); int verifyClusterNodeId(const char *name, int length); @@ -421,7 +422,7 @@ typedef struct { int refcount; /* Number of cluster link send msg queues containing the message */ union { clusterMsg msg; - clusterMsgLight light_msg; + clusterMsgLight msg_light; }; } clusterMsgSendBlock; @@ -1268,7 +1269,11 @@ static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) msgblock->refcount = 1; msgblock->totlen = blocklen; server.stat_cluster_links_memory += blocklen; - clusterBuildMessageHdr(&msgblock->msg, type, msglen); + if IS_LIGHT_MESSAGE (type) { + clusterBuildMessageHdrLight(&msgblock->msg_light, type, msglen); + } else { + clusterBuildMessageHdr(&msgblock->msg, type, msglen); + } return msgblock; } @@ -2895,13 +2900,11 @@ void processLightPacket(clusterLink *link, uint16_t type) { } } -inline int messageTypeSupportsLightHdr(uint16_t type) { +static inline int messageTypeSupportsLightHdr(uint16_t type) { switch (type) { case CLUSTERMSG_TYPE_PUBLISH: return 1; case CLUSTERMSG_TYPE_PUBLISHSHARD: return 1; } - serverLog(LL_NOTICE, "--- Packet of type %s does not support light cluster header", - clusterGetMessageTypeString(type)); return 0; } @@ -2909,11 +2912,13 @@ inline int messageTypeSupportsLightHdr(uint16_t type) { int clusterIsValidPacket(clusterLink *link) { clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint32_t totlen = ntohl(hdr->totlen); - uint16_t type = ntohs(hdr->type); - int is_light = IS_LIGHT_MESSAGE(type); - if (is_light) { - type &= ~CLUSTERMSG_LIGHT; - serverAssert(messageTypeSupportsLightHdr(type)); + int is_light = IS_LIGHT_MESSAGE(ntohs(hdr->type)); + uint16_t type = ntohs(hdr->type) & ~CLUSTERMSG_MODIFIER_MASK; + + if (is_light && !messageTypeSupportsLightHdr(type)) { + serverLog(LL_NOTICE, "--- Packet of type %s does not support light cluster header", + clusterGetMessageTypeString(type)); + return 0; } if (type < CLUSTERMSG_TYPE_COUNT) server.cluster->stats_bus_messages_received[type]++; @@ -3028,12 +3033,11 @@ int clusterProcessPacket(clusterLink *link) { } clusterMsg *hdr = (clusterMsg *)link->rcvbuf; - uint16_t type = ntohs(hdr->type); mstime_t now = mstime(); - int is_light = IS_LIGHT_MESSAGE(type); + int is_light = IS_LIGHT_MESSAGE(ntohs(hdr->type)); + uint16_t type = ntohs(hdr->type) & ~CLUSTERMSG_MODIFIER_MASK; if (is_light) { - type &= ~CLUSTERMSG_LIGHT; if (!link->node || nodeInHandshake(link->node)) { freeClusterLink(link); serverLog( @@ -3717,24 +3721,21 @@ void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { dictReleaseIterator(di); } -/* Build the message header. hdr must point to a buffer at least - * sizeof(clusterMsg) in bytes. */ -static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { +static void clusterBuildMessageHdrLight(clusterMsgLight *hdr, int type, size_t msglen) { hdr->ver = htons(CLUSTER_PROTO_VER); hdr->sig[0] = 'R'; hdr->sig[1] = 'C'; hdr->sig[2] = 'm'; hdr->sig[3] = 'b'; hdr->type = htons(type); + hdr->notused1 = 0; + hdr->notused2 = 0; hdr->totlen = htonl(msglen); +} - if (IS_LIGHT_MESSAGE(type)) { - clusterMsgLight *hdr_light = (clusterMsgLight *)hdr; - hdr_light->notused1 = 0; - hdr_light->notused2 = 0; - return; - } - +/* Build the message header. hdr must point to a buffer at least + * sizeof(clusterMsg) in bytes. */ +static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { uint64_t offset; clusterNode *primary; @@ -3744,6 +3745,12 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { * in charge for this slots. */ primary = (nodeIsReplica(myself) && myself->replicaof) ? myself->replicaof : myself; + hdr->ver = htons(CLUSTER_PROTO_VER); + hdr->sig[0] = 'R'; + hdr->sig[1] = 'C'; + hdr->sig[2] = 'm'; + hdr->sig[3] = 'b'; + hdr->type = htons(type); memcpy(hdr->sender, myself->name, CLUSTER_NAMELEN); /* If cluster-announce-ip option is enabled, force the receivers of our @@ -3785,6 +3792,8 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { /* Set the message flags. */ if (clusterNodeIsPrimary(myself) && server.cluster->mf_end) hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED; + + hdr->totlen = htonl(msglen); } /* Set the i-th entry of the gossip section in the message pointed by 'hdr' @@ -4013,7 +4022,7 @@ clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); clusterMsgDataPublish *hdr_data_msg; if (is_light) { - clusterMsgLight *hdr_light = &msgblock->light_msg; + clusterMsgLight *hdr_light = &msgblock->msg_light; hdr_data_msg = &hdr_light->data.publish.msg; } else { clusterMsg *hdr = &msgblock->msg; diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 1121091003..c7cc25ba14 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,6 +96,8 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_LIGHT 0x8000 /* Modifier bit for message types that support light header */ +#define CLUSTERMSG_MODIFIER_MASK (CLUSTERMSG_LIGHT) /* Modifier bit for message types that support light header */ + /* We check for the modifier bit to determine if the message is sent using light header.*/ #define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_LIGHT) From c2e5c266e47e60ff961f31bfbdae73572e8aaff1 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 03:44:19 +0000 Subject: [PATCH 40/45] Minor Signed-off-by: Roshan Khatri --- src/cluster_legacy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index c7cc25ba14..da7bc345ea 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -96,7 +96,7 @@ typedef struct clusterNodeFailReport { #define CLUSTERMSG_LIGHT 0x8000 /* Modifier bit for message types that support light header */ -#define CLUSTERMSG_MODIFIER_MASK (CLUSTERMSG_LIGHT) /* Modifier bit for message types that support light header */ +#define CLUSTERMSG_MODIFIER_MASK (CLUSTERMSG_LIGHT) /* Modifier mask for header types. (if we add more in the future) */ /* We check for the modifier bit to determine if the message is sent using light header.*/ #define IS_LIGHT_MESSAGE(type) ((type) & CLUSTERMSG_LIGHT) From 7fb234b2af5f945590517fd4ca65b29e1a7ec095 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 05:33:12 +0000 Subject: [PATCH 41/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6431ab62ef..6284178e9d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3583,7 +3583,8 @@ void clusterLinkConnectHandler(connection *conn) { serverLog(LL_DEBUG, "Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); } -static inline int isSigAndMsgLenValid(clusterMsg *hdr) { +/* Performs sanity check on the message signature and length depending on the type. */ +static inline int isClusterMsgSignatureAndLengthValid(clusterMsg *hdr) { if (memcmp(hdr->sig, "RCmb", 4) != 0) return 0; uint16_t type = ntohs(hdr->type); uint32_t totlen = ntohl(hdr->totlen); @@ -3614,7 +3615,7 @@ void clusterReadHandler(connection *conn) { if (rcvbuflen == RCVBUF_MIN_READ_LEN) { /* Perform some sanity check on the message signature * and length. */ - if (!isSigAndMsgLenValid(hdr)) { + if (!isClusterMsgSignatureAndLengthValid(hdr)) { char ip[NET_IP_STR_LEN]; int port; if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { From b11b01463647e517fa4c173f6c456d4d02fb0c79 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 05:35:42 +0000 Subject: [PATCH 42/45] adds statics Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6284178e9d..79292ef149 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -196,19 +196,19 @@ typedef struct { }; } ClusterNodeIterator; -void clusterNodeIterInitAllNodes(ClusterNodeIterator *iter) { +static void clusterNodeIterInitAllNodes(ClusterNodeIterator *iter) { iter->type = ITER_DICT; dictInitSafeIterator(&iter->di, server.cluster->nodes); } -void clusterNodeIterInitMyShard(ClusterNodeIterator *iter) { +static void clusterNodeIterInitMyShard(ClusterNodeIterator *iter) { list *nodes = clusterGetNodesInMyShard(server.cluster->myself); serverAssert(nodes != NULL); iter->type = ITER_LIST; listRewind(nodes, &iter->li); } -clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { +static clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { switch (iter->type) { case ITER_DICT: { /* Get the next entry in the dictionary */ @@ -230,7 +230,7 @@ clusterNode *clusterNodeIterNext(ClusterNodeIterator *iter) { } } -void clusterNodeIterReset(ClusterNodeIterator *iter) { +static void clusterNodeIterReset(ClusterNodeIterator *iter) { if (iter->type == ITER_DICT) { dictResetIterator(&iter->di); } From b31150f55a01a77c8e04d34b543cb205e42656a5 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 08:02:31 +0000 Subject: [PATCH 43/45] Address feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 79292ef149..cc6afa1563 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -149,7 +149,10 @@ static inline int defaultClientPort(void) { (server.cluster->slots[slot] == NULL || bitmapTestBit(server.cluster->owner_not_claiming_slot, slot)) #define RCVBUF_INIT_LEN 1024 -#define RCVBUF_MIN_READ_LEN 16 +#define RCVBUF_MIN_READ_LEN 14 +static_assert(offsetof(clusterMsg, type) + sizeof(uint16_t) == RCVBUF_MIN_READ_LEN, + "Incorrect length to read to identify type"); + #define RCVBUF_MAX_PREALLOC (1 << 20) /* 1MB */ /* Fixed timeout value for cluster operations (milliseconds) */ @@ -1117,7 +1120,6 @@ void clusterInit(void) { * by the createClusterNode() function. */ myself = server.cluster->myself = createClusterNode(NULL, CLUSTER_NODE_MYSELF | CLUSTER_NODE_PRIMARY); serverLog(LL_NOTICE, "No cluster configuration found, I'm %.40s", myself->name); - clusterUpdateMyselfFlags(); clusterAddNode(myself); clusterAddNodeToShard(myself->shard_id, myself); saveconf = 1; @@ -1269,7 +1271,7 @@ static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) msgblock->refcount = 1; msgblock->totlen = blocklen; server.stat_cluster_links_memory += blocklen; - if IS_LIGHT_MESSAGE (type) { + if (IS_LIGHT_MESSAGE(type)) { clusterBuildMessageHdrLight(&msgblock->msg_light, type, msglen); } else { clusterBuildMessageHdr(&msgblock->msg, type, msglen); @@ -2874,7 +2876,7 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } -void processPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { +void clusterProcessPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { robj *channel, *message; uint32_t channel_len, message_len; @@ -2896,7 +2898,7 @@ void processLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - processPublishPacket(&hdr->data.publish.msg, type); + clusterProcessPublishPacket(&hdr->data.publish.msg, type); } } @@ -2916,8 +2918,9 @@ int clusterIsValidPacket(clusterLink *link) { uint16_t type = ntohs(hdr->type) & ~CLUSTERMSG_MODIFIER_MASK; if (is_light && !messageTypeSupportsLightHdr(type)) { - serverLog(LL_NOTICE, "--- Packet of type %s does not support light cluster header", - clusterGetMessageTypeString(type)); + serverLog(LL_NOTICE, + "Packet of type '%s' (%u) does not support light cluster header. Marking packet as invalid.", + clusterGetMessageTypeString(type), type); return 0; } @@ -3418,7 +3421,7 @@ int clusterProcessPacket(clusterLink *link) { } } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { if (!sender) return 1; /* We don't know that node. */ - processPublishPacket(&hdr->data.publish.msg, type); + clusterProcessPublishPacket(&hdr->data.publish.msg, type); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { if (!sender) return 1; /* We don't know that node. */ clusterSendFailoverAuthIfNeeded(sender, hdr); From b8420f2394780dde20e94ac321bdbb5b3718c765 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Tue, 23 Jul 2024 08:10:19 +0000 Subject: [PATCH 44/45] rename Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index cc6afa1563..65e7349acf 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2894,7 +2894,7 @@ void clusterProcessPublishPacket(clusterMsgDataPublish *publish_data, uint16_t t } } -void processLightPacket(clusterLink *link, uint16_t type) { +void clusterProcessLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { @@ -3051,7 +3051,7 @@ int clusterProcessPacket(clusterLink *link) { } clusterNode *sender = link->node; sender->data_received = now; - processLightPacket(link, type); + clusterProcessLightPacket(link, type); return 1; } From 4431095b45bcc0092d15c38bc2fcc1d485b4ef14 Mon Sep 17 00:00:00 2001 From: Roshan Khatri Date: Fri, 26 Jul 2024 04:39:53 +0000 Subject: [PATCH 45/45] Address Feedback Signed-off-by: Roshan Khatri --- src/cluster_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 65e7349acf..6feef5618d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2876,7 +2876,7 @@ static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { return sender; } -void clusterProcessPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { +static void clusterProcessPublishPacket(clusterMsgDataPublish *publish_data, uint16_t type) { robj *channel, *message; uint32_t channel_len, message_len; @@ -2894,7 +2894,7 @@ void clusterProcessPublishPacket(clusterMsgDataPublish *publish_data, uint16_t t } } -void clusterProcessLightPacket(clusterLink *link, uint16_t type) { +static void clusterProcessLightPacket(clusterLink *link, uint16_t type) { clusterMsgLight *hdr = (clusterMsgLight *)link->rcvbuf; if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) {