From a06ae92cd68f385ce031de88c267d8ad358b6d69 Mon Sep 17 00:00:00 2001 From: Nicholas Sielicki Date: Wed, 4 Sep 2024 13:09:06 -0700 Subject: [PATCH] fix(rdma): use COMM_ID_MASK as invalid id Previously, this used ~0 as the invalid signal. This defaults to a signed type, which breaks under -Wsign-compare. Prefer to use COMM_ID_MASK as the marker. stack-info: PR: https://github.com/aws/aws-ofi-nccl/pull/574, branch: aws-nslick/stack/21 Signed-off-by: Nicholas Sielicki --- src/nccl_ofi_rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nccl_ofi_rdma.c b/src/nccl_ofi_rdma.c index aee8663fb..ee22554dd 100644 --- a/src/nccl_ofi_rdma.c +++ b/src/nccl_ofi_rdma.c @@ -4188,7 +4188,7 @@ static nccl_net_ofi_rdma_recv_comm_t *prepare_recv_comm(nccl_net_ofi_rdma_listen /* Allocate recv communicator ID */ comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool); if (OFI_UNLIKELY(comm_id < 0)) { - r_comm->local_comm_id = ~0; + r_comm->local_comm_id = COMM_ID_MASK; goto error; } r_comm->local_comm_id = (uint32_t)comm_id; @@ -4358,7 +4358,7 @@ static nccl_net_ofi_rdma_recv_comm_t *prepare_recv_comm(nccl_net_ofi_rdma_listen nccl_ofi_freelist_fini(r_comm->nccl_ofi_reqs_fl); if (r_comm->msgbuff) nccl_ofi_msgbuff_destroy(r_comm->msgbuff); - if (~0 != r_comm->local_comm_id) { + if (COMM_ID_MASK != r_comm->local_comm_id) { ret = nccl_ofi_idpool_free_id(device->comm_idpool, r_comm->local_comm_id); if (ret != 0) { NCCL_OFI_WARN("Error freeing communicator ID %" PRIu32, r_comm->local_comm_id); @@ -4779,7 +4779,7 @@ static int listen(nccl_net_ofi_ep_t *base_ep, /* Allocate listen communicator ID */ comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool); if (OFI_UNLIKELY(comm_id < 0)) { - l_comm->comm_id = ~0; + l_comm->comm_id = COMM_ID_MASK; ret = comm_id; goto error; } @@ -4799,7 +4799,7 @@ static int listen(nccl_net_ofi_ep_t *base_ep, goto exit; error: - if (l_comm && ~0 != l_comm->comm_id) { + if (l_comm && COMM_ID_MASK != l_comm->comm_id) { if 
(0 != nccl_ofi_idpool_free_id(device->comm_idpool, l_comm->comm_id)) { NCCL_OFI_WARN("Error freeing communicator ID %" PRIu64, l_comm->comm_id); } @@ -5963,7 +5963,7 @@ static inline int create_send_comm(nccl_net_ofi_conn_handle_t *handle, /* Allocate send communicator ID */ comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool); if (OFI_UNLIKELY(comm_id < 0)) { - ret_s_comm->local_comm_id = ~0; + ret_s_comm->local_comm_id = COMM_ID_MASK; ret = comm_id; goto error; } @@ -6029,7 +6029,7 @@ static inline int create_send_comm(nccl_net_ofi_conn_handle_t *handle, error: if (ret_s_comm) { - if (~0 != ret_s_comm->local_comm_id) { + if (COMM_ID_MASK != ret_s_comm->local_comm_id) { if (0 != nccl_ofi_idpool_free_id(device->comm_idpool, ret_s_comm->local_comm_id)) { NCCL_OFI_WARN("Error freeing communicator ID %" PRIu32, ret_s_comm->local_comm_id); }