Skip to content

Commit

Permalink
fix(rdma): use COMM_ID_MASK as invalid id
Browse files Browse the repository at this point in the history
Previously, this used ~0 as the invalid-ID marker. The expression ~0 has
type int, a signed type, so comparing it against the unsigned comm ID
breaks under -Wsign-compare. Prefer to use COMM_ID_MASK as the marker.

stack-info: PR: aws#574, branch: aws-nslick/stack/21
Signed-off-by: Nicholas Sielicki <[email protected]>
  • Loading branch information
aws-nslick committed Sep 22, 2024
1 parent 1303e95 commit 10bbd0b
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/nccl_ofi_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -4197,7 +4197,7 @@ static nccl_net_ofi_rdma_recv_comm_t *prepare_recv_comm(nccl_net_ofi_rdma_listen
/* Allocate recv communicator ID */
comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool);
if (OFI_UNLIKELY(comm_id < 0)) {
r_comm->local_comm_id = ~0;
r_comm->local_comm_id = COMM_ID_MASK;
goto error;
}
r_comm->local_comm_id = (uint32_t)comm_id;
Expand Down Expand Up @@ -4367,7 +4367,7 @@ static nccl_net_ofi_rdma_recv_comm_t *prepare_recv_comm(nccl_net_ofi_rdma_listen
nccl_ofi_freelist_fini(r_comm->nccl_ofi_reqs_fl);
if (r_comm->msgbuff)
nccl_ofi_msgbuff_destroy(r_comm->msgbuff);
if (~0 != r_comm->local_comm_id) {
if (COMM_ID_MASK != r_comm->local_comm_id) {
ret = nccl_ofi_idpool_free_id(device->comm_idpool, r_comm->local_comm_id);
if (ret != 0) {
NCCL_OFI_WARN("Error freeing communicator ID %" PRIu32, r_comm->local_comm_id);
Expand Down Expand Up @@ -4788,7 +4788,7 @@ static int listen(nccl_net_ofi_ep_t *base_ep,
/* Allocate listen communicator ID */
comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool);
if (OFI_UNLIKELY(comm_id < 0)) {
l_comm->comm_id = ~0;
l_comm->comm_id = COMM_ID_MASK;
ret = comm_id;
goto error;
}
Expand All @@ -4808,7 +4808,7 @@ static int listen(nccl_net_ofi_ep_t *base_ep,
goto exit;

error:
if (l_comm && ~0 != l_comm->comm_id) {
if (l_comm && COMM_ID_MASK != l_comm->comm_id) {
if (0 != nccl_ofi_idpool_free_id(device->comm_idpool, l_comm->comm_id)) {
NCCL_OFI_WARN("Error freeing communicator ID %" PRIu32, l_comm->comm_id);
}
Expand Down Expand Up @@ -5971,7 +5971,7 @@ static inline int create_send_comm(nccl_net_ofi_conn_handle_t *handle,
/* Allocate send communicator ID */
comm_id = nccl_ofi_idpool_allocate_id(device->comm_idpool);
if (OFI_UNLIKELY(comm_id < 0)) {
ret_s_comm->local_comm_id = ~0;
ret_s_comm->local_comm_id = COMM_ID_MASK;
ret = comm_id;
goto error;
}
Expand Down Expand Up @@ -6037,7 +6037,7 @@ static inline int create_send_comm(nccl_net_ofi_conn_handle_t *handle,

error:
if (ret_s_comm) {
if (~0 != ret_s_comm->local_comm_id) {
if (COMM_ID_MASK != ret_s_comm->local_comm_id) {
if (0 != nccl_ofi_idpool_free_id(device->comm_idpool, ret_s_comm->local_comm_id)) {
NCCL_OFI_WARN("Error freeing communicator ID %" PRIu32, ret_s_comm->local_comm_id);
}
Expand Down

0 comments on commit 10bbd0b

Please sign in to comment.