From c36e94fcb9ac2eee28874b77b9f10e16d73081b8 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 19 Dec 2024 17:02:15 -0600 Subject: [PATCH] ch3: shortcut tmp_comm used in MPI_Comm_accept/connect Because the tmp_comm uses a temporary vc that doesn't belong to any pg, it is incompatible with the new comm init process (that relies on lpid lookup to construct vcrt tables). It turns out we only need tmp_comm to perform basic send/recv (MPIC_Sendrecv) and we don't need most of the facilities of a normal communicator. Shortcutting the tmp_comm construction and destruction greatly simplifies the code. --- src/mpid/ch3/src/ch3u_port.c | 44 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/src/mpid/ch3/src/ch3u_port.c b/src/mpid/ch3/src/ch3u_port.c index 39249e73035..91aaf1f11d5 100644 --- a/src/mpid/ch3/src/ch3u_port.c +++ b/src/mpid/ch3/src/ch3u_port.c @@ -487,12 +487,10 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr, MPIDI_VC_t *vc_ptr, int is_low_group, int context_id_offset) { int mpi_errno = MPI_SUCCESS; - MPIR_Comm *tmp_comm, *commself_ptr; + MPIR_Comm *tmp_comm; MPIR_FUNC_ENTER; - MPIR_Comm_get_ptr( MPI_COMM_SELF, commself_ptr ); - /* WDG-old code allocated a context id that was then discarded */ mpi_errno = MPIR_Comm_create(&tmp_comm); MPIR_ERR_CHECK(mpi_errno); @@ -524,11 +522,6 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr, /* No pg structure needed since vc has already been set up (connection has been established). */ - /* Point local vcrt at those of commself_ptr */ - /* FIXME: Explain why */ - tmp_comm->dev.local_vcrt = commself_ptr->dev.vcrt; - MPIDI_VCRT_Add_ref(commself_ptr->dev.vcrt); - /* No pg needed since connection has already been formed. FIXME - ensure that the comm_release code does not try to free an unallocated pg */ @@ -542,21 +535,6 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr, /* FIXME: Why do we do a dup here? 
*/ MPIDI_VCR_Dup(vc_ptr, &tmp_comm->dev.vcrt->vcr_table[0]); - MPIR_Coll_comm_init(tmp_comm); - - MPIR_Lpid local_lpid = tmp_comm->dev.local_vcrt->vcr_table[0]->lpid; - MPIR_Lpid remote_lpid = tmp_comm->dev.vcrt->vcr_table[0]->lpid; - mpi_errno = MPIR_Group_create_stride(1, 0, commself_ptr->session_ptr, local_lpid, 1, 1, - &tmp_comm->local_group); - mpi_errno = MPIR_Group_create_stride(1, 0, commself_ptr->session_ptr, remote_lpid, 1, 1, - &tmp_comm->remote_group); - - /* Even though this is a tmp comm and we don't call - MPI_Comm_commit, we still need to call the creation hook - because the destruction hook will be called in comm_release */ - mpi_errno = MPID_Comm_commit_pre_hook(tmp_comm); - MPIR_ERR_CHECK(mpi_errno); - *comm_pptr = tmp_comm; fn_exit: @@ -566,6 +544,22 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr, goto fn_exit; } +static int MPIDI_CH3I_Release_tmp_comm(MPIR_Comm *tmp_comm) +{ + int mpi_errno = MPI_SUCCESS; + + mpi_errno = MPIDI_VCRT_Release(tmp_comm->dev.vcrt, FALSE); + MPIR_ERR_CHECK(mpi_errno); + + MPIR_Free_contextid(tmp_comm->recvcontext_id); + MPIR_Handle_obj_free(&MPIR_Comm_mem, tmp_comm); + + fn_exit: + return mpi_errno; + fn_fail: + goto fn_exit; +} + /* ------------------------------------------------------------------------- */ /* MPIDI_Comm_connect() @@ -752,7 +746,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root, MPIR_ERR_CHECK(mpi_errno); /* All communication with remote root done. Release the communicator. */ - MPIR_Comm_release(tmp_comm); + MPIDI_CH3I_Release_tmp_comm(tmp_comm); } /*printf("connect:barrier\n");fflush(stdout);*/ @@ -1283,7 +1277,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root, MPIR_ERR_CHECK(mpi_errno); /* All communication with remote root done. Release the communicator. */ - MPIR_Comm_release(tmp_comm); + MPIDI_CH3I_Release_tmp_comm(tmp_comm); } MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Barrier");