Skip to content

Commit

Permalink
ch3: shortcut tmp_comm used in MPI_Comm_accept/connect
Browse files Browse the repository at this point in the history
Because the tmp_comm uses a temporary vc that doesn't belong to any pg,
it is incompatible with the new comm init process (which relies on lpid
lookup to construct the vcrt tables).

It turns out we only need tmp_comm to perform basic send/recv
(MPIC_Sendrecv), and we don't need most of the facilities of a normal
communicator. Shortcutting the tmp_comm construction and destruction
greatly simplifies the code.
  • Loading branch information
hzhou committed Dec 21, 2024
1 parent cfeb14c commit c36e94f
Showing 1 changed file with 19 additions and 25 deletions.
44 changes: 19 additions & 25 deletions src/mpid/ch3/src/ch3u_port.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,12 +487,10 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
MPIDI_VC_t *vc_ptr, int is_low_group, int context_id_offset)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm *tmp_comm, *commself_ptr;
MPIR_Comm *tmp_comm;

MPIR_FUNC_ENTER;

MPIR_Comm_get_ptr( MPI_COMM_SELF, commself_ptr );

/* WDG-old code allocated a context id that was then discarded */
mpi_errno = MPIR_Comm_create(&tmp_comm);
MPIR_ERR_CHECK(mpi_errno);
Expand Down Expand Up @@ -524,11 +522,6 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
/* No pg structure needed since vc has already been set up
(connection has been established). */

/* Point local vcrt at those of commself_ptr */
/* FIXME: Explain why */
tmp_comm->dev.local_vcrt = commself_ptr->dev.vcrt;
MPIDI_VCRT_Add_ref(commself_ptr->dev.vcrt);

/* No pg needed since connection has already been formed.
FIXME - ensure that the comm_release code does not try to
free an unallocated pg */
Expand All @@ -542,21 +535,6 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
/* FIXME: Why do we do a dup here? */
MPIDI_VCR_Dup(vc_ptr, &tmp_comm->dev.vcrt->vcr_table[0]);

MPIR_Coll_comm_init(tmp_comm);

MPIR_Lpid local_lpid = tmp_comm->dev.local_vcrt->vcr_table[0]->lpid;
MPIR_Lpid remote_lpid = tmp_comm->dev.vcrt->vcr_table[0]->lpid;
mpi_errno = MPIR_Group_create_stride(1, 0, commself_ptr->session_ptr, local_lpid, 1, 1,
&tmp_comm->local_group);
mpi_errno = MPIR_Group_create_stride(1, 0, commself_ptr->session_ptr, remote_lpid, 1, 1,
&tmp_comm->remote_group);

/* Even though this is a tmp comm and we don't call
MPI_Comm_commit, we still need to call the creation hook
because the destruction hook will be called in comm_release */
mpi_errno = MPID_Comm_commit_pre_hook(tmp_comm);
MPIR_ERR_CHECK(mpi_errno);

*comm_pptr = tmp_comm;

fn_exit:
Expand All @@ -566,6 +544,22 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
goto fn_exit;
}

/*
 * MPIDI_CH3I_Release_tmp_comm - tear down a temporary communicator.
 *
 * Performs three steps in order:
 *   1. releases tmp_comm->dev.vcrt through MPIDI_VCRT_Release,
 *   2. frees the receive context id through MPIR_Free_contextid,
 *   3. returns the communicator object to MPIR_Comm_mem.
 *
 * If step 1 reports an error, steps 2 and 3 are skipped and the error is
 * returned to the caller.
 *
 * tmp_comm - the temporary communicator to release. After a successful call
 *            the object has been handed back to MPIR_Comm_mem, so callers
 *            should not touch it again.
 *
 * Returns MPI_SUCCESS when all steps ran, otherwise an MPI error code.
 */
static int MPIDI_CH3I_Release_tmp_comm(MPIR_Comm *tmp_comm)
{
    /* Step 1: release the vcrt attached to the communicator. */
    int mpi_errno = MPIDI_VCRT_Release(tmp_comm->dev.vcrt, FALSE);
    MPIR_ERR_CHECK(mpi_errno);

    /* Step 2: give the receive context id back. */
    MPIR_Free_contextid(tmp_comm->recvcontext_id);

    /* Step 3: return the communicator object itself to its pool. */
    MPIR_Handle_obj_free(&MPIR_Comm_mem, tmp_comm);

  fn_exit:
    return mpi_errno;

  fn_fail:
    /* Nothing extra to unwind here; fall back to the common exit. */
    goto fn_exit;
}

/* ------------------------------------------------------------------------- */
/*
MPIDI_Comm_connect()
Expand Down Expand Up @@ -752,7 +746,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
MPIR_ERR_CHECK(mpi_errno);

/* All communication with remote root done. Release the communicator. */
MPIR_Comm_release(tmp_comm);
MPIDI_CH3I_Release_tmp_comm(tmp_comm);
}

/*printf("connect:barrier\n");fflush(stdout);*/
Expand Down Expand Up @@ -1283,7 +1277,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
MPIR_ERR_CHECK(mpi_errno);

/* All communication with remote root done. Release the communicator. */
MPIR_Comm_release(tmp_comm);
MPIDI_CH3I_Release_tmp_comm(tmp_comm);
}

MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Barrier");
Expand Down

0 comments on commit c36e94f

Please sign in to comment.