Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

comm: create local_group/remote_group beform comm commit #7237

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
12 changes: 0 additions & 12 deletions src/binding/c/group_api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,6 @@ MPI_Group_excl:

MPI_Group_free:
.desc: Frees a group
{ -- error_check --
/* Cannot free the predefined groups, but allow GROUP_EMPTY
* because otherwise many tests fail */
if ((HANDLE_IS_BUILTIN(*group)) && *group != MPI_GROUP_EMPTY) {
mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, __func__, __LINE__,
MPI_ERR_GROUP, "**groupperm", 0);
}
if (mpi_errno) {
goto fn_fail;
}
}

MPI_Group_incl:
.desc: Produces a group by reordering an existing group and taking only listed members
Expand Down
6 changes: 3 additions & 3 deletions src/include/mpir_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,9 @@ struct MPIR_Comm {
int rank; /* Value of MPI_Comm_rank */
MPIR_Attribute *attributes; /* List of attributes */
int local_size; /* Value of MPI_Comm_size for local group */
MPIR_Group *local_group, /* Groups in communicator. */
*remote_group; /* The local and remote groups are the
* same for intra communicators */
MPIR_Group *local_group; /* Groups in communicator. */
MPIR_Group *remote_group; /* The remote group in a inter communicator.
* Must be NULL in a intra communicator. */
MPIR_Comm_kind_t comm_kind; /* MPIR_COMM_KIND__INTRACOMM or MPIR_COMM_KIND__INTERCOMM */
char name[MPI_MAX_OBJECT_NAME]; /* Required for MPI-2 */
MPIR_Errhandler *errhandler; /* Pointer to the error handler structure */
Expand Down
93 changes: 66 additions & 27 deletions src/include/mpir_group.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,7 @@
* only because they are required for the group operations (e.g.,
* MPI_Group_intersection) and for the scalable RMA synchronization
*---------------------------------------------------------------------------*/
/* This structure is used to implement the group operations such as
MPI_Group_translate_ranks */
/* note: next_lpid (with idx_of_first_lpid in MPIR_Group) gives a linked list
* in a sorted lpid ascending order */
typedef struct MPII_Group_pmap_t {
uint64_t lpid; /* local process id, from VCONN */
int next_lpid; /* Index of next lpid (in lpid order) */
} MPII_Group_pmap_t;

/* Any changes in the MPIR_Group structure must be made to the
predefined value in MPIR_Group_builtin for MPI_GROUP_EMPTY in
src/mpi/group/grouputil.c */

/*S
MPIR_Group - Description of the Group data structure

Expand Down Expand Up @@ -53,22 +42,67 @@ typedef struct MPII_Group_pmap_t {
Group-DS

S*/

/* In addition to MPI_GROUP_EMPTY, internally we have a few more builtins */
#define MPIR_GROUP_WORLD ((MPI_Group)0x48000001)
#define MPIR_GROUP_SELF ((MPI_Group)0x48000002)

#define MPIR_GROUP_WORLD_PTR (MPIR_Group_builtin + 1)
#define MPIR_GROUP_SELF_PTR (MPIR_Group_builtin + 2)

/* Worlds -
* We need a device-independent way of identifying processes. Assuming the concept of
* "worlds", we can describe a process with (world_idx, world_rank).
*
* The world_idx is a local id because each process may not see all worlds. Thus,
* each process only can maintain a list of worlds as it encounters them. Thus,
* a process id derived from (world_idx, world_rank) is referred as LPID, or
* "local process id".
*
* Each process should maintain a table of worlds with sufficient information so
* processes can match worlds upon connection or making address exchange.
*/

#define MPIR_NAMESPACE_MAX 128
struct MPIR_World {
char namespace[MPIR_NAMESPACE_MAX];
/* other useful fields */
int num_procs;
};

extern struct MPIR_World MPIR_Worlds[];

int MPIR_add_world(const char *namespace, int num_procs);
int MPIR_find_world(const char *namespace);

/* Abstract the integer type for lpid (process id). It is possible to use 32-bit
* in principle, but 64-bit is simpler since we can trivially combine
* (world_idx, world_rank).
*/
typedef uint64_t MPIR_Lpid;

struct MPIR_Pmap {
int size; /* same as group->size, duplicate here so Pmap is logically complete */
bool use_map;
union {
MPIR_Lpid *map;
struct {
MPIR_Lpid offset;
MPIR_Lpid stride;
MPIR_Lpid blocksize;
} stride;
} u;
};

struct MPIR_Group {
MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
int size; /* Size of a group */
int rank; /* rank of this process relative to this
* group */
int idx_of_first_lpid;
MPII_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
* process number */
int is_local_dense_monotonic; /* see NOTE-G1 */

/* We may want some additional data for the RMA syncrhonization calls */
/* Other, device-specific information */
int rank; /* rank of this process relative to this group */
struct MPIR_Pmap pmap;
MPIR_Session *session_ptr; /* Pointer to session to which this group belongs */
#ifdef MPID_DEV_GROUP_DECL
MPID_DEV_GROUP_DECL
#endif
MPIR_Session * session_ptr; /* Pointer to session to which this group belongs */
};

/* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the
Expand Down Expand Up @@ -97,18 +131,23 @@ extern MPIR_Group *const MPIR_Group_empty;
#define MPIR_Group_release_ref(_group, _inuse) \
do { MPIR_Object_release_ref(_group, _inuse); } while (0)

void MPII_Group_setup_lpid_list(MPIR_Group *);
int MPIR_Group_check_valid_ranks(MPIR_Group *, const int[], int);
int MPIR_Group_check_valid_ranges(MPIR_Group *, int[][3], int);
void MPIR_Group_setup_lpid_pairs(MPIR_Group *, MPIR_Group *);
int MPIR_Group_create(int, MPIR_Group **);
int MPIR_Group_release(MPIR_Group * group_ptr);

int MPIR_Group_dup(MPIR_Group * old_group, MPIR_Session * session_ptr, MPIR_Group ** new_group_ptr);
int MPIR_Group_create_map(int size, int rank, MPIR_Session * session_ptr, MPIR_Lpid * map,
MPIR_Group ** new_group_ptr);
int MPIR_Group_create_stride(int size, int rank, MPIR_Session * session_ptr,
MPIR_Lpid offset, MPIR_Lpid stride, MPIR_Lpid blocksize,
MPIR_Group ** new_group_ptr);
MPIR_Lpid MPIR_Group_rank_to_lpid(MPIR_Group * group, int rank);
int MPIR_Group_lpid_to_rank(MPIR_Group * group, MPIR_Lpid lpid);

int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr);
void MPIR_Group_set_session_ptr(MPIR_Group * group_ptr, MPIR_Session * session_out);
int MPIR_Group_init(void);

/* internal functions */
void MPII_Group_setup_lpid_list(MPIR_Group *);
void MPIR_Group_finalize(void);

#endif /* MPIR_GROUP_H_INCLUDED */
2 changes: 1 addition & 1 deletion src/include/mpir_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ const char *MPIR_Handle_get_kind_str(int kind);
#define MPIR_COMM_PREALLOC 8
#endif

#define MPIR_GROUP_N_BUILTIN 1
#define MPIR_GROUP_N_BUILTIN 3
#ifdef MPID_GROUP_PREALLOC
#define MPIR_GROUP_PREALLOC MPID_GROUP_PREALLOC
#else
Expand Down
9 changes: 9 additions & 0 deletions src/mpi/comm/builtin_comms.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ int MPIR_init_comm_world(void)
MPIR_Process.comm_world->remote_size = MPIR_Process.size;
MPIR_Process.comm_world->local_size = MPIR_Process.size;

MPIR_Process.comm_world->local_group = MPIR_GROUP_WORLD_PTR;
MPIR_Group_add_ref(MPIR_GROUP_WORLD_PTR);

mpi_errno = MPIR_Comm_commit(MPIR_Process.comm_world);
MPIR_ERR_CHECK(mpi_errno);

Expand Down Expand Up @@ -59,6 +62,9 @@ int MPIR_init_comm_self(void)
MPIR_Process.comm_self->remote_size = 1;
MPIR_Process.comm_self->local_size = 1;

MPIR_Process.comm_self->local_group = MPIR_GROUP_SELF_PTR;
MPIR_Group_add_ref(MPIR_GROUP_SELF_PTR);

mpi_errno = MPIR_Comm_commit(MPIR_Process.comm_self);
MPIR_ERR_CHECK(mpi_errno);

Expand Down Expand Up @@ -91,6 +97,9 @@ int MPIR_init_icomm_world(void)
MPIR_Process.icomm_world->remote_size = MPIR_Process.size;
MPIR_Process.icomm_world->local_size = MPIR_Process.size;

MPIR_Process.icomm_world->local_group = MPIR_GROUP_WORLD_PTR;
MPIR_Group_add_ref(MPIR_GROUP_WORLD_PTR);

mpi_errno = MPIR_Comm_commit(MPIR_Process.icomm_world);
MPIR_ERR_CHECK(mpi_errno);

Expand Down
Loading