Skip to content

Commit

Permalink
Merge pull request #13095 from edgargabriel/topic/memkind-optimizatio…
Browse files Browse the repository at this point in the history
…ns-1

first set of memkind optimizations
  • Loading branch information
edgargabriel authored Feb 14, 2025
2 parents 9fb2d4e + ee8e53a commit 7cb2fee
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 31 deletions.
51 changes: 42 additions & 9 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,11 @@ int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opa
if (info) {
opal_info_dup(info, &(newcomp->super.s_info));
}
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

/* Set name for debugging purposes */
snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %s CREATE FROM %s",
Expand Down Expand Up @@ -705,8 +709,11 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key,
if (info) {
opal_info_dup(info, &(newcomp->super.s_info));
}
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);

ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}
/* Activate the communicator and init coll-component */
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);

Expand Down Expand Up @@ -997,7 +1004,11 @@ static int ompi_comm_split_type_core(ompi_communicator_t *comm,
if (info) {
opal_infosubscribe_change_info(&newcomp->super, info);
}
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

/* Activate the communicator and init coll-component */
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
Expand Down Expand Up @@ -1351,7 +1362,11 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp
if (info) {
opal_infosubscribe_change_info(&newcomp->super, info);
}
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

/* activate communicator and init coll-module */
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
Expand Down Expand Up @@ -1442,7 +1457,12 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
if (info) {
opal_info_dup(info, &(newcomp->super.s_info));
}
ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info);

ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}
}

ompi_comm_request_schedule_append (request, ompi_comm_idup_getcid, subreq, subreq[0] ? 1 : 0);
Expand Down Expand Up @@ -1594,7 +1614,11 @@ int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info
if (NULL == newcomp->super.s_info) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

/* activate communicator and init coll-module. use the group allreduce implementation as
* no collective module has yet been selected. the tag does not matter as any tag will
Expand Down Expand Up @@ -1736,7 +1760,12 @@ int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, om

// Copy info if there is one.
newcomp->super.s_info = OBJ_NEW(opal_info_t);
ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super, &ompi_mpi_info_null.info.super);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super,
&ompi_mpi_info_null.info.super, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

*newintercomm = newcomp;

Expand Down Expand Up @@ -1900,7 +1929,11 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
if (info) {
opal_info_dup(info, &(newcomp->super.s_info));
}
ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info);
ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}

/* activate communicator and init coll-module */
rc = ompi_comm_activate (&newcomp, local_comm, leader_comm, &local_leader, &leader_comm_remote_leader,
Expand Down
7 changes: 6 additions & 1 deletion ompi/communicator/comm_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,16 @@ int ompi_comm_init_mpi3 (void)
char *memkind_requested = getenv ("OMPI_MCA_mpi_memory_alloc_kinds");
if (NULL != memkind_requested) {
char *memkind_provided;
ompi_info_memkind_assert_type type;

ompi_info_memkind_process (memkind_requested, &memkind_provided);
ompi_info_memkind_process (memkind_requested, &memkind_provided, &type);
opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
opal_infosubscribe_subscribe (&ompi_mpi_comm_self.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.instance->super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
ompi_mpi_comm_world.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
ompi_mpi_comm_self.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
}
free (memkind_provided);
}

Expand Down
2 changes: 2 additions & 0 deletions ompi/communicator/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
#define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008
#define OMPI_COMM_ASSERT_LAZY_BARRIER 0x00000010
#define OMPI_COMM_ASSERT_ACTIVE_POLL 0x00000020
#define OMPI_COMM_ASSERT_NO_ACCEL_BUF 0x00000040

#define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag)
#define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG)
Expand All @@ -114,6 +115,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
#define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE)
#define OMPI_COMM_CHECK_ASSERT_LAZY_BARRIER(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_LAZY_BARRIER)
#define OMPI_COMM_CHECK_ASSERT_ACTIVE_POLL(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ACTIVE_POLL)
#define OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ACCEL_BUF)

/**
* Modes required for acquiring the new comm-id.
Expand Down
9 changes: 6 additions & 3 deletions ompi/file/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -124,8 +124,11 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename,
if (info) {
opal_info_dup(info, &(file->super.s_info));
}
ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info);

ompi_info_memkind_assert_type type;
ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info, &type);
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
file->f_flags |= OMPI_FILE_ASSERT_NO_ACCEL_BUF;
}
file->f_amode = amode;
file->f_filename = strdup(filename);
if (NULL == file->f_filename) {
Expand Down
6 changes: 3 additions & 3 deletions ompi/file/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
/*
* Flags
*/
#define OMPI_FILE_ISCLOSED 0x00000001
#define OMPI_FILE_HIDDEN 0x00000002

#define OMPI_FILE_ISCLOSED 0x00000001
#define OMPI_FILE_HIDDEN 0x00000002
#define OMPI_FILE_ASSERT_NO_ACCEL_BUF 0x00000004
BEGIN_C_DECLS

/**
Expand Down
54 changes: 50 additions & 4 deletions ompi/info/info_memkind.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind
*/

/* Separate requested_str into an array of individual entries */
int current_max = 0;
char **memkind_combos = opal_argv_split(memkind_str, ',');
int max_num_memkinds = opal_argv_count(memkind_combos);

Expand All @@ -71,7 +72,6 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind

int iter = 0;
char *m = memkind_combos[iter];
int current_max = 0;
while (m != NULL) {
bool name_found = false;
char **tmp_str = opal_argv_split (m, ':');
Expand Down Expand Up @@ -419,19 +419,52 @@ static bool ompi_info_memkind_validate (const char *assert_str, const char *pare
return ret;
}

static bool ompi_info_memkind_check_no_accel (int num_memkinds, ompi_memkind_t *memkinds)
{
bool result = true;

for (int i = 0; i < num_memkinds; i++) {
if (!strncmp(memkinds[i].im_name, "system", strlen("system"))) {
continue;
}
if (!strncmp(memkinds[i].im_name, "mpi", strlen("mpi"))) {
continue;
}
result = false;
break;
}

return result;
}

int ompi_info_memkind_process (const char* requested_str, char **provided_str)
static bool ompi_info_memkind_check_no_accel_from_string (char *mstring)
{
bool ret = false;
int num_memkinds;
ompi_memkind_t *memkinds = NULL;

ompi_info_memkind_extract (mstring, &num_memkinds, &memkinds);
if (NULL != memkinds) {
ret = ompi_info_memkind_check_no_accel (num_memkinds, memkinds);
ompi_info_memkind_free(num_memkinds, memkinds);
}

return ret;
}
int ompi_info_memkind_process (const char* requested_str, char **provided_str,
ompi_info_memkind_assert_type *type)
{
int err;
char *tmp_str = NULL;

int num_requested_memkinds, num_available_memkinds, num_provided_memkinds;
ompi_memkind_t *requested_memkinds = NULL ;
ompi_memkind_t *available_memkinds = NULL;
ompi_memkind_t *provided_memkinds = NULL;
ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED;

if (NULL == requested_str) {
*provided_str = NULL;
*type = assert_type;
return OMPI_SUCCESS;
}

Expand All @@ -448,6 +481,10 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str)
goto exit;
}

if (ompi_info_memkind_check_no_accel (num_provided_memkinds, provided_memkinds)) {
assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL;
}

ompi_info_memkind_str_create(num_provided_memkinds, provided_memkinds, &tmp_str);

exit:
Expand All @@ -459,6 +496,7 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str)
}
// Don't free the available_memkinds, they will be released in info_finalize;

*type = assert_type;
*provided_str = tmp_str;
return err;
}
Expand Down Expand Up @@ -504,15 +542,17 @@ const char *ompi_info_memkind_cb (opal_infosubscriber_t *obj, const char *key, c
** value of another info key (mpi_memory_alloc_kinds).
*/
int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubscriber_t *child,
opal_info_t *info)
opal_info_t *info, ompi_info_memkind_assert_type *type)
{
opal_cstring_t *parent_val;
opal_cstring_t *assert_val;
ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED;
char *final_str = NULL;
int flag;

opal_info_get(parent->s_info, "mpi_memory_alloc_kinds", &parent_val, &flag);
if (0 == flag) {
*type = assert_type;
return OMPI_SUCCESS;
}
final_str = (char*) parent_val->string;
Expand All @@ -539,6 +579,12 @@ int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubsc
opal_infosubscribe_subscribe (child, "mpi_memory_alloc_kinds", final_str,
ompi_info_memkind_cb);
OBJ_RELEASE(parent_val);

if (ompi_info_memkind_check_no_accel_from_string(final_str)) {
assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL;
}

*type = assert_type;
return OMPI_SUCCESS;
}

Expand Down
15 changes: 13 additions & 2 deletions ompi/info/info_memkind.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,26 @@ struct ompi_memkind_t {
};
typedef struct ompi_memkind_t ompi_memkind_t;

typedef enum {
OMPI_INFO_MEMKIND_ASSERT_UNDEFINED = 0, // no statement on memkind usage
OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL, // no accelerator memory is used
OMPI_INFO_MEMKIND_ASSERT_ACCEL_DEVICE_ONLY, // only accelerator device memory used
OMPI_INFO_MEMKIND_ASSERT_ACCEL_ALL // only accelerator memory (no restrictors) used
} ompi_info_memkind_assert_type;

/*
** Given a string of user requested memory alloc kinds, create
** a string with the actually support memory kinds by the library.
**
** @param[IN]: requested_str input string
** @param[OUT]: provided_str result string
** @param[OUT]: type guarantuees given on memkind utilization
**
** @return: OMPI_SUCCESS or error on failure
*/
OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str,
char **provided_str);
char **provided_str,
ompi_info_memkind_assert_type *type);
/*
** Set the memory_alloc_kind info object on the child object, either
** by copying it from the parent object, or adjusting it based
Expand All @@ -46,12 +55,14 @@ OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str,
** @param [INOUT]: child child object
** @param[IN]: info info object provided by code during object creation
** (e.g. MPI_Comm_dup_with_info, MPI_File_open, etc.)
** @param[OUT]: type guarantuees given on memkind utilization
**
** @return: OMPI_SUCCESS or error on failure
*/
OMPI_DECLSPEC int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent,
opal_infosubscriber_t *child,
opal_info_t *info);
opal_info_t *info,
ompi_info_memkind_assert_type *type);

/*
** free the array of available memkinds when shutting down the info
Expand Down
3 changes: 2 additions & 1 deletion ompi/instance/instance.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,13 +859,14 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t
/* Copy info if there is one. */
if (OPAL_UNLIKELY(NULL != info)) {
opal_cstring_t *memkind_requested;
ompi_info_memkind_assert_type type;
int flag;

new_instance->super.s_info = OBJ_NEW(opal_info_t);
opal_info_get(info, "mpi_memory_alloc_kinds", &memkind_requested, &flag);
if (1 == flag) {
char *memkind_provided;
ompi_info_memkind_process (memkind_requested->string, &memkind_provided);
ompi_info_memkind_process (memkind_requested->string, &memkind_provided, &type);
opal_infosubscribe_subscribe (&new_instance->super, "mpi_memory_alloc_kinds",
memkind_provided, ompi_info_memkind_cb);
free (memkind_provided);
Expand Down
6 changes: 6 additions & 0 deletions ompi/mca/coll/accelerator/coll_accelerator_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,
return NULL;
}

if (OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:accelerator:comm_query: NO_ACCEL_BUF assertion set: disqualifying myself");
return NULL;
}

accelerator_module = OBJ_NEW(mca_coll_accelerator_module_t);
if (NULL == accelerator_module) {
return NULL;
Expand Down
Loading

0 comments on commit 7cb2fee

Please sign in to comment.