diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 489357f2004..a18b91ca7d6 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -448,7 +448,11 @@ int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opa if (info) { opal_info_dup(info, &(newcomp->super.s_info)); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* Set name for debugging purposes */ snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %s CREATE FROM %s", @@ -705,8 +709,11 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, if (info) { opal_info_dup(info, &(newcomp->super.s_info)); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info); - + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); @@ -997,7 +1004,11 @@ static int ompi_comm_split_type_core(ompi_communicator_t *comm, if (info) { opal_infosubscribe_change_info(&newcomp->super, info); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); @@ -1351,7 
+1362,11 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp if (info) { opal_infosubscribe_change_info(&newcomp->super, info); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* activate communicator and init coll-module */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); @@ -1442,7 +1457,12 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro if (info) { opal_info_dup(info, &(newcomp->super.s_info)); } - ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info); + + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } } ompi_comm_request_schedule_append (request, ompi_comm_idup_getcid, subreq, subreq[0] ? 1 : 0); @@ -1594,7 +1614,11 @@ int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info if (NULL == newcomp->super.s_info) { return OMPI_ERR_OUT_OF_RESOURCE; } - ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* activate communicator and init coll-module. use the group allreduce implementation as * no collective module has yet been selected. the tag does not matter as any tag will @@ -1736,7 +1760,12 @@ int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, om // Copy info if there is one. 
newcomp->super.s_info = OBJ_NEW(opal_info_t); - ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super, &ompi_mpi_info_null.info.super); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super, + &ompi_mpi_info_null.info.super, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } *newintercomm = newcomp; @@ -1900,7 +1929,11 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead if (info) { opal_info_dup(info, &(newcomp->super.s_info)); } - ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info); + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } /* activate communicator and init coll-module */ rc = ompi_comm_activate (&newcomp, local_comm, leader_comm, &local_leader, &leader_comm_remote_leader, diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 9f091974e7c..86e260c536d 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -305,11 +305,16 @@ int ompi_comm_init_mpi3 (void) char *memkind_requested = getenv ("OMPI_MCA_mpi_memory_alloc_kinds"); if (NULL != memkind_requested) { char *memkind_provided; + ompi_info_memkind_assert_type type; - ompi_info_memkind_process (memkind_requested, &memkind_provided); + ompi_info_memkind_process (memkind_requested, &memkind_provided, &type); opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb); opal_infosubscribe_subscribe (&ompi_mpi_comm_self.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb); opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.instance->super, 
"mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + ompi_mpi_comm_world.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + ompi_mpi_comm_self.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } free (memkind_provided); } diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index 1714a09befc..4a1b42ab0c4 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -106,6 +106,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008 #define OMPI_COMM_ASSERT_LAZY_BARRIER 0x00000010 #define OMPI_COMM_ASSERT_ACTIVE_POLL 0x00000020 +#define OMPI_COMM_ASSERT_NO_ACCEL_BUF 0x00000040 #define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag) #define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG) @@ -114,6 +115,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) #define OMPI_COMM_CHECK_ASSERT_LAZY_BARRIER(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_LAZY_BARRIER) #define OMPI_COMM_CHECK_ASSERT_ACTIVE_POLL(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ACTIVE_POLL) +#define OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ACCEL_BUF) /** * Modes required for acquiring the new comm-id. diff --git a/ompi/file/file.c b/ompi/file/file.c index 11e7a709d4f..c7e3759da16 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -18,7 +18,7 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2024 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -124,8 +124,11 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, if (info) { opal_info_dup(info, &(file->super.s_info)); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info); - + ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + file->f_flags |= OMPI_FILE_ASSERT_NO_ACCEL_BUF; + } file->f_amode = amode; file->f_filename = strdup(filename); if (NULL == file->f_filename) { diff --git a/ompi/file/file.h b/ompi/file/file.h index 512560aecb7..4ba7e0e3506 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -39,9 +39,9 @@ /* * Flags */ -#define OMPI_FILE_ISCLOSED 0x00000001 -#define OMPI_FILE_HIDDEN 0x00000002 - +#define OMPI_FILE_ISCLOSED 0x00000001 +#define OMPI_FILE_HIDDEN 0x00000002 +#define OMPI_FILE_ASSERT_NO_ACCEL_BUF 0x00000004 BEGIN_C_DECLS /** diff --git a/ompi/info/info_memkind.c b/ompi/info/info_memkind.c index 26d8dd99431..da5bb9c0bd2 100644 --- a/ompi/info/info_memkind.c +++ b/ompi/info/info_memkind.c @@ -56,6 +56,7 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind */ /* Separate requested_str into an array of individual entries */ + int current_max = 0; char **memkind_combos = opal_argv_split(memkind_str, ','); int max_num_memkinds = opal_argv_count(memkind_combos); @@ -71,7 +72,6 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind int iter = 0; char *m = memkind_combos[iter]; - int current_max = 0; while (m != NULL) { bool name_found = false; char **tmp_str = opal_argv_split (m, ':'); @@ -419,19 +419,52 @@ static bool ompi_info_memkind_validate (const char *assert_str, const char *pare return ret; } +static bool ompi_info_memkind_check_no_accel (int num_memkinds, ompi_memkind_t *memkinds) +{ + bool result = true; + + for (int i = 0; i < num_memkinds; i++) { + 
if (!strncmp(memkinds[i].im_name, "system", strlen("system"))) { + continue; + } + if (!strncmp(memkinds[i].im_name, "mpi", strlen("mpi"))) { + continue; + } + result = false; + break; + } + + return result; +} -int ompi_info_memkind_process (const char* requested_str, char **provided_str) +static bool ompi_info_memkind_check_no_accel_from_string (char *mstring) +{ + bool ret = false; + int num_memkinds; + ompi_memkind_t *memkinds = NULL; + + ompi_info_memkind_extract (mstring, &num_memkinds, &memkinds); + if (NULL != memkinds) { + ret = ompi_info_memkind_check_no_accel (num_memkinds, memkinds); + ompi_info_memkind_free(num_memkinds, memkinds); + } + + return ret; +} +int ompi_info_memkind_process (const char* requested_str, char **provided_str, + ompi_info_memkind_assert_type *type) { int err; char *tmp_str = NULL; - int num_requested_memkinds, num_available_memkinds, num_provided_memkinds; ompi_memkind_t *requested_memkinds = NULL ; ompi_memkind_t *available_memkinds = NULL; ompi_memkind_t *provided_memkinds = NULL; + ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED; if (NULL == requested_str) { *provided_str = NULL; + *type = assert_type; return OMPI_SUCCESS; } @@ -448,6 +481,10 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str) goto exit; } + if (ompi_info_memkind_check_no_accel (num_provided_memkinds, provided_memkinds)) { + assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL; + } + ompi_info_memkind_str_create(num_provided_memkinds, provided_memkinds, &tmp_str); exit: @@ -459,6 +496,7 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str) } // Don't free the available_memkinds, they will be released in info_finalize; + *type = assert_type; *provided_str = tmp_str; return err; } @@ -504,15 +542,17 @@ const char *ompi_info_memkind_cb (opal_infosubscriber_t *obj, const char *key, c ** value of another info key (mpi_memory_alloc_kinds). 
*/ int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubscriber_t *child, - opal_info_t *info) + opal_info_t *info, ompi_info_memkind_assert_type *type) { opal_cstring_t *parent_val; opal_cstring_t *assert_val; + ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED; char *final_str = NULL; int flag; opal_info_get(parent->s_info, "mpi_memory_alloc_kinds", &parent_val, &flag); if (0 == flag) { + *type = assert_type; return OMPI_SUCCESS; } final_str = (char*) parent_val->string; @@ -539,6 +579,12 @@ int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubsc opal_infosubscribe_subscribe (child, "mpi_memory_alloc_kinds", final_str, ompi_info_memkind_cb); OBJ_RELEASE(parent_val); + + if (ompi_info_memkind_check_no_accel_from_string(final_str)) { + assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL; + } + + *type = assert_type; return OMPI_SUCCESS; } diff --git a/ompi/info/info_memkind.h b/ompi/info/info_memkind.h index 42f21177454..a23a55ea27f 100644 --- a/ompi/info/info_memkind.h +++ b/ompi/info/info_memkind.h @@ -25,17 +25,26 @@ struct ompi_memkind_t { }; typedef struct ompi_memkind_t ompi_memkind_t; +typedef enum { + OMPI_INFO_MEMKIND_ASSERT_UNDEFINED = 0, // no statement on memkind usage + OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL, // no accelerator memory is used + OMPI_INFO_MEMKIND_ASSERT_ACCEL_DEVICE_ONLY, // only accelerator device memory used + OMPI_INFO_MEMKIND_ASSERT_ACCEL_ALL // only accelerator memory (no restrictors) used +} ompi_info_memkind_assert_type; + /* ** Given a string of user requested memory alloc kinds, create ** a string with the actually support memory kinds by the library. 
** ** @param[IN]: requested_str input string ** @param[OUT]: provided_str result string +** @param[OUT]: type guarantees given on memkind utilization ** ** @return: OMPI_SUCCESS or error on failure */ OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str, - char **provided_str); + char **provided_str, + ompi_info_memkind_assert_type *type); /* ** Set the memory_alloc_kind info object on the child object, either ** by copying it from the parent object, or adjusting it based @@ -46,12 +55,14 @@ OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str, ** @param [INOUT]: child child object ** @param[IN]: info info object provided by code during object creation ** (e.g. MPI_Comm_dup_with_info, MPI_File_open, etc.) +** @param[OUT]: type guarantees given on memkind utilization ** ** @return: OMPI_SUCCESS or error on failure */ OMPI_DECLSPEC int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubscriber_t *child, - opal_info_t *info); + opal_info_t *info, + ompi_info_memkind_assert_type *type); /* ** free the array of available memkinds when shutting down the info diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index 4c28d7b69a0..0b83e442b0c 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -859,13 +859,14 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t /* Copy info if there is one. 
*/ if (OPAL_UNLIKELY(NULL != info)) { opal_cstring_t *memkind_requested; + ompi_info_memkind_assert_type type; int flag; new_instance->super.s_info = OBJ_NEW(opal_info_t); opal_info_get(info, "mpi_memory_alloc_kinds", &memkind_requested, &flag); if (1 == flag) { char *memkind_provided; - ompi_info_memkind_process (memkind_requested->string, &memkind_provided); + ompi_info_memkind_process (memkind_requested->string, &memkind_provided, &type); opal_infosubscribe_subscribe (&new_instance->super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb); free (memkind_provided); diff --git a/ompi/mca/coll/accelerator/coll_accelerator_module.c b/ompi/mca/coll/accelerator/coll_accelerator_module.c index c4a08c2e4a6..6e242f2c353 100644 --- a/ompi/mca/coll/accelerator/coll_accelerator_module.c +++ b/ompi/mca/coll/accelerator/coll_accelerator_module.c @@ -82,6 +82,12 @@ mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm, return NULL; } + if (OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) { + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "coll:accelerator:comm_query: NO_ACCEL_BUF assertion set: disqualifying myself"); + return NULL; + } + accelerator_module = OBJ_NEW(mca_coll_accelerator_module_t); if (NULL == accelerator_module) { return NULL; diff --git a/ompi/mca/common/ompio/common_ompio_buffer.c b/ompi/mca/common/ompio/common_ompio_buffer.c index a3875e28dd5..c8ce40c0561 100644 --- a/ompi/mca/common/ompio/common_ompio_buffer.c +++ b/ompi/mca/common/ompio/common_ompio_buffer.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2008-2019 University of Houston. All rights reserved. * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,7 +48,11 @@ void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is *is_gpu=0; *is_managed=0; - + + if (fh->f_fh->f_flags & OMPI_FILE_ASSERT_NO_ACCEL_BUF) { + return; + } + if (0 < opal_accelerator.check_addr(buf, &dev_id, &flags)) { *is_gpu = 1; if (flags & MCA_ACCELERATOR_FLAGS_UNIFIED_MEMORY) { @@ -62,16 +67,14 @@ static void* mca_common_ompio_buffer_alloc_seg ( void*ctx, size_t *size ) { char *buf=NULL; size_t realsize, numpages; - uint64_t flags = 0; - int dev_id; numpages = (*size + mca_common_ompio_pagesize -1 )/mca_common_ompio_pagesize; realsize = numpages * mca_common_ompio_pagesize; - buf = malloc ( realsize); + buf = malloc (realsize); - if (NULL != buf && 0 == opal_accelerator.check_addr(buf, &dev_id, &flags)) { - opal_accelerator.host_register(dev_id, (void *)buf, realsize); + if (NULL != buf) { + opal_accelerator.host_register(MCA_ACCELERATOR_NO_DEVICE_ID, (void *)buf, realsize); } *size = realsize; diff --git a/ompi/mpi/c/intercomm_merge.c b/ompi/mpi/c/intercomm_merge.c index 7abafcda5f9..4d09f159612 100644 --- a/ompi/mpi/c/intercomm_merge.c +++ b/ompi/mpi/c/intercomm_merge.c @@ -140,9 +140,14 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, goto exit; } + ompi_info_memkind_assert_type type; newcomp->super.s_info = OBJ_NEW(opal_info_t); ompi_info_memkind_copy_or_set (&intercomm->instance->super, &newcomp->super, - &ompi_mpi_info_null.info.super); + &ompi_mpi_info_null.info.super, &type); + if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) { + newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF; + } + exit: diff --git a/ompi/win/win.c b/ompi/win/win.c index aba941a5082..87556a44c24 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -169,7 +169,8 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int if (info) { opal_info_dup(info, &(win->super.s_info)); } - ompi_info_memkind_copy_or_set (&comm->instance->super, &win->super, info); 
+ ompi_info_memkind_assert_type type; + ompi_info_memkind_copy_or_set (&comm->instance->super, &win->super, info, &type); ret = opal_info_get_value_enum (win->super.s_info, "accumulate_ops", &acc_ops, OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP,