diff --git a/include/shmem.h.in.tpl b/include/shmem.h.in.tpl index 1df1ba8..09e8a81 100644 --- a/include/shmem.h.in.tpl +++ b/include/shmem.h.in.tpl @@ -88,9 +88,9 @@ typedef void* shmem_team_t; typedef struct { int num_contexts; } shmem_team_config_t; -#define SHMEM_TEAM_WORLD (shmem_team_t) 0x90000 -#define SHMEM_TEAM_SHARED (shmem_team_t) 0x90001 -#define SHMEM_TEAM_INVALID NULL +#define SHMEM_TEAM_WORLD ((shmem_team_t)0x90000) +#define SHMEM_TEAM_SHARED ((shmem_team_t)0x90001) +#define SHMEM_TEAM_INVALID ((shmem_team_t)NULL) #define SHMEM_TEAM_NUM_CONTEXTS 0x091001L @@ -102,10 +102,10 @@ typedef struct { #define SHMEM_CTX_SERIALIZED 0x001001L #define SHMEM_CTX_PRIVATE 0x001002L #define SHMEM_CTX_NOSTORE 0x001003L -#define SHMEM_CTX_INVALID (-1L) typedef void* shmem_ctx_t; -#define SHMEM_CTX_DEFAULT (shmem_ctx_t) 0x80000 +#define SHMEM_CTX_DEFAULT ((shmem_ctx_t)0x80000) +#define SHMEM_CTX_INVALID ((shmem_ctx_t)NULL) /* Signaling Operations */ #define SHMEM_SIGNAL_SET 0 @@ -339,6 +339,9 @@ void shmem_clear_lock(long *lock); void shmem_set_lock(long *lock); int shmem_test_lock(long *lock); +/* -- Profiling Interface -- */ +void shmem_pcontrol(int level, ...); + /* -- Cache Management -- */ /* (deprecated APIs) */ void shmem_clear_cache_inv(void); diff --git a/include/shmem_amo_bitws_typed.h.tpl b/include/shmem_amo_bitws_typed.h.tpl index a7025b9..ab23142 100644 --- a/include/shmem_amo_bitws_typed.h.tpl +++ b/include/shmem_amo_bitws_typed.h.tpl @@ -13,14 +13,14 @@ TYPE shmem_ctx_TYPENAME_atomic_fetch_xor(shmem_ctx_t ctx, TYPE * dest, TYPE valu void shmem_TYPENAME_atomic_xor(TYPE * dest, TYPE value, int pe); void shmem_ctx_TYPENAME_atomic_xor(shmem_ctx_t ctx, TYPE * dest, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_fetch_and_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_and_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_TYPENAME_atomic_fetch_and_nbi(TYPE * fetch, TYPE * dest, TYPE value, int 
pe); +void shmem_ctx_TYPENAME_atomic_fetch_and_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_fetch_or_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_or_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_TYPENAME_atomic_fetch_or_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); +void shmem_ctx_TYPENAME_atomic_fetch_or_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_fetch_xor_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_xor_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_TYPENAME_atomic_fetch_xor_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); +void shmem_ctx_TYPENAME_atomic_fetch_xor_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe); /* TPL_BLOCK_END */ diff --git a/include/shmem_amo_ext_typed.h.tpl b/include/shmem_amo_ext_typed.h.tpl index 427cde4..db9fa3c 100644 --- a/include/shmem_amo_ext_typed.h.tpl +++ b/include/shmem_amo_ext_typed.h.tpl @@ -7,11 +7,11 @@ void shmem_ctx_TYPENAME_atomic_set(shmem_ctx_t ctx, TYPE * dest, TYPE value, int TYPE shmem_TYPENAME_atomic_swap(TYPE * dest, TYPE value, int pe); TYPE shmem_ctx_TYPENAME_atomic_swap(shmem_ctx_t ctx, TYPE * dest, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_fetch_nbi(TYPE * fetch, const TYPE * source, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_nbi(shmem_ctx_t ctx, TYPE * fetch, const TYPE * source, +void shmem_TYPENAME_atomic_fetch_nbi(TYPE * fetch, const TYPE * source, int pe); +void shmem_ctx_TYPENAME_atomic_fetch_nbi(shmem_ctx_t ctx, TYPE * fetch, const TYPE * source, int pe); -TYPE shmem_TYPENAME_atomic_swap_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_TYPENAME_atomic_swap_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); +void 
shmem_ctx_TYPENAME_atomic_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe); /* Deprecated APIs start */ TYPE shmem_TYPENAME_fetch(const TYPE * source, int pe); diff --git a/include/shmem_amo_std_typed.h.tpl b/include/shmem_amo_std_typed.h.tpl index 059ab3c..32c9a8d 100644 --- a/include/shmem_amo_std_typed.h.tpl +++ b/include/shmem_amo_std_typed.h.tpl @@ -12,14 +12,14 @@ TYPE shmem_ctx_TYPENAME_atomic_fetch_add(shmem_ctx_t ctx, TYPE * dest, TYPE valu void shmem_TYPENAME_atomic_add(TYPE * dest, TYPE value, int pe); void shmem_ctx_TYPENAME_atomic_add(shmem_ctx_t ctx, TYPE * dest, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_compare_swap_nbi(TYPE * fetch,TYPE * dest, TYPE cond, TYPE value, +void shmem_TYPENAME_atomic_compare_swap_nbi(TYPE * fetch,TYPE * dest, TYPE cond, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_compare_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, +void shmem_ctx_TYPENAME_atomic_compare_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE cond, TYPE value, int pe); -TYPE shmem_TYPENAME_atomic_fetch_inc_nbi(TYPE * fetch, TYPE * dest, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_inc_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, int pe); -TYPE shmem_TYPENAME_atomic_fetch_add_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); -TYPE shmem_ctx_TYPENAME_atomic_fetch_add_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_TYPENAME_atomic_fetch_inc_nbi(TYPE * fetch, TYPE * dest, int pe); +void shmem_ctx_TYPENAME_atomic_fetch_inc_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, int pe); +void shmem_TYPENAME_atomic_fetch_add_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe); +void shmem_ctx_TYPENAME_atomic_fetch_add_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe); /* Deprecated APIs start */ TYPE shmem_TYPENAME_cswap(TYPE * dest, TYPE cond, TYPE value, int pe); diff --git a/include/shmem_p2p_typed.h.tpl b/include/shmem_p2p_typed.h.tpl index a8d898e..1dcef15 100644 --- 
a/include/shmem_p2p_typed.h.tpl +++ b/include/shmem_p2p_typed.h.tpl @@ -19,7 +19,7 @@ int shmem_TYPENAME_test_all(TYPE * ivars, size_t nelems, const int *status, int size_t shmem_TYPENAME_test_any(TYPE * ivars, size_t nelems, const int *status, int cmp, TYPE cmp_value); size_t shmem_TYPENAME_test_some(TYPE * ivars, size_t nelems, size_t *indices, const int *status, - int cmp, TYPE * cmp_values); + int cmp, TYPE cmp_value); int shmem_TYPENAME_test_all_vector(TYPE * ivars, size_t nelems, const int *status, int cmp, TYPE * cmp_values); size_t shmem_TYPENAME_test_any_vector(TYPE * ivars, size_t nelems, const int *status, int cmp, diff --git a/include/shmem_reduce_bitws_team_typed.h.tpl b/include/shmem_reduce_bitws_team_typed.h.tpl index dedbfd5..80093d9 100644 --- a/include/shmem_reduce_bitws_team_typed.h.tpl +++ b/include/shmem_reduce_bitws_team_typed.h.tpl @@ -1,8 +1,8 @@ /* The following lines are automatically generated. DO NOT EDIT. */ /* TPL_BLOCK_START */ -int shmem_TYPENAME_and_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); -int shmem_TYPENAME_or_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); -int shmem_TYPENAME_xor_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); +int shmem_TYPENAME_and_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); +int shmem_TYPENAME_or_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); +int shmem_TYPENAME_xor_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); /* TPL_BLOCK_END */ /* *INDENT-OFF* */ diff --git a/include/shmem_reduce_minmax_team_typed.h.tpl b/include/shmem_reduce_minmax_team_typed.h.tpl index eb06485..4a960a1 100644 --- a/include/shmem_reduce_minmax_team_typed.h.tpl +++ b/include/shmem_reduce_minmax_team_typed.h.tpl @@ -1,7 +1,7 @@ /* The following lines are automatically generated. DO NOT EDIT. 
*/ /* TPL_BLOCK_START */ -int shmem_TYPENAME_min_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); -int shmem_TYPENAME_max_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); +int shmem_TYPENAME_min_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); +int shmem_TYPENAME_max_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); /* TPL_BLOCK_END */ /* *INDENT-OFF* */ diff --git a/include/shmem_reduce_sumprod_team_typed.h.tpl b/include/shmem_reduce_sumprod_team_typed.h.tpl index 064bd54..15ac382 100644 --- a/include/shmem_reduce_sumprod_team_typed.h.tpl +++ b/include/shmem_reduce_sumprod_team_typed.h.tpl @@ -1,7 +1,7 @@ /* The following lines are automatically generated. DO NOT EDIT. */ /* TPL_BLOCK_START */ -int shmem_TYPENAME_sum_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); -int shmem_TYPENAME_prod_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce); +int shmem_TYPENAME_sum_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); +int shmem_TYPENAME_prod_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce); /* TPL_BLOCK_END */ /* *INDENT-OFF* */ diff --git a/src/include/oshmpi_impl.h b/src/include/oshmpi_impl.h index e8ecd61..74d4a8d 100644 --- a/src/include/oshmpi_impl.h +++ b/src/include/oshmpi_impl.h @@ -481,7 +481,7 @@ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_alltoalls_team(OSHMPI_team_t * team, voi ptrdiff_t sst, size_t nelems, MPI_Datatype mpi_type); OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_allreduce_team(OSHMPI_team_t * team, void *dest, - const void *source, int count, + const void *source, size_t count, MPI_Datatype mpi_type, MPI_Op op); /* for deprecated active-set-based collectives */ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_barrier(int PE_start, int logPE_stride, int PE_size); diff --git a/src/internal/coll_impl.h b/src/internal/coll_impl.h index c06cf05..243168d 100644 --- 
a/src/internal/coll_impl.h +++ b/src/internal/coll_impl.h @@ -52,8 +52,19 @@ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_broadcast_team(OSHMPI_team_t * team, voi const void *source, size_t nelems, MPI_Datatype mpi_type, int PE_root) { - OSHMPI_am_progress_mpi_bcast(PE_root == OSHMPI_global.team_world_my_pe ? (void *) source : dest, + MPI_Aint lb, extent; + int typesize; + + OSHMPI_am_progress_mpi_bcast(PE_root == team->my_pe ? (void *) source : dest, nelems, mpi_type, PE_root, team->comm); + + if (PE_root == team->my_pe) { + OSHMPI_CALLMPI(MPI_Type_get_extent(mpi_type, &lb, &extent)); + OSHMPI_ASSERT(lb == 0); + OSHMPI_CALLMPI(MPI_Type_size(mpi_type, &typesize)); + OSHMPI_ASSERT(extent == typesize); + memcpy(dest, source, nelems * (size_t) extent); + } } OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_collect_team(OSHMPI_team_t * team, void *dest, @@ -140,12 +151,12 @@ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_alltoalls_team(OSHMPI_team_t * team, voi } OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_allreduce_team(OSHMPI_team_t * team, void *dest, - const void *source, int count, + const void *source, size_t nreduce, MPI_Datatype mpi_type, MPI_Op op) { /* source and dest may be the same array, but may not be overlapping. */ OSHMPI_am_progress_mpi_allreduce((source == dest) ? MPI_IN_PLACE : source, - dest, count, mpi_type, op, team->comm); + dest, (int) nreduce, mpi_type, op, team->comm); } #endif /* INTERNAL_COLL_IMPL_H */ diff --git a/src/internal/ctx_impl.c b/src/internal/ctx_impl.c index cf53883..812a798 100644 --- a/src/internal/ctx_impl.c +++ b/src/internal/ctx_impl.c @@ -8,9 +8,6 @@ void OSHMPI_ctx_destroy(OSHMPI_ctx_t * ctx) { - if (ctx == SHMEM_CTX_DEFAULT) - return; - OSHMPIU_ATOMIC_FLAG_STORE(ctx->used_flag, 0); /* Do not free window. 
diff --git a/src/internal/strided_impl.h b/src/internal/strided_impl.h index ea017f0..3dc4926 100644 --- a/src/internal/strided_impl.h +++ b/src/internal/strided_impl.h @@ -140,6 +140,8 @@ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_create_strided_dtype(size_t nelems, ptrd /* Slow path: create a new datatype and cache it */ MPI_Datatype vtype = MPI_DATATYPE_NULL; size_t elem_bytes = 0; + MPI_Aint lb, extent; + int typesize; OSHMPI_CALLMPI(MPI_Type_vector((int) nelems, 1, (int) stride, mpi_type, &vtype)); @@ -148,10 +150,19 @@ OSHMPI_STATIC_INLINE_PREFIX void OSHMPI_create_strided_dtype(size_t nelems, ptrd * Extent can be negative in MPI, however, we do not expect such case in OSHMPI. * Thus skip any negative one */ if (required_ext_nelems > 0) { - if (mpi_type == OSHMPI_MPI_COLL32_T) + if (mpi_type == OSHMPI_MPI_COLL_BYTE_T) + elem_bytes = 1; + else if (mpi_type == OSHMPI_MPI_COLL32_T) elem_bytes = 4; - else + else if (mpi_type == OSHMPI_MPI_COLL64_T) elem_bytes = 8; + else { + OSHMPI_CALLMPI(MPI_Type_get_extent(mpi_type, &lb, &extent)); + OSHMPI_ASSERT(lb == 0); + OSHMPI_CALLMPI(MPI_Type_size(mpi_type, &typesize)); + OSHMPI_ASSERT(extent == typesize); + elem_bytes = (size_t) extent; + } OSHMPI_CALLMPI(MPI_Type_create_resized (vtype, 0, required_ext_nelems * elem_bytes, strided_type)); } else diff --git a/src/shmem/Makefile.mk b/src/shmem/Makefile.mk index db3828d..faad28d 100644 --- a/src/shmem/Makefile.mk +++ b/src/shmem/Makefile.mk @@ -28,6 +28,7 @@ liboshmpi_la_SOURCES += src/shmem/setup.c \ src/shmem/p2p_typed.c \ src/shmem/order.c \ src/shmem/lock.c \ + src/shmem/profile.c \ src/shmem/cache.c EXTRA_DIST += src/shmem/rma_typed.c.tpl \ diff --git a/src/shmem/amo_bitws_typed.c.tpl b/src/shmem/amo_bitws_typed.c.tpl index bf00a94..4b3b2d1 100644 --- a/src/shmem/amo_bitws_typed.c.tpl +++ b/src/shmem/amo_bitws_typed.c.tpl @@ -94,55 +94,43 @@ void shmem_ctx_TYPENAME_atomic_xor(shmem_ctx_t ctx, TYPE * dest, TYPE value, int dest, &value, pe); } -TYPE 
shmem_TYPENAME_atomic_fetch_and_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) +void shmem_TYPENAME_atomic_fetch_and_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BAND, - OSHMPI_AM_MPI_BAND, dest, &value, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_BAND, dest, &value, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_fetch_and_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_ctx_TYPENAME_atomic_fetch_and_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BAND, - OSHMPI_AM_MPI_BAND, dest, &value, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_BAND, dest, &value, pe, fetch); } -TYPE shmem_TYPENAME_atomic_fetch_or_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) +void shmem_TYPENAME_atomic_fetch_or_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BOR, - OSHMPI_AM_MPI_BOR, dest, &value, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_BOR, dest, &value, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_fetch_or_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_ctx_TYPENAME_atomic_fetch_or_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BOR, OSHMPI_AM_MPI_BOR, - dest, &value, pe, &oldval); - return oldval; + dest, &value, pe, fetch); } -TYPE shmem_TYPENAME_atomic_fetch_xor_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) +void shmem_TYPENAME_atomic_fetch_xor_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BXOR, - OSHMPI_AM_MPI_BOR, dest, &value, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_BXOR, dest, &value, 
pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_fetch_xor_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_ctx_TYPENAME_atomic_fetch_xor_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; - OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BOR, OSHMPI_AM_MPI_BXOR, - dest, &value, pe, &oldval); - return oldval; + OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_BXOR, OSHMPI_AM_MPI_BXOR, + dest, &value, pe, fetch); } /* TPL_BLOCK_END */ diff --git a/src/shmem/amo_ext_typed.c.tpl b/src/shmem/amo_ext_typed.c.tpl index 4d922ba..10cdda7 100644 --- a/src/shmem/amo_ext_typed.c.tpl +++ b/src/shmem/amo_ext_typed.c.tpl @@ -55,37 +55,29 @@ TYPE shmem_ctx_TYPENAME_atomic_swap(shmem_ctx_t ctx, TYPE * dest, TYPE value, in return oldval; } -TYPE shmem_TYPENAME_atomic_fetch_nbi(TYPE * fetch, const TYPE * source, int pe) +void shmem_TYPENAME_atomic_fetch_nbi(TYPE * fetch, const TYPE * source, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_NO_OP, - OSHMPI_AM_MPI_NO_OP, (void *) source, NULL, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_NO_OP, (void *) source, NULL, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_fetch_nbi(shmem_ctx_t ctx, TYPE * fetch, const TYPE * source, int pe) +void shmem_ctx_TYPENAME_atomic_fetch_nbi(shmem_ctx_t ctx, TYPE * fetch, const TYPE * source, int pe) { - TYPE oldval; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_NO_OP, - OSHMPI_AM_MPI_NO_OP, (void *) source, NULL, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_NO_OP, (void *) source, NULL, pe, fetch); } -TYPE shmem_TYPENAME_atomic_swap_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) +void shmem_TYPENAME_atomic_swap_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_REPLACE, - OSHMPI_AM_MPI_REPLACE, dest, &value, pe, 
&oldval); - return oldval; + OSHMPI_AM_MPI_REPLACE, dest, &value, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_ctx_TYPENAME_atomic_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_REPLACE, - OSHMPI_AM_MPI_REPLACE, dest, &value, pe, &oldval); - return oldval; + OSHMPI_AM_MPI_REPLACE, dest, &value, pe, fetch); } /* Deprecated APIs start */ diff --git a/src/shmem/amo_std_typed.c.tpl b/src/shmem/amo_std_typed.c.tpl index 5ed5808..2229c77 100644 --- a/src/shmem/amo_std_typed.c.tpl +++ b/src/shmem/amo_std_typed.c.tpl @@ -85,55 +85,45 @@ void shmem_ctx_TYPENAME_atomic_add(shmem_ctx_t ctx, TYPE * dest, TYPE value, int dest, &value, pe); } -TYPE shmem_TYPENAME_atomic_compare_swap_nbi(TYPE * fetch, TYPE * dest, TYPE cond, TYPE value, +void shmem_TYPENAME_atomic_compare_swap_nbi(TYPE * fetch, TYPE * dest, TYPE cond, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_cswap(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), - dest, &cond, &value, pe, &oldval); - return oldval; + dest, &cond, &value, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_compare_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, +void shmem_ctx_TYPENAME_atomic_compare_swap_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE cond, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_cswap(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), dest, &cond, &value, pe, - &oldval); - return oldval; + fetch); } -TYPE shmem_TYPENAME_atomic_fetch_inc_nbi(TYPE * fetch, TYPE * dest, int pe) +void shmem_TYPENAME_atomic_fetch_inc_nbi(TYPE * fetch, TYPE * dest, int pe) { - TYPE one = 1, oldval; + TYPE one = 1; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), - MPI_SUM, OSHMPI_AM_MPI_SUM, dest, &one, pe, &oldval); - return oldval; + MPI_SUM, OSHMPI_AM_MPI_SUM, dest, &one, pe, fetch); } -TYPE 
shmem_ctx_TYPENAME_atomic_fetch_inc_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, int pe) +void shmem_ctx_TYPENAME_atomic_fetch_inc_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, int pe) { - TYPE one = 1, oldval; + TYPE one = 1; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_SUM, OSHMPI_AM_MPI_SUM, - dest, &one, pe, &oldval); - return oldval; + dest, &one, pe, fetch); } -TYPE shmem_TYPENAME_atomic_fetch_add_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) +void shmem_TYPENAME_atomic_fetch_add_nbi(TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(SHMEM_CTX_DEFAULT, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), - MPI_SUM, OSHMPI_AM_MPI_SUM, dest, &value, pe, &oldval); - return oldval; + MPI_SUM, OSHMPI_AM_MPI_SUM, dest, &value, pe, fetch); } -TYPE shmem_ctx_TYPENAME_atomic_fetch_add_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, +void shmem_ctx_TYPENAME_atomic_fetch_add_nbi(shmem_ctx_t ctx, TYPE * fetch, TYPE * dest, TYPE value, int pe) { - TYPE oldval; OSHMPI_amo_fetch(ctx, MPI_TYPE, OSHMPI_AM_MPI_TYPE, sizeof(TYPE), MPI_SUM, OSHMPI_AM_MPI_SUM, - dest, &value, pe, &oldval); - return oldval; + dest, &value, pe, fetch); } /* Deprecated APIs start */ diff --git a/src/shmem/coll.c b/src/shmem/coll.c index d276bef..972f3d2 100644 --- a/src/shmem/coll.c +++ b/src/shmem/coll.c @@ -13,7 +13,10 @@ void shmem_barrier_all(void) int shmem_team_sync(shmem_team_t team) { - OSHMPI_ASSERT(0); + if (team == SHMEM_TEAM_WORLD) { + OSHMPI_sync_all(); + return SHMEM_SUCCESS; + } return SHMEM_OTHER_ERR; } diff --git a/src/shmem/context.c b/src/shmem/context.c index 11a3477..f33e021 100644 --- a/src/shmem/context.c +++ b/src/shmem/context.c @@ -12,23 +12,36 @@ int shmem_ctx_create(long options, shmem_ctx_t * ctx) * We cannot support it in OSHMPI unless MPI exposes the communication * resource management to users (e.g., endpoint). 
* - * Spec v1.4 defines: an unsuccessful context creation call is not treated + * Spec v1.5 defines: an unsuccessful context creation call is not treated * as an error and the OpenSHMEM library remains in a correct state. */ + *ctx = SHMEM_CTX_INVALID; return SHMEM_NO_CTX; } void shmem_ctx_destroy(shmem_ctx_t ctx) { + if (ctx == SHMEM_CTX_INVALID || ctx == SHMEM_CTX_DEFAULT) { + return; + } OSHMPI_ctx_destroy((OSHMPI_ctx_t *) ctx); } int shmem_team_create_ctx(shmem_team_t team, long options, shmem_ctx_t * ctx) { + if (team == SHMEM_TEAM_INVALID) { + *ctx = SHMEM_CTX_INVALID; + return SHMEM_OTHER_ERR; + } + *ctx = SHMEM_CTX_INVALID; return SHMEM_NO_CTX; } int shmem_ctx_get_team(shmem_ctx_t ctx, shmem_team_t * team) { - OSHMPI_ASSERT(0); + if (ctx == SHMEM_CTX_DEFAULT) { + *team = SHMEM_TEAM_WORLD; + return SHMEM_SUCCESS; + } + *team = SHMEM_TEAM_INVALID; return SHMEM_OTHER_ERR; } diff --git a/src/shmem/mem.c b/src/shmem/mem.c index d556215..1b6f966 100644 --- a/src/shmem/mem.c +++ b/src/shmem/mem.c @@ -39,8 +39,13 @@ void *shmem_align(size_t alignment, size_t size) void *shmem_malloc_with_hints(size_t size, long hints) { - OSHMPI_ASSERT(0); - return NULL; + /* OpenSHMEM 1.5 Section 9.3.2: + * The information provided by the hints is used to optimize for performance by the implementation. + * If the implementation cannot optimize, the behavior is same as shmem_malloc. 
*/ + void *ptr = NULL; + OSHMPI_NOINLINE_RECURSIVE() + ptr = OSHMPI_malloc(size); + return ptr; } void *shmem_calloc(size_t count, size_t size) diff --git a/src/shmem/p2p_typed.c.tpl b/src/shmem/p2p_typed.c.tpl index 645a373..0986df0 100644 --- a/src/shmem/p2p_typed.c.tpl +++ b/src/shmem/p2p_typed.c.tpl @@ -79,7 +79,7 @@ size_t shmem_TYPENAME_test_any(TYPE * ivars, size_t nelems, const int *status, i } size_t shmem_TYPENAME_test_some(TYPE * ivars, size_t nelems, size_t *indices, const int *status, - int cmp, TYPE * cmp_values) + int cmp, TYPE cmp_value) { OSHMPI_ASSERT(0); return SHMEM_OTHER_ERR; diff --git a/src/shmem/profile.c b/src/shmem/profile.c new file mode 100644 index 0000000..b8c32a7 --- /dev/null +++ b/src/shmem/profile.c @@ -0,0 +1,11 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * (C) 2023 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ +#include <shmem.h> +#include "oshmpi_impl.h" + +void shmem_pcontrol(int level, ...) +{ +} diff --git a/src/shmem/reduce_bitws_team_typed.c.tpl b/src/shmem/reduce_bitws_team_typed.c.tpl index 4285f71..179e537 100644 --- a/src/shmem/reduce_bitws_team_typed.c.tpl +++ b/src/shmem/reduce_bitws_team_typed.c.tpl @@ -10,7 +10,7 @@ #include "oshmpi_impl.h" /* TPL_BLOCK_START */ -int shmem_TYPENAME_and_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_and_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); @@ -18,7 +18,7 @@ int shmem_TYPENAME_and_reduce(shmem_team_t team, TYPE * dest, const TYPE * sourc return SHMEM_SUCCESS; } -int shmem_TYPENAME_or_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_or_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); @@ -26,7 +26,7 @@ int shmem_TYPENAME_or_reduce(shmem_team_t team, TYPE * dest, const TYPE * 
source return SHMEM_SUCCESS; } -int shmem_TYPENAME_xor_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_xor_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); diff --git a/src/shmem/reduce_minmax_team_typed.c.tpl b/src/shmem/reduce_minmax_team_typed.c.tpl index 39e7d5a..c1f3cf2 100644 --- a/src/shmem/reduce_minmax_team_typed.c.tpl +++ b/src/shmem/reduce_minmax_team_typed.c.tpl @@ -10,7 +10,7 @@ #include "oshmpi_impl.h" /* TPL_BLOCK_START */ -int shmem_TYPENAME_min_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_min_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); @@ -18,7 +18,7 @@ int shmem_TYPENAME_min_reduce(shmem_team_t team, TYPE * dest, const TYPE * sourc return SHMEM_SUCCESS; } -int shmem_TYPENAME_max_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_max_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); diff --git a/src/shmem/reduce_sumprod_team_typed.c.tpl b/src/shmem/reduce_sumprod_team_typed.c.tpl index 7f37648..9045f67 100644 --- a/src/shmem/reduce_sumprod_team_typed.c.tpl +++ b/src/shmem/reduce_sumprod_team_typed.c.tpl @@ -10,7 +10,7 @@ #include "oshmpi_impl.h" /* TPL_BLOCK_START */ -int shmem_TYPENAME_sum_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int shmem_TYPENAME_sum_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj); @@ -18,7 +18,7 @@ int shmem_TYPENAME_sum_reduce(shmem_team_t team, TYPE * dest, const TYPE * sourc return SHMEM_SUCCESS; } -int shmem_TYPENAME_prod_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, int nreduce) +int 
shmem_TYPENAME_prod_reduce(shmem_team_t team, TYPE * dest, const TYPE * source, size_t nreduce) { OSHMPI_team_t *team_obj; OSHMPI_TEAM_GET_OBJ(team, team_obj);