From 9628d5b603a573a6aeb99accae08ab2e7ac9f8dd Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 9 Feb 2023 10:24:49 -0600 Subject: [PATCH] Introduce support for local complete --- content/shmem_local_complete.tex | 52 +++++++++++++++++++++ example_code/shmem_local_complete_example.c | 40 ++++++++++++++++ main_spec.tex | 15 ++++-- 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 content/shmem_local_complete.tex create mode 100644 example_code/shmem_local_complete_example.c diff --git a/content/shmem_local_complete.tex b/content/shmem_local_complete.tex new file mode 100644 index 00000000..b2ded37c --- /dev/null +++ b/content/shmem_local_complete.tex @@ -0,0 +1,52 @@ +\apisummary{ + Waits for data to be copied out of the \VAR{source} array for all + outstanding non-blocking \OPR{Put} and non-blocking \OPR{put-with-signal} + operations issued by a \ac{PE}. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_local\_complete}@(void); +void @\FuncDecl{shmem\_ctx\_local\_complete}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the operation is + performed on the default context.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_local\_complete} routine ensures local completion of all + non-blocking \OPR{Put} and non-blocking \OPR{put-with-signal} operations + issued by a \ac{PE}. Local completion guarantees that the + \VAR{source} buffers of these operations may be reused by the issuing \ac{PE}. + Local completion does not guarantee ordering, delivery, or + visibility of these operations on any \ac{PE}. 
+    Return from \FUNC{shmem\_local\_complete} guarantees only that the data has
+    been copied out of the \VAR{source} array for all previously posted
+    non-blocking \OPR{Put} and non-blocking \OPR{put-with-signal} operations
+    on the local \ac{PE}. The memory ordering routines described in
+    Section~\ref{subsec:memory_order} are still required to ensure the
+    ordering and/or delivery of the
+    non-blocking \OPR{Put} and non-blocking \OPR{put-with-signal} operations.
+}
+
+
+\apireturnvalues{
+    None.
+}
+
+\begin{apiexamples}
+
+\apicexample
+    {The following example uses \FUNC{shmem\_local\_complete} in a \Cstd[11] program: }
+    {./example_code/shmem_local_complete_example.c}
+    {\FUNC{shmem\_local\_complete} allows reusing the \VAR{source} buffer
+    without waiting for the operation to complete and become visible on the
+    target \ac{PE} \VAR{tpe}.}
+\end{apiexamples}
+
+\end{apidefinition}
+
diff --git a/example_code/shmem_local_complete_example.c b/example_code/shmem_local_complete_example.c
new file mode 100644
index 00000000..15ea4664
--- /dev/null
+++ b/example_code/shmem_local_complete_example.c
@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <shmem.h>
+
+#define SIZE 10
+
+/*
+ * PE 0 sends a different value to every other PE from a single source buffer,
+ * calling shmem_local_complete before each overwrite of that buffer.
+ */
+int main(void) {
+  int count = 0;
+  shmem_init();
+  static uint64_t sig_addr = 0;
+  int *dest = shmem_malloc(SIZE * sizeof(int));
+  int *source = shmem_malloc(SIZE * sizeof(int));
+  int mype = shmem_my_pe();
+  int npes = shmem_n_pes();
+
+  for (int i = 0; i < SIZE; i++) {
+    source[i] = count;
+    dest[i] = 0;
+  }
+
+  /* All PEs must finish initializing dest before PE 0 writes into it. */
+  shmem_barrier_all();
+
+  if (mype == 0) {
+    for (int tpe = 1; tpe < npes; tpe++) {
+      shmem_put_signal_nbi(dest, source, SIZE, &sig_addr, 1, SHMEM_SIGNAL_SET, tpe);
+      /* source is refilled below, so wait for local completion first. */
+      shmem_local_complete();
+      count++;
+      for (int i = 0; i < SIZE; i++) {
+        source[i] = count;
+      }
+    }
+  } else {
+    shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1);
+    for (int i = 0; i < SIZE; i++) {
+      /* PE 0 filled source with (tpe - 1) before sending to PE tpe. */
+      if (dest[i] != mype - 1) {
+        count++;
+      }
+    }
+    if (count) fprintf(stderr, "Program Error\n");
+  }
+
+  shmem_free(source);
+  shmem_free(dest);
+  shmem_finalize();
+  return 0;
+}
diff --git a/main_spec.tex
b/main_spec.tex index f657471f..b3a5a776 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -486,9 +486,18 @@ \subsubsection{Synchronization and Communication Ordering in OpenSHMEM} \input{content/synchronization_model.tex} - - - +\color{teal} +\subsection{Local Completion Operations}\label{sec:lcomplete} +This section specifies the OpenSHMEM support for the \OPR{local-complete} +operation. The \OPR{local-complete} operation provides a method for reusing +the \VAR{source} buffers associated with the process initiating the +non-blocking remote memory access routines and signaling operations as +specified in Section~\ref{subsec:shmem_put_nbi} and +Section~\ref{subsec:shmem_put_signal_nbi}, respectively. + +\subsubsection{\textbf{SHMEM\_LOCAL\_COMPLETE}}\label{subsec:shmem_local_complete} +\input{content/shmem_local_complete.tex} +\color{black} \subsection{Distributed Locking Routines} The following section discusses \openshmem locks as a mechanism to provide