From 4b9f48db6afc410892341dea6593e57274086781 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Mon, 26 Oct 2020 06:10:07 -0700 Subject: [PATCH 01/27] Adding first draft for nb collective intro, broadcast, and test --- content/nb_collectives_intro.tex | 35 ++++++++++ content/shmem_broadcast_nb.tex | 104 ++++++++++++++++++++++++++++++ content/shmem_collective_test.tex | 35 ++++++++++ main_spec.tex | 10 +++ 4 files changed, 184 insertions(+) create mode 100644 content/nb_collectives_intro.tex create mode 100644 content/shmem_broadcast_nb.tex create mode 100644 content/shmem_collective_test.tex diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex new file mode 100644 index 00000000..3430024b --- /dev/null +++ b/content/nb_collectives_intro.tex @@ -0,0 +1,35 @@ +An OpenSHMEM nonblocking collective operation, like blocking collective +operation, is a group communication operation among the +participants of the team. All participants of the team are required to call the +collective operation. + +\begin{enumerate} + +\item Invocation semantics: The non-blocking collective routine initializes the +buffers, operation type, reduction type, and posts the collective operation. All +participants of the team should call this routine. The routine returns +immediately after posting the operation. + +\item Collective Types: Currently, only the nonblocking alltoall, broadcast, and reduction collective +operations are supported. The reduction operations supported are defined in the +Table \ref{reducetypes}. + +\item Completion semantics: Upon invocation, the collective operations are +posted and returns immediately. A user can learn the status of the collective operations +using the \FUNC{shmem\_collective\_test} routine and can be completed using +the \FUNC{shmem\_collective\_wait} routine. 
+ +\item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem +programs are allowed to call multiple collective operations on different threads +and the same Team. The collective operations invoked on different threads +are ordered by user-provided tag. When the user does not provide the tag, the +library generates the tag and establishes the order. + +\end{enumerate} + +Note: Like other nonblocking \openshmem operations, the implementations are +expected to asynchronously progress the collective operations. The guidance on +asynchronous progress is provided in Section \ref{subsec:progress}. + + + diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex new file mode 100644 index 00000000..635af6f0 --- /dev/null +++ b/content/shmem_broadcast_nb.tex @@ -0,0 +1,104 @@ +\apisummary{ + Broadcasts a block of data from one \ac{PE} to one or more destination + \acp{PE}. +} + +\begin{apidefinition} + +%% C11 +\begin{C11synopsis} +int @\FuncDecl{shmem\_broadcast\_nb}@(shmem_team_t team, TYPE *dest, const TYPE +*source, size_t nelems, int PE_root,uint32_t tag, shmem_req_h *request); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. + +%% C/C++ +\begin{Csynopsis} +\end{Csynopsis} +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast\_nb}@(shmem_team_t team, TYPE +*dest, const TYPE *source, size_t nelems, int PE_root, uint32_t tag, +shmem_req_h *request); +\end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. + +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_broadcastmem\_nb}@(shmem_team_t team, void *dest, const void +*source, size_t nelems, int PE_root, uint32_t tag, shmem_req_h *request); +\end{CsynopsisCol} + +\begin{apiarguments} + +\apiargument{IN}{team}{The team over which to perform the operation.}% + +\apiargument{OUT}{dest}{Symmetric address of destination data object. 
+ The type of \dest{} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{source}{Symmetric address of the source data object. + The type of \source{} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{nelems}{ + The number of elements in \source{} and \dest{} arrays. + For \FUNC{shmem\_broadcastmem\_nb}, elements are bytes; + for \FUNC{shmem\_broadcast\{32,64\}\_nb}, elements are 4 or 8 bytes, + respectively. +} +\apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to + the team, from which the data is copied.} +\apiargument{IN}{tag}{A user defined tag to order the collective operation.} +\apiargument{OUT}{request}{An opaque request handle identifying the collective operation} + + +\end{apiarguments} + +\apidescription{ + \openshmem nonblocking broadcast routines are collective routines over a + valid \openshmem team. + They copy the \source{} data object on the \ac{PE} specified by + \VAR{PE\_root} to the \dest{} data object on the \acp{PE} + participating in the collective operation. + The same \dest{} and \source{} data objects and the same value of + \VAR{PE\_root} must be passed by all \acp{PE} participating in the + collective operation. + + A call to the nonblocking broadcast routine returns immediately without + necessarily completing the operation. The operation is completed after a + call to \FUNC{shmem\_collective\_test} or \FUNC{shmem\_collective\_wait}. + + Like blocking broadcast, before any \ac{PE} calls a broadcast routine, the following + conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \item All \acp{PE} in the \VAR{team} argument must participate in + the operation. + \item If the \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is + otherwise invalid, the behavior is undefined. + \item \ac{PE} numbering is relative to the team. 
The specified + root \ac{PE} must be a valid \ac{PE} number for the team, + between \CONST{0} and \VAR{N$-$1}, where \VAR{N} is the size of + the team. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon completion of a nonblocking broadcast routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} data object is + updated. + \item The \source{} data object may be safely reused. + \end{itemize} +} + + +\apireturnvalues{ + Zero on successfull posting of the collective + operation; otherwise, nonzero. +} + +\apinotes{ + Team handle error checking and integer return codes are currently undefined. + Implementations may define these behaviors as needed, but programs should + ensure portability by doing their own checks for invalid team handles and for + \LibConstRef{SHMEM\_TEAM\_INVALID}. +} + +\end{apidefinition} diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex new file mode 100644 index 00000000..8737afaf --- /dev/null +++ b/content/shmem_collective_test.tex @@ -0,0 +1,35 @@ +\apisummary{ + The routine outputs the status of the collective operation identified by the request. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_collective\_test}@(shmem_req_h request, int *flag); +\end{Csynopsis} + +\begin{apiarguments} + + \apiargument{IN}{request}{Request representing a outstanding collective} + \apiargument{IN}{flag}{Variable indicating the status of the request} + +\end{apiarguments} + +\apidescription{ + A call to \FUNC{shmem\_collective\_test} returns immediately. If the + collective operation identified by the request is completed, it returns + true. The request object is deallocated. If the collective operation is not + completed, it returns false. + + In a multithreaded environment, the collective and the + \FUNC{shmem\_collective\_test} can be + called by different threads. 
It is the responsibility of the \openshmem user + to ensure that the \FUNC{shmem\_collective\_test} operation is called after the + collective operation. + } + +\apireturnvalues{ + On success returns zero, otherwise returns a negative integer. + } + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index 19b7200f..df5d226e 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -383,6 +383,16 @@ \subsubsection{\textbf{SHMEM\_COLLECT, SHMEM\_FCOLLECT}}\label{subsec:shmem_coll \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \input{content/shmem_reductions.tex} +\subsection{Nonblocking Collective Routines}\label{subsec:nb_coll} +\input{content/nb_collectives_intro.tex} + +\subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} +\input{content/shmem_broadcast_nb.tex} + +\subsubsection{\textbf{SHMEM\_COLLECTIVE\_TEST}}\label{subsec:shmem_collective_test} +\input{content/shmem_collective_test.tex} + + From bb13dd6b71533fec7ab5289d789c7f1cb69e09b2 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Mon, 26 Oct 2020 14:02:35 -0700 Subject: [PATCH 02/27] Update: Addressing feedback from WG - Oct 26 --- content/nb_collectives_intro.tex | 28 ++++++++++++++-------------- content/shmem_broadcast_nb.tex | 15 +++++++++++---- content/shmem_collective_test.tex | 13 ++++++------- main_spec.tex | 1 + 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 3430024b..454ec282 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -1,29 +1,29 @@ -An OpenSHMEM nonblocking collective operation, like blocking collective +An \openshmem nonblocking collective operation, like blocking collective operation, is a group communication operation among the participants of the team. All participants of the team are required to call the collective operation. 
\begin{enumerate} -\item Invocation semantics: The non-blocking collective routine initializes the -buffers, operation type, reduction type, and posts the collective operation. All -participants of the team should call this routine. The routine returns -immediately after posting the operation. +\item Invocation semantics: Upon invocation of a collective routine interface, +the operation is posted and returned immediately. All participants of the Team +should call this routine. -\item Collective Types: Currently, only the nonblocking alltoall, broadcast, and reduction collective -operations are supported. The reduction operations supported are defined in the -Table \ref{reducetypes}. +\item Collective Types: In the current specification, not all blocking collectives have +their nonblocking variants. The nonblocking variants supported include alltoall, +broadcast, and reduction collectives. The reduction types supported +are defined in the Table \ref{reducetypes}. -\item Completion semantics: Upon invocation, the collective operations are -posted and returns immediately. A user can learn the status of the collective operations -using the \FUNC{shmem\_collective\_test} routine and can be completed using -the \FUNC{shmem\_collective\_wait} routine. +\item Completion semantics: \openshmem programs can learn the status of the collective operations +using the \FUNC{shmem\_req\_test} routine and can be completed using +the \FUNC{shmem\_req\_wait} routine. \item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem programs are allowed to call multiple collective operations on different threads and the same Team. The collective operations invoked on different threads -are ordered by user-provided tag. When the user does not provide the tag, the -library generates the tag and establishes the order. +are ordered by user-provided tag. 
The user may choose to not order the +collective operations by using the library constant +\CONST{SHMEM\_COLL\_UNORDERED} instead of specifying the tag. \end{enumerate} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 635af6f0..a2fd47be 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -8,7 +8,7 @@ %% C11 \begin{C11synopsis} int @\FuncDecl{shmem\_broadcast\_nb}@(shmem_team_t team, TYPE *dest, const TYPE -*source, size_t nelems, int PE_root,uint32_t tag, shmem_req_h *request); +*source, size_t nelems, int PE_root, uint32_t tag, shmem_req_h *request); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. @@ -59,9 +59,16 @@ \VAR{PE\_root} must be passed by all \acp{PE} participating in the collective operation. - A call to the nonblocking broadcast routine returns immediately without - necessarily completing the operation. The operation is completed after a - call to \FUNC{shmem\_collective\_test} or \FUNC{shmem\_collective\_wait}. + A call to the nonblocking broadcast routine posts the operation and returns + immediately without necessarily completing the operation. On the successful + post of the operation, an opaque request handle is created and returned. The + operation is completed after a call to \FUNC{shmem\_req\_test} or + \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle + is deallocated and cannot be reused. + + The nonblocking broadcast routine is ordered according to the user-defined tag. + If \CONST{SHMEM\_COLL\_UNORDERED} is used instead of a tag, the collective + operations are not ordered. 
Like blocking broadcast, before any \ac{PE} calls a broadcast routine, the following conditions must be ensured: diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 8737afaf..657fc38c 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -5,31 +5,30 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_collective\_test}@(shmem_req_h request, int *flag); +int @\FuncDecl{shmem\_req\_test}@(shmem_req_t request); \end{Csynopsis} \begin{apiarguments} \apiargument{IN}{request}{Request representing a outstanding collective} - \apiargument{IN}{flag}{Variable indicating the status of the request} \end{apiarguments} \apidescription{ - A call to \FUNC{shmem\_collective\_test} returns immediately. If the + A call to \FUNC{shmem\_req\_test} returns immediately. If the collective operation identified by the request is completed, it returns - true. The request object is deallocated. If the collective operation is not - completed, it returns false. + true (non-negative integer). The request object is deallocated. If the collective operation is not + completed, it returns zero. In a multithreaded environment, the collective and the - \FUNC{shmem\_collective\_test} can be + \FUNC{shmem\_req\_test} can be called by different threads. It is the responsibility of the \openshmem user to ensure that the \FUNC{shmem\_collective\_test} operation is called after the collective operation. } \apireturnvalues{ - On success returns zero, otherwise returns a negative integer. + On success returns zero or one, otherwise returns a negative integer. 
} \end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index df5d226e..07c1fcdc 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -383,6 +383,7 @@ \subsubsection{\textbf{SHMEM\_COLLECT, SHMEM\_FCOLLECT}}\label{subsec:shmem_coll \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \input{content/shmem_reductions.tex} +\newpage \subsection{Nonblocking Collective Routines}\label{subsec:nb_coll} \input{content/nb_collectives_intro.tex} From df261dd96c8d4b3bd4816b7541388632889c396a Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 21 Jan 2022 11:09:54 -0800 Subject: [PATCH 03/27] Adding NB blocking a2a; minor updates --- content/nb_collectives_intro.tex | 10 +-- content/shmem_alltoall_nb.tex | 129 ++++++++++++++++++++++++++++++ content/shmem_broadcast_nb.tex | 8 +- content/shmem_collective_test.tex | 6 +- main_spec.tex | 4 + 5 files changed, 146 insertions(+), 11 deletions(-) create mode 100644 content/shmem_alltoall_nb.tex diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 454ec282..f99525c8 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -9,10 +9,10 @@ the operation is posted and returned immediately. All participants of the Team should call this routine. -\item Collective Types: In the current specification, not all blocking collectives have -their nonblocking variants. The nonblocking variants supported include alltoall, -broadcast, and reduction collectives. The reduction types supported -are defined in the Table \ref{reducetypes}. +\item Collective Types: The nonblocking variants supported include alltoall, +broadcast, and reduction collectives. Other collective operations such as +collect, barrier, alltoalls, and sync will not have nonblocking variants. The +reduction types supported are defined in Table \ref{teamreducetypes}. 
\item Completion semantics: \openshmem programs can learn the status of the collective operations using the \FUNC{shmem\_req\_test} routine and can be completed using @@ -21,7 +21,7 @@ \item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem programs are allowed to call multiple collective operations on different threads and the same Team. The collective operations invoked on different threads -are ordered by user-provided tag. The user may choose to not order the +are ordered by a user-provided tag. The user may choose to not order the collective operations by using the library constant \CONST{SHMEM\_COLL\_UNORDERED} instead of specifying the tag. diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex new file mode 100644 index 00000000..4a566b1e --- /dev/null +++ b/content/shmem_alltoall_nb.tex @@ -0,0 +1,129 @@ +\apisummary{ + Exchanges a fixed amount of contiguous data blocks between all pairs + of \acp{PE} participating in the collective routine. +} + +\begin{apidefinition} + +%% C11 +\begin{C11synopsis} +int @\FuncDecl{shmem\_alltoall\_nb}@(shmem_team_t team, TYPE *dest, const TYPE +*source, size_t nelems, uint32_t tag, shmem_req_h *request); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. + +\begin{Csynopsis} +\end{Csynopsis} +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoall\_nb}@(shmem_team_t team, +TYPE *dest, const TYPE *source, size_t nelems, uint32_t tag, shmem_req_h *request); +\end{CsynopsisCol} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
+ +\begin{CsynopsisCol} +int @\FuncDecl{shmem\_alltoallmem\_nb}@(shmem_team_t team, void *dest, const +void *source, size_t nelems, uint32_t tag, shmem_req_h *request); +\end{CsynopsisCol} + +\begin{apiarguments} + +\apiargument{IN}{team}{A valid \openshmem team handle to a team.}% + +\apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive + the combined total of \VAR{nelems} elements from each \ac{PE} in the + active set. + The type of \dest{} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} + elements of data for each \ac{PE} in the active set, ordered according to + destination \ac{PE}. + The type of \source{} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{nelems}{ + The number of elements to exchange for each \ac{PE}. + For \FUNC{shmem\_alltoallmem\_nb}, elements are bytes; + for \FUNC{shmem\_alltoall\{32,64\}\_nb}, elements are 4 or 8 bytes, + respectively. +} +\apiargument{IN}{tag}{A user defined tag to order the collective operation; +SHMEM\_COLL\_UNORDERED can be provided if no order is required.} +\apiargument{OUT}{request}{An opaque request handle identifying the collective +operation.} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_alltoall\_nb} routines are collective routines. All + \acp{PE} in the provided team must participate in the collective. If + \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is + otherwise invalid, the behavior is undefined. + + {\bf Invocation and completion}: A call to the nonblocking alltoall routine posts the operation and returns + immediately without necessarily completing the operation. On the successful + post of the operation, an opaque request handle is created and returned. The + operation is completed after a call to \FUNC{shmem\_req\_test} or + \FUNC{shmem\_req\_wait}. 
When the operation is complete, the request handle + is deallocated and cannot be reused. + + Though nonblocking alltoall varies in invocation and completion semantics + when compared to blocking alltoall, the data exchange semantics are similar. + + {\bf Data exchange semantics}: + In this routine, each \ac{PE} + participating in the operation exchanges \VAR{nelems} data elements + with all other \acp{PE} participating in the operation. + The size of a data element is: + \begin{itemize} + \item 32 bits for \FUNC{shmem\_alltoall32} + \item 64 bits for \FUNC{shmem\_alltoall64} + \item 8 bits for \FUNC{shmem\_alltoallmem} + \item \FUNC{sizeof}(\TYPE{}) for alltoall routines taking typed \VAR{source} and \VAR{dest} + \end{itemize} + + The data being sent and received are + stored in a contiguous symmetric data object. The total size of each \ac{PE}'s + \VAR{source} object and \VAR{dest} object is \VAR{nelems} times the size of + an element + times \VAR{N}, where \VAR{N} equals the number of \acp{PE} participating + in the operation. + The \VAR{source} object contains \VAR{N} blocks of data + (where the size of each block is defined by \VAR{nelems}) and each block of data + is sent to a different \ac{PE}. + + The same \dest{} and \source{} + arrays, and the same value for \VAR{nelems} + must be passed by all \acp{PE} that participate in the collective. + + Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} + participating in the operation and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} + participating in the operation, + + \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to + the \kth block of + the \VAR{dest} object of \ac{PE} \VAR{j}. + + + Like data exchange semantics, the entry and completion + criteria of blocking and nonblocking alltoall are similar. 
+ + {\bf Entry criteria}: Before any \ac{PE} calls a \FUNC{shmem\_alltoall\_nb} routine, + the following condition must be ensured: + \begin{itemize} + \item The \VAR{dest} data object on all \acp{PE} in the team is + ready to accept the \FUNC{shmem\_alltoall\_nb} data. + \end{itemize} + Otherwise, the behavior is undefined. + + {\bf Completion criteria}: Upon completion, the following is true for + the local PE: + \begin{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and + the data has been copied out of the \VAR{source} data object. + \end{itemize} +} + +\apireturnvalues{ + Zero on successful local completion. Nonzero otherwise. +} + +\end{apidefinition} + diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index a2fd47be..62d7688d 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -43,8 +43,10 @@ } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the team, from which the data is copied.} -\apiargument{IN}{tag}{A user defined tag to order the collective operation.} -\apiargument{OUT}{request}{An opaque request handle identifying the collective operation} +\apiargument{IN}{tag}{A user defined tag to order the collective operation; +SHMEM\_COLL\_UNORDERED can be provided if no order is required.} +\apiargument{OUT}{request}{An opaque request handle identifying the collective +operation.} \end{apiarguments} @@ -97,7 +99,7 @@ \apireturnvalues{ - Zero on successfull posting of the collective + Zero on successful posting of the collective operation; otherwise, nonzero. 
} diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 657fc38c..ba24803e 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_req\_test}@(shmem_req_t request); +int @\FuncDecl{shmem\_req\_test}@(shmem_req_h request); \end{Csynopsis} \begin{apiarguments} @@ -17,8 +17,8 @@ \apidescription{ A call to \FUNC{shmem\_req\_test} returns immediately. If the collective operation identified by the request is completed, it returns - true (non-negative integer). The request object is deallocated. If the collective operation is not - completed, it returns zero. + zero. The request object is deallocated. If the collective operation is not + completed, it returns an integer (non-negative integer). In a multithreaded environment, the collective and the \FUNC{shmem\_req\_test} can be diff --git a/main_spec.tex b/main_spec.tex index 07c1fcdc..b8b7e756 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -390,6 +390,10 @@ \subsection{Nonblocking Collective Routines}\label{subsec:nb_coll} \subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} \input{content/shmem_broadcast_nb.tex} +\subsubsection{\textbf{SHMEM\_ALLTOALL\_NB}}\label{subsec:shmem_alltoall_nb} +\input{content/shmem_alltoall_nb.tex} + + \subsubsection{\textbf{SHMEM\_COLLECTIVE\_TEST}}\label{subsec:shmem_collective_test} \input{content/shmem_collective_test.tex} From 48a012c5aaf04e3ed0a5d21d03cb5b1cc854b266 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Thu, 19 Jan 2023 10:28:18 -0800 Subject: [PATCH 04/27] Adding collective wait --- content/shmem_collective_test.tex | 2 +- content/shmem_collective_wait.tex | 34 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 content/shmem_collective_wait.tex diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 
ba24803e..93e37060 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -28,7 +28,7 @@ } \apireturnvalues{ - On success returns zero or one, otherwise returns a negative integer. + On success returns zero, otherwise returns a negative integer. } \end{apidefinition} diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex new file mode 100644 index 00000000..93e37060 --- /dev/null +++ b/content/shmem_collective_wait.tex @@ -0,0 +1,34 @@ +\apisummary{ + The routine outputs the status of the collective operation identified by the request. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_req\_test}@(shmem_req_h request); +\end{Csynopsis} + +\begin{apiarguments} + + \apiargument{IN}{request}{Request representing a outstanding collective} + +\end{apiarguments} + +\apidescription{ + A call to \FUNC{shmem\_req\_test} returns immediately. If the + collective operation identified by the request is completed, it returns + zero. The request object is deallocated. If the collective operation is not + completed, it returns an integer (non-negative integer). + + In a multithreaded environment, the collective and the + \FUNC{shmem\_req\_test} can be + called by different threads. It is the responsibility of the \openshmem user + to ensure that the \FUNC{shmem\_collective\_test} operation is called after the + collective operation. + } + +\apireturnvalues{ + On success returns zero, otherwise returns a negative integer. 
+ } + +\end{apidefinition} From d3a7ac9e4bdad4d23b0b7cd8d058807ffd413beb Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Mon, 13 Mar 2023 10:43:13 -0700 Subject: [PATCH 05/27] Adding shmem barrier all nonblocking --- content/shmem_barrier_all_nb.tex | 46 ++++++++++++++++++++++++++++++++ main_spec.tex | 3 +++ 2 files changed, 49 insertions(+) create mode 100644 content/shmem_barrier_all_nb.tex diff --git a/content/shmem_barrier_all_nb.tex b/content/shmem_barrier_all_nb.tex new file mode 100644 index 00000000..9490dc62 --- /dev/null +++ b/content/shmem_barrier_all_nb.tex @@ -0,0 +1,46 @@ +\apisummary{ + Registers the arrival of a \ac{PE} at a barrier and returns immediately. It completes when all \acp{PE} + arrive at the barrier and all local updates and remote memory updates on the default context are completed. + } + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_barrier\_all\_nb}@(void); +\end{Csynopsis} + +\begin{apiarguments} + + \apiargument{None.}{}{} + +\end{apiarguments} + +\apidescription{ + Similar to the \FUNC{shmem\_barrier\_all} routine, the nonblocking \FUNC{shmem\_barrier\_all\_nb} + is a mechanism for synchronizing all \acp{PE} in the world team at + once. This routine completes when all \acp{PE} have called + \FUNC{shmem\_barrier\_all\_nb}. + + A call to the nonblocking barrier routine posts the operation and returns + immediately without necessarily completing the operation. Upon successful posting of the operation, + an opaque request handle is created and returned. The + operation is completed after a call to \FUNC{shmem\_req\_test} or + \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle + is deallocated and cannot be reused. 
+ + Prior to completion, \FUNC{shmem\_barrier\_all\_nb} + ensures completion of all previously issued memory stores and remote memory + updates issued on the default context via \openshmem \acp{AMO} and + \ac{RMA} routine calls such + as \FUNC{shmem\_int\_add}, \FUNC{shmem\_put32}, + \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ +} + +\end{apidefinition} diff --git a/main_spec.tex b/main_spec.tex index b8b7e756..a11d19e0 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -387,6 +387,9 @@ \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \subsection{Nonblocking Collective Routines}\label{subsec:nb_coll} \input{content/nb_collectives_intro.tex} +\subsubsection{\textbf{SHMEM\_BARRIER\_ALL\_NB}}\label{subsec:shmem_barrier_all_nb} +\input{content/shmem_barrier_all_nb.tex} + \subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} \input{content/shmem_broadcast_nb.tex} From a3c8b15b4d6928622e5bef9dfca4aabc105f0912 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 31 Mar 2023 08:49:53 -0700 Subject: [PATCH 06/27] Fixing description of SHMEM Collective Wait --- content/shmem_collective_wait.tex | 31 +++++++++++++++++++------------ main_spec.tex | 4 +++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index 93e37060..413e432a 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -1,11 +1,12 @@ \apisummary{ - The routine outputs the status of the collective operation identified by the request. + The routine waits until a collective operation identified by a request + object completes. 
} \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_req\_test}@(shmem_req_h request); +int @\FuncDecl{shmem\_collective\_wait}@(shmem_req_h request); \end{Csynopsis} \begin{apiarguments} @@ -15,16 +16,22 @@ \end{apiarguments} \apidescription{ - A call to \FUNC{shmem\_req\_test} returns immediately. If the - collective operation identified by the request is completed, it returns - zero. The request object is deallocated. If the collective operation is not - completed, it returns an integer (non-negative integer). - - In a multithreaded environment, the collective and the - \FUNC{shmem\_req\_test} can be - called by different threads. It is the responsibility of the \openshmem user - to ensure that the \FUNC{shmem\_collective\_test} operation is called after the - collective operation. + +The \FUNC{shmem\_collective\_wait} function is a blocking operation. It is used to +determine whether a collective operation identified by the request object has been +completed. If the collective operation is completed, +\FUNC{shmem\_collective\_wait} +returns zero and deallocates the request object. If the collective operation has +not been completed, \FUNC{shmem\_collective\_wait} blocks until collective +operation completes and then returns zero. + + +In a multithreaded environment, \FUNC{shmem\_collective\_wait} can be called by different +threads but on different request objects. It is the responsibility of the +OpenSHMEM user to ensure that proper synchronization is used to prevent race +conditions or deadlock. Specifically, the \FUNC{shmem\_collective\_wait} operation should +be called after the collective operation to ensure that the request object is +not deallocated prematurely. 
} \apireturnvalues{ diff --git a/main_spec.tex b/main_spec.tex index a11d19e0..a0ad0261 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -396,10 +396,12 @@ \subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} \subsubsection{\textbf{SHMEM\_ALLTOALL\_NB}}\label{subsec:shmem_alltoall_nb} \input{content/shmem_alltoall_nb.tex} - \subsubsection{\textbf{SHMEM\_COLLECTIVE\_TEST}}\label{subsec:shmem_collective_test} \input{content/shmem_collective_test.tex} +\subsubsection{\textbf{SHMEM\_COLLECTIVE\_WAIT}}\label{subsec:shmem_collective_wait} +\input{content/shmem_collective_wait.tex} + From 1becb005f7cbac4f5e75eee82a3b079362bbfe82 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 31 Mar 2023 08:54:11 -0700 Subject: [PATCH 07/27] Change collective wait to shmem_req_wait --- content/shmem_collective_wait.tex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index 413e432a..563e3016 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -6,7 +6,7 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_collective\_wait}@(shmem_req_h request); +int @\FuncDecl{shmem\_req\_wait}@(shmem_req_h request); \end{Csynopsis} \begin{apiarguments} @@ -17,19 +17,19 @@ \apidescription{ -The \FUNC{shmem\_collective\_wait} function is a blocking operation. It is used to +The \FUNC{shmem\_req\_wait} function is a blocking operation. It is used to determine whether a collective operation identified by the request object has been completed. If the collective operation is completed, -\FUNC{shmem\_collective\_wait} +\FUNC{shmem\_req\_wait} returns zero and deallocates the request object. If the collective operation has -not been completed, \FUNC{shmem\_collective\_wait} blocks until collective +not been completed, \FUNC{shmem\_req\_wait} blocks until collective operation completes and then returns zero. 
-In a multithreaded environment, \FUNC{shmem\_collective\_wait} can be called by different +In a multithreaded environment, \FUNC{shmem\_req\_wait} can be called by different threads but on different request objects. It is the responsibility of the -OpenSHMEM user to ensure that proper synchronization is used to prevent race -conditions or deadlock. Specifically, the \FUNC{shmem\_collective\_wait} operation should +\openshmem user to ensure that proper synchronization is used to prevent race +conditions or deadlock. Specifically, the \FUNC{shmem\_req\_wait} operation should be called after the collective operation to ensure that the request object is not deallocated prematurely. } From 1cbd79a3816ab5c12f8c24812f1b539bcbf89e4a Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 31 Mar 2023 09:05:28 -0700 Subject: [PATCH 08/27] Fix description of request param --- content/shmem_collective_test.tex | 2 +- content/shmem_collective_wait.tex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 93e37060..2777d5c2 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -10,7 +10,7 @@ \begin{apiarguments} - \apiargument{IN}{request}{Request representing a outstanding collective} + \apiargument{IN}{request}{Request handle} \end{apiarguments} diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index 563e3016..beb4a34f 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -11,7 +11,7 @@ \begin{apiarguments} - \apiargument{IN}{request}{Request representing a outstanding collective} + \apiargument{IN}{request}{Request handle} \end{apiarguments} From f98cdf7d92013f566268e8197418a7ec5b2c2857 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Thu, 14 Sep 2023 09:10:03 -0700 Subject: [PATCH 09/27] Add req object to NB barrier arguments --- 
content/nb_collectives_intro.tex | 2 +- content/shmem_barrier_all_nb.tex | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index f99525c8..7b27a4d2 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -9,7 +9,7 @@ the operation is posted and returned immediately. All participants of the Team should call this routine. -\item Collective Types: The nonblocking variants supported include alltoall, +\item Collective Types: The nonblocking variants supported include barrier, alltoall, broadcast, and reduction collectives. Other collective operations such as collect, barrier, alltoalls, and sync will not have nonblocking variants. The reduction types supported are defined in Table \ref{teamreducetypes}. diff --git a/content/shmem_barrier_all_nb.tex b/content/shmem_barrier_all_nb.tex index 9490dc62..2168ba4d 100644 --- a/content/shmem_barrier_all_nb.tex +++ b/content/shmem_barrier_all_nb.tex @@ -6,12 +6,13 @@ \begin{apidefinition} \begin{Csynopsis} -void @\FuncDecl{shmem\_barrier\_all\_nb}@(void); +void @\FuncDecl{shmem\_barrier\_all\_nb}@(shmem_req_h *request); \end{Csynopsis} \begin{apiarguments} - \apiargument{None.}{}{} + \apiargument{OUT}{request}{An opaque request handle identifying the + collective operation.} \end{apiarguments} @@ -21,7 +22,7 @@ once. This routine completes when all \acp{PE} have called \FUNC{shmem\_barrier\_all\_nb}. - A call to the nonblocking barrier routine posts the operation and returns + A call to the nonblocking barrier routine posts the operation and returns immediately without necessarily completing the operation. Upon successful posting of the operation, an opaque request handle is created and returned. 
The operation is completed after a call to \FUNC{shmem\_req\_test} or From cc72331d9315f957aeb2b1661c98fc44358e7cdd Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Thu, 28 Sep 2023 13:50:05 -0700 Subject: [PATCH 10/27] Remove tagged collectives --- content/nb_collectives_intro.tex | 7 ++----- content/shmem_alltoall_nb.tex | 8 +++----- content/shmem_broadcast_nb.tex | 13 +++---------- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 7b27a4d2..03d47b35 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -19,11 +19,8 @@ the \FUNC{shmem\_req\_wait} routine. \item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem -programs are allowed to call multiple collective operations on different threads -and the same Team. The collective operations invoked on different threads -are ordered by a user-provided tag. The user may choose to not order the -collective operations by using the library constant -\CONST{SHMEM\_COLL\_UNORDERED} instead of specifying the tag. +programs are not allowed to call multiple collective operations on different threads +and the same Team. \end{enumerate} diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index 4a566b1e..7a556599 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -8,7 +8,7 @@ %% C11 \begin{C11synopsis} int @\FuncDecl{shmem\_alltoall\_nb}@(shmem_team_t team, TYPE *dest, const TYPE -*source, size_t nelems, uint32_t tag, shmem_req_h *request); +*source, size_t nelems, shmem_req_h *request); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
@@ -16,13 +16,13 @@ \end{Csynopsis} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_alltoall\_nb}@(shmem_team_t team, -TYPE *dest, const TYPE *source, size_t nelems, uint32_t tag, shmem_req_h *request); +TYPE *dest, const TYPE *source, size_t nelems, shmem_req_h *request); \end{CsynopsisCol} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} int @\FuncDecl{shmem\_alltoallmem\_nb}@(shmem_team_t team, void *dest, const -void *source, size_t nelems, uint32_t tag, shmem_req_h *request); +void *source, size_t nelems, shmem_req_h *request); \end{CsynopsisCol} \begin{apiarguments} @@ -43,8 +43,6 @@ for \FUNC{shmem\_alltoall\{32,64\}\_nb}, elements are 4 or 8 bytes, respectively. } -\apiargument{IN}{tag}{A user defined tag to order the collective operation; -SHMEM\_COLL\_UNORDERED can be provided if no order is required.} \apiargument{OUT}{request}{An opaque request handle identifying the collective operation.} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 62d7688d..15d88a32 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -8,7 +8,7 @@ %% C11 \begin{C11synopsis} int @\FuncDecl{shmem\_broadcast\_nb}@(shmem_team_t team, TYPE *dest, const TYPE -*source, size_t nelems, int PE_root, uint32_t tag, shmem_req_h *request); +*source, size_t nelems, int PE_root, shmem_req_h *request); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
@@ -17,14 +17,13 @@ \end{Csynopsis} \begin{CsynopsisCol} int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_broadcast\_nb}@(shmem_team_t team, TYPE -*dest, const TYPE *source, size_t nelems, int PE_root, uint32_t tag, -shmem_req_h *request); +*dest, const TYPE *source, size_t nelems, int PE_root, shmem_req_h *request); \end{CsynopsisCol} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} int @\FuncDecl{shmem\_broadcastmem\_nb}@(shmem_team_t team, void *dest, const void -*source, size_t nelems, int PE_root, uint32_t tag, shmem_req_h *request); +*source, size_t nelems, int PE_root, shmem_req_h *request); \end{CsynopsisCol} \begin{apiarguments} @@ -43,8 +42,6 @@ } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the team, from which the data is copied.} -\apiargument{IN}{tag}{A user defined tag to order the collective operation; -SHMEM\_COLL\_UNORDERED can be provided if no order is required.} \apiargument{OUT}{request}{An opaque request handle identifying the collective operation.} @@ -68,10 +65,6 @@ \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. - The nonblocking broadcast routine is ordered according to the user-defined tag. - If \CONST{SHMEM\_COLL\_UNORDERED} is used instead of a tag, the collective - operations are not ordered. 
- Like blocking broadcast, before any \ac{PE} calls a broadcast routine, the following conditions must be ensured: \begin{itemize} From a972738ad16d05f7de5de486fd41573296e89659 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 07:22:12 -0700 Subject: [PATCH 11/27] Add missing 32 and 64 bit interfaces; minor edits --- content/nb_collectives_intro.tex | 5 ++--- content/shmem_alltoall_nb.tex | 6 ++++++ content/shmem_broadcast_nb.tex | 6 ++++++ main_spec.tex | 4 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 03d47b35..91ab3c5a 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -10,9 +10,8 @@ should call this routine. \item Collective Types: The nonblocking variants supported include barrier, alltoall, -broadcast, and reduction collectives. Other collective operations such as -collect, barrier, alltoalls, and sync will not have nonblocking variants. The -reduction types supported are defined in Table \ref{teamreducetypes}. +and broadcast collectives. Other collective operations such as +reductions, collect, barrier, alltoalls, and sync will not have nonblocking variants. \item Completion semantics: \openshmem programs can learn the status of the collective operations using the \FUNC{shmem\_req\_test} routine and can be completed using diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index 7a556599..d08b7262 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -21,6 +21,12 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} +int @\FuncDecl{shmem\_alltoall32\_nb}@(shmem_team_t team, void *dest, const +void *source, size_t nelems, shmem_req_h *request); + +int @\FuncDecl{shmem\_alltoall64\_nb}@(shmem_team_t team, void *dest, const +void *source, size_t nelems, shmem_req_h *request); + int @\FuncDecl{shmem\_alltoallmem\_nb}@(shmem_team_t team, void *dest, const void *source, size_t nelems, shmem_req_h *request); \end{CsynopsisCol} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 15d88a32..4ab0ad35 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -22,6 +22,12 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} +int @\FuncDecl{shmem\_broadcast32\_nb}@(shmem_team_t team, void *dest, const void +*source, size_t nelems, int PE_root, shmem_req_h *request); + +int @\FuncDecl{shmem\_broadcast64\_nb}@(shmem_team_t team, void *dest, const void +*source, size_t nelems, int PE_root, shmem_req_h *request); + int @\FuncDecl{shmem\_broadcastmem\_nb}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root, shmem_req_h *request); \end{CsynopsisCol} diff --git a/main_spec.tex b/main_spec.tex index a0ad0261..8b0657c9 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -396,10 +396,10 @@ \subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} \subsubsection{\textbf{SHMEM\_ALLTOALL\_NB}}\label{subsec:shmem_alltoall_nb} \input{content/shmem_alltoall_nb.tex} -\subsubsection{\textbf{SHMEM\_COLLECTIVE\_TEST}}\label{subsec:shmem_collective_test} +\subsubsection{\textbf{SHMEM\_REQ\_TEST}}\label{subsec:shmem_collective_test} \input{content/shmem_collective_test.tex} -\subsubsection{\textbf{SHMEM\_COLLECTIVE\_WAIT}}\label{subsec:shmem_collective_wait} +\subsubsection{\textbf{SHMEM\_REQ\_WAIT}}\label{subsec:shmem_collective_wait} \input{content/shmem_collective_wait.tex} From 
db8528c1cd0680a9fa2f43db57d3fee5e080ede6 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 12:18:52 -0700 Subject: [PATCH 12/27] Update content/nb_collectives_intro.tex Co-authored-by: James Dinan --- content/nb_collectives_intro.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 91ab3c5a..ffd9f3e4 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -5,9 +5,9 @@ \begin{enumerate} -\item Invocation semantics: Upon invocation of a collective routine interface, -the operation is posted and returned immediately. All participants of the Team -should call this routine. +\item Invocation semantics: Upon invocation of a nonblocking collective routine, +the operation is initiated and the routine returns without ensuring completion. All participants of the Team +must call this routine with identical arguments. \item Collective Types: The nonblocking variants supported include barrier, alltoall, and broadcast collectives. Other collective operations such as From 558e0d36154fee888587a680deb758b11b282ef5 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 12:19:19 -0700 Subject: [PATCH 13/27] Update content/shmem_alltoall_nb.tex Co-authored-by: James Dinan --- content/shmem_alltoall_nb.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index d08b7262..3c80d49f 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -60,9 +60,9 @@ \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - {\bf Invocation and completion}: A call to the nonblocking alltoall routine posts the operation and returns - immediately without necessarily completing the operation. 
On the successful - post of the operation, an opaque request handle is created and returned. The + {\bf Invocation and completion}: A call to the nonblocking alltoall routine initiates the operation and returns + immediately without necessarily completing the operation. On success, + an opaque request handle is created and returned. The operation is completed after a call to \FUNC{shmem\_req\_test} or \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. From ef3ecf1760aa3345102f98c808528f0dfcbcea58 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 12:19:41 -0700 Subject: [PATCH 14/27] Update content/shmem_barrier_all_nb.tex Co-authored-by: James Dinan --- content/shmem_barrier_all_nb.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_barrier_all_nb.tex b/content/shmem_barrier_all_nb.tex index 2168ba4d..a21547d4 100644 --- a/content/shmem_barrier_all_nb.tex +++ b/content/shmem_barrier_all_nb.tex @@ -22,8 +22,8 @@ once. This routine completes when all \acp{PE} have called \FUNC{shmem\_barrier\_all\_nb}. - A call to the nonblocking barrier routine posts the operation and returns - immediately without necessarily completing the operation. Upon successful posting of the operation, + A call to the nonblocking barrier routine initiates the operation and returns + immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. The operation is completed after a call to \FUNC{shmem\_req\_test} or \FUNC{shmem\_req\_wait}. 
When the operation is complete, the request handle From db79fcb7397db8fb1b787b34b2dc92ecc6c6ecd3 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 12:20:00 -0700 Subject: [PATCH 15/27] Update content/shmem_broadcast_nb.tex Co-authored-by: James Dinan --- content/shmem_broadcast_nb.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 4ab0ad35..21bf4bc3 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -64,9 +64,9 @@ \VAR{PE\_root} must be passed by all \acp{PE} participating in the collective operation. - A call to the nonblocking broadcast routine posts the operation and returns - immediately without necessarily completing the operation. On the successful - post of the operation, an opaque request handle is created and returned. The + A call to the nonblocking broadcast routine initiates the operation and returns + immediately without necessarily completing the operation. On success, + an opaque request handle is created and returned. The operation is completed after a call to \FUNC{shmem\_req\_test} or \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. From 7d74314943266716f871214558fd66625f138226 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Fri, 29 Sep 2023 12:20:20 -0700 Subject: [PATCH 16/27] Update content/shmem_broadcast_nb.tex Co-authored-by: James Dinan --- content/shmem_broadcast_nb.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 21bf4bc3..a4eaf3f2 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -98,8 +98,7 @@ \apireturnvalues{ - Zero on successful posting of the collective - operation; otherwise, nonzero. + Zero on success and nonzero otherwise. 
} \apinotes{ From 2355bf8d1d34899ae25289516a08a26e310da5aa Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Sat, 30 Sep 2023 05:58:33 -0700 Subject: [PATCH 17/27] Remove 32 and 64 bit nb variants --- content/shmem_alltoall_nb.tex | 12 +----------- content/shmem_broadcast_nb.tex | 10 +--------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index 3c80d49f..6897ed31 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -21,12 +21,6 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -int @\FuncDecl{shmem\_alltoall32\_nb}@(shmem_team_t team, void *dest, const -void *source, size_t nelems, shmem_req_h *request); - -int @\FuncDecl{shmem\_alltoall64\_nb}@(shmem_team_t team, void *dest, const -void *source, size_t nelems, shmem_req_h *request); - int @\FuncDecl{shmem\_alltoallmem\_nb}@(shmem_team_t team, void *dest, const void *source, size_t nelems, shmem_req_h *request); \end{CsynopsisCol} @@ -45,9 +39,7 @@ The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ The number of elements to exchange for each \ac{PE}. - For \FUNC{shmem\_alltoallmem\_nb}, elements are bytes; - for \FUNC{shmem\_alltoall\{32,64\}\_nb}, elements are 4 or 8 bytes, - respectively. + For \FUNC{shmem\_alltoallmem\_nb}, elements are bytes. } \apiargument{OUT}{request}{An opaque request handle identifying the collective operation.} @@ -76,8 +68,6 @@ with all other \acp{PE} participating in the operation. 
The size of a data element is: \begin{itemize} - \item 32 bits for \FUNC{shmem\_alltoall32} - \item 64 bits for \FUNC{shmem\_alltoall64} \item 8 bits for \FUNC{shmem\_alltoallmem} \item \FUNC{sizeof}(\TYPE{}) for alltoall routines taking typed \VAR{source} and \VAR{dest} \end{itemize} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index a4eaf3f2..4b91f69a 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -22,12 +22,6 @@ where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -int @\FuncDecl{shmem\_broadcast32\_nb}@(shmem_team_t team, void *dest, const void -*source, size_t nelems, int PE_root, shmem_req_h *request); - -int @\FuncDecl{shmem\_broadcast64\_nb}@(shmem_team_t team, void *dest, const void -*source, size_t nelems, int PE_root, shmem_req_h *request); - int @\FuncDecl{shmem\_broadcastmem\_nb}@(shmem_team_t team, void *dest, const void *source, size_t nelems, int PE_root, shmem_req_h *request); \end{CsynopsisCol} @@ -42,9 +36,7 @@ The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ The number of elements in \source{} and \dest{} arrays. - For \FUNC{shmem\_broadcastmem\_nb}, elements are bytes; - for \FUNC{shmem\_broadcast\{32,64\}\_nb}, elements are 4 or 8 bytes, - respectively. + For\FUNC{shmem\_broadcastmem\_nb}, elements are bytes. 
} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the team, from which the data is copied.} From b15c727b5017737447183fb0b45804eaa01c04c2 Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Sat, 30 Sep 2023 06:08:55 -0700 Subject: [PATCH 18/27] Minor edits: Remove active set mention --- content/shmem_alltoall_nb.tex | 6 +++--- content/shmem_broadcast_nb.tex | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index 6897ed31..9575eb9b 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -31,10 +31,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + team. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the team, ordered according to destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ @@ -68,7 +68,7 @@ with all other \acp{PE} participating in the operation. The size of a data element is: \begin{itemize} - \item 8 bits for \FUNC{shmem\_alltoallmem} + \item 8 bits for \FUNC{shmem\_alltoallmem\_nb} \item \FUNC{sizeof}(\TYPE{}) for alltoall routines taking typed \VAR{source} and \VAR{dest} \end{itemize} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index 4b91f69a..e6f9c51b 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -36,7 +36,7 @@ The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ The number of elements in \source{} and \dest{} arrays. - For\FUNC{shmem\_broadcastmem\_nb}, elements are bytes. 
+ For \FUNC{shmem\_broadcastmem\_nb}, elements are bytes. } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the team, from which the data is copied.} From 030a019637ec53b5d45301de15f6bd8da3193d8d Mon Sep 17 00:00:00 2001 From: Manjunath Gorentla Venkata Date: Sat, 30 Sep 2023 06:26:21 -0700 Subject: [PATCH 19/27] Make shmem_req_test/wait functions neutral --- content/shmem_collective_test.tex | 14 +++++++------- content/shmem_collective_wait.tex | 14 ++++++-------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 2777d5c2..8bbce666 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -1,5 +1,5 @@ \apisummary{ - The routine outputs the status of the collective operation identified by the request. + The routine outputs the status of the operation identified by the request. } \begin{apidefinition} @@ -16,15 +16,15 @@ \apidescription{ A call to \FUNC{shmem\_req\_test} returns immediately. If the - collective operation identified by the request is completed, it returns - zero. The request object is deallocated. If the collective operation is not - completed, it returns an integer (non-negative integer). + operation identified by the request is completed, it returns + zero, and the request object is deallocated. If the operation is not + completed, it returns a non-negative integer. - In a multithreaded environment, the collective and the + In a multithreaded environment, the operation identified by the request and the \FUNC{shmem\_req\_test} can be called by different threads. It is the responsibility of the \openshmem user - to ensure that the \FUNC{shmem\_collective\_test} operation is called after the - collective operation. + to ensure that the \FUNC{shmem\_req\_test} operation is called after the + operation has been initiated. 
} \apireturnvalues{ diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index beb4a34f..88666c0a 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -1,5 +1,5 @@ \apisummary{ - The routine waits until a collective operation identified by a request + The routine waits until an operation identified by a request object completes. } @@ -17,13 +17,11 @@ \apidescription{ -The \FUNC{shmem\_req\_wait} function is a blocking operation. It is used to -determine whether a collective operation identified by the request object has been -completed. If the collective operation is completed, -\FUNC{shmem\_req\_wait} -returns zero and deallocates the request object. If the collective operation has -not been completed, \FUNC{shmem\_req\_wait} blocks until collective -operation completes and then returns zero. +The \FUNC{shmem\_req\_wait} function is a blocking operation used to determine whether an +operation identified by the request object has been completed. If the operation +is completed, \FUNC{shmem\_req\_wait} returns zero and deallocates the request object. If +the operation has not been completed, \FUNC{shmem\_req\_wait} blocks until the operation +completes and then returns zero. 
In a multithreaded environment, \FUNC{shmem\_req\_wait} can be called by different From 82e5d54c4ae5c8f7bab1b5c4dfe143dcccc9979a Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:33:20 -0700 Subject: [PATCH 20/27] Update content/nb_collectives_intro.tex --- content/nb_collectives_intro.tex | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index ffd9f3e4..0d0e9ccd 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -1,4 +1,4 @@ -An \openshmem nonblocking collective operation, like blocking collective +An \openshmem nonblocking collective operation, like a blocking collective operation, is a group communication operation among the participants of the team. All participants of the team are required to call the collective operation. @@ -9,13 +9,13 @@ the operation is initiated and the routine returns without ensuring completion. All participants of the Team must call this routine with identical arguments. -\item Collective Types: The nonblocking variants supported include barrier, alltoall, +\item Collective Types: The nonblocking variants supported include barrier all, alltoall, and broadcast collectives. Other collective operations such as reductions, collect, barrier, alltoalls, and sync will not have nonblocking variants. \item Completion semantics: \openshmem programs can learn the status of the collective operations -using the \FUNC{shmem\_req\_test} routine and can be completed using -the \FUNC{shmem\_req\_wait} routine. +using the \FUNC{shmem\_req\_test} routine. The operation is completed after +at least one call to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}. 
\item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem programs are not allowed to call multiple collective operations on different threads @@ -26,6 +26,3 @@ Note: Like other nonblocking \openshmem operations, the implementations are expected to asynchronously progress the collective operations. The guidance on asynchronous progress is provided in Section \ref{subsec:progress}. - - - From fb0b4821ff835cbb5e2b830fad0512d2ea5612d4 Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:33:47 -0700 Subject: [PATCH 21/27] Update content/shmem_alltoall_nb.tex --- content/shmem_alltoall_nb.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index 9575eb9b..fd509928 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -55,8 +55,8 @@ {\bf Invocation and completion}: A call to the nonblocking alltoall routine initiates the operation and returns immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. The - operation is completed after a call to \FUNC{shmem\_req\_test} or - \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle + operation is completed after at least one call to \FUNC{shmem\_req\_test} or + a call to \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. Though nonblocking alltoall varies in invocation and completion semantics @@ -97,7 +97,7 @@ Like data exchange semantics, the entry and completion - criteria of blocking and nonblocking alltoall is similar. + criteria of blocking and nonblocking alltoall are similar. 
{\bf Entry criteria}: Before any \ac{PE} calls a \FUNC{shmem\_alltoall\_nb} routine, the following condition must be ensured: From 930b39ca3286f3dc8c1b6d6c0faa46cb916f86e4 Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:34:07 -0700 Subject: [PATCH 22/27] Update content/shmem_broadcast_nb.tex --- content/shmem_broadcast_nb.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index e6f9c51b..d9ee6de6 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -59,8 +59,8 @@ A call to the nonblocking broadcast routine initiates the operation and returns immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. The - operation is completed after a call to \FUNC{shmem\_req\_test} or - \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle + operation is completed after at least one call to \FUNC{shmem\_req\_test} or a + call to \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. Like blocking broadcast, before any \ac{PE} calls a broadcast routine, the following From 96f0b97cee146191b3f65107f96415282a741523 Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:35:46 -0700 Subject: [PATCH 23/27] Update content/shmem_barrier_all_nb.tex --- content/shmem_barrier_all_nb.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_barrier_all_nb.tex b/content/shmem_barrier_all_nb.tex index a21547d4..8271f5c7 100644 --- a/content/shmem_barrier_all_nb.tex +++ b/content/shmem_barrier_all_nb.tex @@ -25,9 +25,9 @@ A call to the nonblocking barrier routine initiates the operation and returns immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. 
The - operation is completed after a call to \FUNC{shmem\_req\_test} or - \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle - is deallocated and cannot be reused. + operation is completed after at least one call to \FUNC{shmem\_req\_test} or + a call to \FUNC{shmem\_req\_wait}. When the operation is complete, the + request handle is deallocated and cannot be reused. Prior to completion, \FUNC{shmem\_barrier\_all\_nb} ensures completion of all previously issued memory stores and remote memory From ff96cf580a0ff3ce0b2a49b183a83ebfe6cfba16 Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:36:17 -0700 Subject: [PATCH 24/27] Update content/shmem_collective_test.tex --- content/shmem_collective_test.tex | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index 8bbce666..a27f5472 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -17,18 +17,18 @@ \apidescription{ A call to \FUNC{shmem\_req\_test} returns immediately. If the operation identified by the request is completed, it returns - zero, and the request object is deallocated. If the operation is not - completed, it returns a non-negative integer. - - In a multithreaded environment, the operation identified by the request and the - \FUNC{shmem\_req\_test} can be - called by different threads. It is the responsibility of the \openshmem user - to ensure that the \FUNC{shmem\_req\_test} operation is called after the - operation has been initiated. + zero, and the request object is deallocated and cannot be reused. + If the operation is not completed, it returns a non-negative integer. If + the request object is not valid, behavior is undefined. + + In a multithreaded environment, \FUNC{shmem\_req\_test} can be called by + different threads but on different request objects. 
It is the responsibility + of the \openshmem user to ensure that proper synchronization is used to + prevent race conditions or deadlock. } \apireturnvalues{ - On success returns zero, otherwise returns a negative integer. + On success returns zero, otherwise returns a nonzero integer. } \end{apidefinition} From ef49e8f99d118a122f9b74255627e540703826fb Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 12 Apr 2024 15:36:36 -0700 Subject: [PATCH 25/27] Update content/shmem_collective_wait.tex --- content/shmem_collective_wait.tex | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index 88666c0a..31d05929 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -19,17 +19,15 @@ The \FUNC{shmem\_req\_wait} function is a blocking operation used to determine whether an operation identified by the request object has been completed. If the operation -is completed, \FUNC{shmem\_req\_wait} returns zero and deallocates the request object. If -the operation has not been completed, \FUNC{shmem\_req\_wait} blocks until the operation -completes and then returns zero. - +is completed, \FUNC{shmem\_req\_wait} returns zero, and the request object is +deallocated and cannot be reused. If the operation has not been completed, +\FUNC{shmem\_req\_wait} blocks until the operation completes and then returns +zero. If the request object is not valid, behavior is undefined. In a multithreaded environment, \FUNC{shmem\_req\_wait} can be called by different threads but on different request objects. It is the responsibility of the \openshmem user to ensure that proper synchronization is used to prevent race -conditions or deadlock. Specifically, the \FUNC{shmem\_req\_wait} operation should -be called after the collective operation to ensure that the request object is -not deallocated prematurely. +conditions or deadlock. 
} \apireturnvalues{ From e40b2635057db47fb68ada37532b85062f31cedb Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 10 May 2024 12:05:41 -0700 Subject: [PATCH 26/27] Minor update to nb_collectives_intro Add SHMEM_REQ_INVALID library constant Address WG Feedback Clarify collective start/execution Remove barrier_all_nb Address Feedback Minor update to nb_collective_intro.tex Clarify completion of broadcast_nb --- content/library_constants.tex | 9 ++++++ content/nb_collectives_intro.tex | 19 +++++++------ content/shmem_alltoall_nb.tex | 2 +- content/shmem_barrier_all_nb.tex | 47 ------------------------------- content/shmem_broadcast_nb.tex | 8 +++--- content/shmem_collective_test.tex | 9 +++--- content/shmem_collective_wait.tex | 15 +++++----- main_spec.tex | 3 -- 8 files changed, 37 insertions(+), 75 deletions(-) delete mode 100644 content/shmem_barrier_all_nb.tex diff --git a/content/library_constants.tex b/content/library_constants.tex index 0a0194de..66d71725 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -84,6 +84,15 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. \tabularnewline \hline %% +\LibConstDecl{SHMEM\_REQ\_INVALID} & +A value corresponding to an invalid request handle. +This value can be used to initialize or update request handles to indicate +that they do not reference a valid request. +When managed in this way, applications can use an equality comparison +to test whether a given request handle references a valid request. +See Section~\ref{subsec:nb_coll} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_SIGNAL\_SET} & An integer constant expression corresponding to the signal update set operation. 
See Section~\ref{subsec:shmem_put_signal} and diff --git a/content/nb_collectives_intro.tex b/content/nb_collectives_intro.tex index 0d0e9ccd..6ff5db01 100644 --- a/content/nb_collectives_intro.tex +++ b/content/nb_collectives_intro.tex @@ -1,25 +1,26 @@ An \openshmem nonblocking collective operation, like a blocking collective operation, is a group communication operation among the -participants of the team. All participants of the team are required to call the -collective operation. +participants of the team. All \acp{PE} in the team are required to call the +collective operation and each collective operation must be initiated in the same +order across all \acp{PE} while the execution may be performed in any order. \begin{enumerate} \item Invocation semantics: Upon invocation of a nonblocking collective routine, -the operation is initiated and the routine returns without ensuring completion. All participants of the Team +the operation is initiated and the routine returns without ensuring completion. All \acp{PE} in the team must call this routine with identical arguments. -\item Collective Types: The nonblocking variants supported include barrier all, alltoall, -and broadcast collectives. Other collective operations such as -reductions, collect, barrier, alltoalls, and sync will not have nonblocking variants. +\item Collective Types: The nonblocking variants supported include the alltoall +and broadcast collectives. All other collective operations such as +reductions, collect, fcollect, barrier, barrier all, alltoalls, sync, and sync all will not have nonblocking variants. -\item Completion semantics: \openshmem programs can learn the status of the collective operations +\item Completion semantics: \openshmem programs can learn the status of the collective operations using the \FUNC{shmem\_req\_test} routine. The operation is completed after -at least one call to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}. 
+a call to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}. \item Threads: While using SHMEM\_THREAD\_MULTIPLE, the \openshmem programs are not allowed to call multiple collective operations on different threads -and the same Team. +and the same team. \end{enumerate} diff --git a/content/shmem_alltoall_nb.tex b/content/shmem_alltoall_nb.tex index fd509928..7d772baf 100644 --- a/content/shmem_alltoall_nb.tex +++ b/content/shmem_alltoall_nb.tex @@ -55,7 +55,7 @@ {\bf Invocation and completion}: A call to the nonblocking alltoall routine initiates the operation and returns immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. The - operation is completed after at least one call to \FUNC{shmem\_req\_test} or + operation is completed after a call to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. diff --git a/content/shmem_barrier_all_nb.tex b/content/shmem_barrier_all_nb.tex deleted file mode 100644 index 8271f5c7..00000000 --- a/content/shmem_barrier_all_nb.tex +++ /dev/null @@ -1,47 +0,0 @@ -\apisummary{ - Registers the arrival of a \ac{PE} at a barrier and returns immediately. It completes when all \acp{PE} - arrive at the barrier and all local updates and remote memory updates on the default context are completed. - } - -\begin{apidefinition} - -\begin{Csynopsis} -void @\FuncDecl{shmem\_barrier\_all\_nb}@(shmem_req_h *request); -\end{Csynopsis} - -\begin{apiarguments} - - \apiargument{OUT}{request}{An opaque request handle identifying the - collective operation.} - -\end{apiarguments} - -\apidescription{ - Similar to the \FUNC{shmem\_barrier\_all} routine, the nonblocking \FUNC{shmem\_barrier\_all\_nb} - is a mechanism for synchronizing all \acp{PE} in the world team at - once. This routine completes when all \acp{PE} have called - \FUNC{shmem\_barrier\_all\_nb}. 
- - A call to the nonblocking barrier routine initiates the operation and returns - immediately without necessarily completing the operation. On success, - an opaque request handle is created and returned. The - operation is completed after at least one call to \FUNC{shmem\_req\_test} or - a call to \FUNC{shmem\_req\_wait}. When the operation is complete, the - request handle is deallocated and cannot be reused. - - Prior to completion, \FUNC{shmem\_barrier\_all\_nb} - ensures completion of all previously issued memory stores and remote memory - updates issued on the default context via \openshmem \acp{AMO} and - \ac{RMA} routine calls such - as \FUNC{shmem\_int\_add}, \FUNC{shmem\_put32}, - \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. -} - -\apireturnvalues{ - None. -} - -\apinotes{ -} - -\end{apidefinition} diff --git a/content/shmem_broadcast_nb.tex b/content/shmem_broadcast_nb.tex index d9ee6de6..ce2bc2bb 100644 --- a/content/shmem_broadcast_nb.tex +++ b/content/shmem_broadcast_nb.tex @@ -59,7 +59,7 @@ A call to the nonblocking broadcast routine initiates the operation and returns immediately without necessarily completing the operation. On success, an opaque request handle is created and returned. The - operation is completed after at least one call to \FUNC{shmem\_req\_test} or a + operation is completed after a call to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}. When the operation is complete, the request handle is deallocated and cannot be reused. @@ -82,9 +82,9 @@ Upon completion of a nonblocking broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item The \dest{} data object is - updated. - \item The \source{} data object may be safely reused. + \item The \dest{} data object is updated. + \item If the local \ac{PE} is \VAR{PE\_root}, the data has been copied + out of the \source{} data object. 
\end{itemize} } diff --git a/content/shmem_collective_test.tex b/content/shmem_collective_test.tex index a27f5472..e4b4851f 100644 --- a/content/shmem_collective_test.tex +++ b/content/shmem_collective_test.tex @@ -5,7 +5,7 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_req\_test}@(shmem_req_h request); +int @\FuncDecl{shmem\_req\_test}@(shmem_req_h *request); \end{Csynopsis} \begin{apiarguments} @@ -17,9 +17,10 @@ \apidescription{ A call to \FUNC{shmem\_req\_test} returns immediately. If the operation identified by the request is completed, it returns - zero, and the request object is deallocated and cannot be reused. - If the operation is not completed, it returns a non-negative integer. If - the request object is not valid, behavior is undefined. + zero, and the request object is deallocated and set to \LibConstRef{SHMEM\_REQ\_INVALID}. + If the operation is not completed, it returns a positive integer. + If the request object is not valid (i.e., it is set to \LibConstRef{SHMEM\_REQ\_INVALID}), + no operation is performed and a negative value is returned. In a multithreaded environment, \FUNC{shmem\_req\_test} can be called by different threads but on different request objects. It is the responsibility diff --git a/content/shmem_collective_wait.tex b/content/shmem_collective_wait.tex index 31d05929..a1c44a6c 100644 --- a/content/shmem_collective_wait.tex +++ b/content/shmem_collective_wait.tex @@ -6,7 +6,7 @@ \begin{apidefinition} \begin{Csynopsis} -int @\FuncDecl{shmem\_req\_wait}@(shmem_req_h request); +int @\FuncDecl{shmem\_req\_wait}@(shmem_req_h *request); \end{Csynopsis} \begin{apiarguments} @@ -17,12 +17,13 @@ \apidescription{ -The \FUNC{shmem\_req\_wait} function is a blocking operation used to determine whether an -operation identified by the request object has been completed. If the operation -is completed, \FUNC{shmem\_req\_wait} returns zero, and the request object is -deallocated and cannot be reused. 
If the operation has not been completed, -\FUNC{shmem\_req\_wait} blocks until the operation completes and then returns -zero. If the request object is not valid, behavior is undefined. +The \FUNC{shmem\_req\_wait} function is a blocking operation that waits +until the operation identified by the request object has +been completed. When the operation is completed, \FUNC{shmem\_req\_wait} returns +zero, and the request object is deallocated and set to \LibConstRef{SHMEM\_REQ\_INVALID}. +If the request object is not valid (i.e., it is set to +\LibConstRef{SHMEM\_REQ\_INVALID}), no operation is performed and a negative +value is returned. In a multithreaded environment, \FUNC{shmem\_req\_wait} can be called by different threads but on different request objects. It is the responsibility of the diff --git a/main_spec.tex b/main_spec.tex index 8b0657c9..e11af1bb 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -387,9 +387,6 @@ \subsubsection{\textbf{SHMEM\_REDUCTIONS}}\label{subsec:shmem_reductions} \subsection{Nonblocking Collective Routines}\label{subsec:nb_coll} \input{content/nb_collectives_intro.tex} -\subsubsection{\textbf{SHMEM\_BARRIER\_ALL\_NB}}\label{subsec:shmem_barrier_all_nb} -\input{content/shmem_barrier_all_nb.tex} - \subsubsection{\textbf{SHMEM\_BROADCAST\_NB}}\label{subsec:shmem_broadcast_nb} \input{content/shmem_broadcast_nb.tex} From 02a539a1ca05e68410de9af989ecff49f519bbf0 Mon Sep 17 00:00:00 2001 From: ferrol aderholdt Date: Fri, 23 Aug 2024 11:44:41 -0700 Subject: [PATCH 27/27] Inclusion of text from PR #507 --- content/execution_model.tex | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/content/execution_model.tex b/content/execution_model.tex index a1ea1a69..0faa54e4 100644 --- a/content/execution_model.tex +++ b/content/execution_model.tex @@ -32,13 +32,20 @@ \subsection{Progress of OpenSHMEM Operations}\label{subsec:progress} The \openshmem model assumes that computation and communication are naturally 
overlapped. \openshmem programs are expected to exhibit progression of -communication both with and without \openshmem calls. Consider a \ac{PE} that is +communication both with and without \openshmem calls. For point-to-point +operations, consider a \ac{PE} that is engaged in a computation with no \openshmem calls. Other \acp{PE} should be able to communicate (e.g., \OPR{put}, \OPR{get}, \OPR{atomic}, etc.) and complete communication operations with that computationally-bound \ac{PE} without that \ac{PE} issuing any explicit \openshmem calls. One-sided \openshmem communication calls involving that \ac{PE} should progress regardless of when -that \ac{PE} next engages in an \openshmem call. +that \ac{PE} next engages in an \openshmem call. Similarly, +for nonblocking collectives, consider the \acp{PE} that are part of a team +issuing a nonblocking collective and overlapping collective completion with +computation. Once a nonblocking collective operation is initiated by +all of the \acp{PE} in the team of the collective, any \ac{PE} in the team must +eventually observe completion through a call to \FUNC{shmem\_req\_test} or a +call to \FUNC{shmem\_req\_wait}. \parimpnotes{ An \openshmem implementation for hardware that does not provide