diff --git a/docs/source/ProgrammingGuide/HierarchicalParallelism.md b/docs/source/ProgrammingGuide/HierarchicalParallelism.md index b3e730ffd..017f3e90e 100644 --- a/docs/source/ProgrammingGuide/HierarchicalParallelism.md +++ b/docs/source/ProgrammingGuide/HierarchicalParallelism.md @@ -64,6 +64,9 @@ Kokkos::TeamPolicy The team policy's `member_type` provides the necessary functionality to use teams within a parallel kernel. It allows access to thread identifiers such as the league rank and size, and the team rank and size. It also provides team-synchronous actions such as team barriers, reductions and scans. ```c++ +using Kokkos::atomic_add; +using Kokkos::PerTeam; +using Kokkos::Sum; using Kokkos::TeamPolicy; using Kokkos::parallel_for; @@ -73,12 +76,17 @@ TeamPolicy policy (league_size, Kokkos::AUTO() ); // Launch a kernel parallel_for (policy, KOKKOS_LAMBDA (member_type team_member) { // Calculate a global thread id - int k = team_member.league_rank () * team_member.team_size () + + int k = team_member.league_rank () * team_member.team_size () + team_member.team_rank (); + // Calculate the sum of the global thread ids of this team - int team_sum = team_member.reduce (k); - // Atomically add the value to a global value - a() += team_sum; + int team_sum = k; + team_member.team_reduce(Sum(team_sum)); + + // Atomically add the computed sum to a global value + Kokkos::single (PerTeam (team_member), [=] () { + atomic_add(&global_value(), team_sum); + }); }); ```