From 420b180741cd44ae905abcbe58c7bbedbf110ba3 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 31 Jan 2025 12:29:05 -0800 Subject: [PATCH] Run clang-format to resolve merge conflicts. --- CMakeLists.txt | 7 +- cmake/RAJAMacros.cmake | 61 +- include/RAJA/RAJA.hpp | 28 +- include/RAJA/index/IndexSet.hpp | 28 +- include/RAJA/index/IndexSetBuilders.hpp | 36 +- include/RAJA/index/IndexSetUtils.hpp | 42 +- include/RAJA/index/IndexValue.hpp | 59 +- include/RAJA/index/ListSegment.hpp | 58 +- include/RAJA/index/RangeSegment.hpp | 87 +- include/RAJA/internal/DepGraphNode.hpp | 3 +- include/RAJA/internal/Iterators.hpp | 12 +- include/RAJA/internal/MemUtils_CPU.hpp | 20 +- include/RAJA/internal/RAJAVec.hpp | 114 +- include/RAJA/internal/fault_tolerance.hpp | 26 +- include/RAJA/internal/foldl.hpp | 20 +- include/RAJA/internal/get_platform.hpp | 28 +- include/RAJA/pattern/WorkGroup.hpp | 216 +- include/RAJA/pattern/WorkGroup/Dispatcher.hpp | 424 +-- include/RAJA/pattern/WorkGroup/WorkRunner.hpp | 195 +- .../RAJA/pattern/WorkGroup/WorkStorage.hpp | 466 ++- include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 62 +- include/RAJA/pattern/atomic.hpp | 31 +- include/RAJA/pattern/detail/algorithm.hpp | 27 +- include/RAJA/pattern/detail/multi_reduce.hpp | 218 +- include/RAJA/pattern/detail/reduce.hpp | 108 +- include/RAJA/pattern/forall.hpp | 437 +-- include/RAJA/pattern/kernel.hpp | 85 +- include/RAJA/pattern/kernel/Conditional.hpp | 7 +- include/RAJA/pattern/kernel/For.hpp | 25 +- include/RAJA/pattern/kernel/ForICount.hpp | 27 +- include/RAJA/pattern/kernel/Hyperplane.hpp | 27 +- include/RAJA/pattern/kernel/InitLocalMem.hpp | 55 +- include/RAJA/pattern/kernel/Lambda.hpp | 210 +- include/RAJA/pattern/kernel/Param.hpp | 9 +- include/RAJA/pattern/kernel/Reduce.hpp | 6 +- include/RAJA/pattern/kernel/Region.hpp | 30 +- include/RAJA/pattern/kernel/Tile.hpp | 63 +- include/RAJA/pattern/kernel/TileTCount.hpp | 36 +- .../RAJA/pattern/kernel/internal/LoopData.hpp | 96 +- .../pattern/kernel/internal/LoopTypes.hpp | 72 +- .../pattern/kernel/internal/Statement.hpp | 15 +- .../pattern/kernel/internal/StatementList.hpp | 16 +- .../RAJA/pattern/kernel/internal/Template.hpp | 22 +- include/RAJA/pattern/launch/launch_core.hpp | 415 +-- include/RAJA/pattern/multi_reduce.hpp | 8 +- include/RAJA/pattern/params/forall.hpp | 756 ++--- include/RAJA/pattern/params/kernel_name.hpp | 24 +- include/RAJA/pattern/params/params_base.hpp | 358 ++- include/RAJA/pattern/params/reducer.hpp | 302 +- include/RAJA/pattern/reduce.hpp | 5 +- include/RAJA/pattern/scan.hpp | 227 +- include/RAJA/pattern/sort.hpp | 198 +- include/RAJA/pattern/tensor.hpp | 14 +- .../RAJA/pattern/tensor/MatrixRegister.hpp | 42 +- .../RAJA/pattern/tensor/ScalarRegister.hpp | 14 +- include/RAJA/pattern/tensor/TensorBlock.hpp | 7 +- include/RAJA/pattern/tensor/TensorIndex.hpp | 366 +-- include/RAJA/pattern/tensor/TensorLayout.hpp | 81 +- .../RAJA/pattern/tensor/TensorRegister.hpp | 139 +- .../RAJA/pattern/tensor/VectorRegister.hpp | 19 +- .../tensor/internal/ET/BinaryOperator.hpp | 240 +- .../internal/ET/BinaryOperatorTraits.hpp | 235 +- .../tensor/internal/ET/BlockLiteral.hpp | 149 +- .../internal/ET/ExpressionTemplateBase.hpp | 245 +- .../tensor/internal/ET/MultiplyOperator.hpp | 2218 +++++++------- .../tensor/internal/ET/TensorDivide.hpp | 699 ++--- .../tensor/internal/ET/TensorLiteral.hpp | 133 +- .../tensor/internal/ET/TensorLoadStore.hpp | 365 ++- .../tensor/internal/ET/TensorMultiply.hpp | 254 +- .../tensor/internal/ET/TensorMultiplyAdd.hpp | 154 +- .../tensor/internal/ET/TensorNegate.hpp | 107 +- .../internal/ET/TensorScalarLiteral.hpp | 135 +- .../tensor/internal/ET/TensorTranspose.hpp | 118 +- .../tensor/internal/ET/normalizeOperand.hpp | 88 +- .../tensor/internal/ExpressionTemplate.hpp | 3 +- .../tensor/internal/MatrixMatrixMultiply.hpp | 527 ++-- .../tensor/internal/MatrixRegisterImpl.hpp | 2608 +++++++++-------- .../pattern/tensor/internal/RegisterBase.hpp | 2032 +++++++------ .../tensor/internal/TensorIndexTraits.hpp | 588 ++-- .../pattern/tensor/internal/TensorRef.hpp | 1322 +++++---- .../tensor/internal/TensorRegisterBase.hpp | 1532 +++++----- .../tensor/internal/TensorTileExec.hpp | 588 ++-- .../tensor/internal/VectorRegisterImpl.hpp | 1725 +++++------ include/RAJA/pattern/tensor/stats.hpp | 10 +- include/RAJA/policy/MultiPolicy.hpp | 32 +- include/RAJA/policy/PolicyBase.hpp | 34 +- include/RAJA/policy/WorkGroup.hpp | 54 +- include/RAJA/policy/atomic_auto.hpp | 15 +- include/RAJA/policy/atomic_builtin.hpp | 187 +- include/RAJA/policy/cuda.hpp | 10 +- include/RAJA/policy/cuda/MemUtils_CUDA.hpp | 189 +- .../RAJA/policy/cuda/WorkGroup/Dispatcher.hpp | 49 +- .../RAJA/policy/cuda/WorkGroup/WorkRunner.hpp | 270 +- include/RAJA/policy/cuda/atomic.hpp | 363 +-- include/RAJA/policy/cuda/forall.hpp | 698 +++-- include/RAJA/policy/cuda/intrinsics.hpp | 99 +- .../RAJA/policy/cuda/kernel/Conditional.hpp | 19 +- .../RAJA/policy/cuda/kernel/CudaKernel.hpp | 216 +- include/RAJA/policy/cuda/kernel/For.hpp | 203 +- include/RAJA/policy/cuda/kernel/ForICount.hpp | 330 ++- .../RAJA/policy/cuda/kernel/Hyperplane.hpp | 41 +- .../RAJA/policy/cuda/kernel/InitLocalMem.hpp | 172 +- include/RAJA/policy/cuda/kernel/Lambda.hpp | 32 +- include/RAJA/policy/cuda/kernel/Reduce.hpp | 15 +- include/RAJA/policy/cuda/kernel/Sync.hpp | 34 +- include/RAJA/policy/cuda/kernel/Tile.hpp | 161 +- .../RAJA/policy/cuda/kernel/TileTCount.hpp | 229 +- include/RAJA/policy/cuda/kernel/internal.hpp | 781 ++--- include/RAJA/policy/cuda/launch.hpp | 1280 ++++---- include/RAJA/policy/cuda/multi_reduce.hpp | 461 +-- .../RAJA/policy/cuda/params/kernel_name.hpp | 70 +- include/RAJA/policy/cuda/params/reduce.hpp | 105 +- include/RAJA/policy/cuda/policy.hpp | 1775 ++++++----- include/RAJA/policy/cuda/raja_cudaerrchk.hpp | 13 +- include/RAJA/policy/cuda/reduce.hpp | 349 ++- include/RAJA/policy/cuda/scan.hpp | 63 +- include/RAJA/policy/cuda/sort.hpp | 534 ++-- include/RAJA/policy/desul/atomic.hpp | 56 +- include/RAJA/policy/hip.hpp | 8 +- include/RAJA/policy/hip/MemUtils_HIP.hpp | 214 +- .../RAJA/policy/hip/WorkGroup/Dispatcher.hpp | 42 +- .../RAJA/policy/hip/WorkGroup/WorkRunner.hpp | 302 +- include/RAJA/policy/hip/atomic.hpp | 367 ++- include/RAJA/policy/hip/forall.hpp | 683 +++-- include/RAJA/policy/hip/intrinsics.hpp | 86 +- .../RAJA/policy/hip/kernel/Conditional.hpp | 23 +- include/RAJA/policy/hip/kernel/For.hpp | 208 +- include/RAJA/policy/hip/kernel/ForICount.hpp | 332 ++- include/RAJA/policy/hip/kernel/HipKernel.hpp | 194 +- include/RAJA/policy/hip/kernel/Hyperplane.hpp | 41 +- .../RAJA/policy/hip/kernel/InitLocalMem.hpp | 174 +- include/RAJA/policy/hip/kernel/Lambda.hpp | 32 +- include/RAJA/policy/hip/kernel/Reduce.hpp | 36 +- include/RAJA/policy/hip/kernel/Sync.hpp | 30 +- include/RAJA/policy/hip/kernel/Tile.hpp | 161 +- include/RAJA/policy/hip/kernel/TileTCount.hpp | 223 +- include/RAJA/policy/hip/kernel/internal.hpp | 782 ++--- include/RAJA/policy/hip/launch.hpp | 1252 ++++---- include/RAJA/policy/hip/multi_reduce.hpp | 459 +-- .../RAJA/policy/hip/params/kernel_name.hpp | 69 +- include/RAJA/policy/hip/params/reduce.hpp | 104 +- include/RAJA/policy/hip/policy.hpp | 1562 +++++----- include/RAJA/policy/hip/raja_hiperrchk.hpp | 16 +- include/RAJA/policy/hip/reduce.hpp | 337 ++- include/RAJA/policy/hip/scan.hpp | 78 +- include/RAJA/policy/hip/sort.hpp | 443 +-- include/RAJA/policy/openmp.hpp | 9 +- .../policy/openmp/WorkGroup/Dispatcher.hpp | 8 +- .../policy/openmp/WorkGroup/WorkRunner.hpp | 68 +- include/RAJA/policy/openmp/atomic.hpp | 58 +- include/RAJA/policy/openmp/forall.hpp | 470 +-- .../RAJA/policy/openmp/kernel/Collapse.hpp | 25 +- .../policy/openmp/kernel/OmpSyncThreads.hpp | 26 +- include/RAJA/policy/openmp/launch.hpp | 89 +- include/RAJA/policy/openmp/multi_reduce.hpp | 205 +- include/RAJA/policy/openmp/params/forall.hpp | 579 ++-- .../RAJA/policy/openmp/params/kernel_name.hpp | 60 +- include/RAJA/policy/openmp/params/reduce.hpp | 61 +- include/RAJA/policy/openmp/policy.hpp | 179 +- include/RAJA/policy/openmp/reduce.hpp | 8 +- include/RAJA/policy/openmp/region.hpp | 10 +- include/RAJA/policy/openmp/scan.hpp | 110 +- include/RAJA/policy/openmp/sort.hpp | 155 +- include/RAJA/policy/openmp_target.hpp | 7 +- .../openmp_target/WorkGroup/Dispatcher.hpp | 26 +- .../openmp_target/WorkGroup/WorkRunner.hpp | 68 +- include/RAJA/policy/openmp_target/forall.hpp | 95 +- .../policy/openmp_target/kernel/Collapse.hpp | 86 +- .../RAJA/policy/openmp_target/kernel/For.hpp | 46 +- .../openmp_target/params/kernel_name.hpp | 59 +- .../policy/openmp_target/params/reduce.hpp | 60 +- include/RAJA/policy/openmp_target/policy.hpp | 49 +- include/RAJA/policy/openmp_target/reduce.hpp | 69 +- include/RAJA/policy/sequential.hpp | 8 +- .../sequential/WorkGroup/Dispatcher.hpp | 12 +- .../sequential/WorkGroup/WorkRunner.hpp | 66 +- include/RAJA/policy/sequential/atomic.hpp | 46 +- include/RAJA/policy/sequential/forall.hpp | 42 +- .../policy/sequential/kernel/Collapse.hpp | 17 +- .../RAJA/policy/sequential/kernel/Reduce.hpp | 3 +- include/RAJA/policy/sequential/launch.hpp | 62 +- .../RAJA/policy/sequential/multi_reduce.hpp | 60 +- .../policy/sequential/params/kernel_name.hpp | 69 +- .../RAJA/policy/sequential/params/reduce.hpp | 57 +- include/RAJA/policy/sequential/policy.hpp | 40 +- include/RAJA/policy/sequential/reduce.hpp | 4 - include/RAJA/policy/sequential/scan.hpp | 81 +- include/RAJA/policy/sequential/sort.hpp | 96 +- include/RAJA/policy/simd.hpp | 4 +- include/RAJA/policy/simd/forall.hpp | 30 +- include/RAJA/policy/simd/kernel/For.hpp | 15 +- include/RAJA/policy/simd/kernel/ForICount.hpp | 25 +- include/RAJA/policy/sycl.hpp | 3 +- include/RAJA/policy/sycl/MemUtils_SYCL.hpp | 7 +- include/RAJA/policy/sycl/forall.hpp | 253 +- include/RAJA/policy/sycl/kernel.hpp | 2 +- .../RAJA/policy/sycl/kernel/Conditional.hpp | 21 +- include/RAJA/policy/sycl/kernel/For.hpp | 139 +- include/RAJA/policy/sycl/kernel/ForICount.hpp | 244 +- include/RAJA/policy/sycl/kernel/Lambda.hpp | 35 +- .../RAJA/policy/sycl/kernel/SyclKernel.hpp | 106 +- include/RAJA/policy/sycl/kernel/Tile.hpp | 158 +- .../RAJA/policy/sycl/kernel/TileTCount.hpp | 227 +- include/RAJA/policy/sycl/kernel/internal.hpp | 110 +- include/RAJA/policy/sycl/launch.hpp | 992 +++---- .../RAJA/policy/sycl/params/kernel_name.hpp | 76 +- include/RAJA/policy/sycl/params/reduce.hpp | 61 +- include/RAJA/policy/sycl/policy.hpp | 133 +- include/RAJA/policy/sycl/reduce.hpp | 197 +- include/RAJA/policy/tensor/arch.hpp | 58 +- include/RAJA/policy/tensor/arch/avx.hpp | 12 +- .../policy/tensor/arch/avx/avx_double.hpp | 879 +++--- .../RAJA/policy/tensor/arch/avx/avx_float.hpp | 912 +++--- .../RAJA/policy/tensor/arch/avx/avx_int32.hpp | 1471 +++++----- .../RAJA/policy/tensor/arch/avx/avx_int64.hpp | 1012 +++---- .../RAJA/policy/tensor/arch/avx/traits.hpp | 81 +- include/RAJA/policy/tensor/arch/avx2.hpp | 12 +- .../policy/tensor/arch/avx2/avx2_double.hpp | 1003 +++---- .../policy/tensor/arch/avx2/avx2_float.hpp | 964 +++--- .../policy/tensor/arch/avx2/avx2_int32.hpp | 1100 +++---- .../policy/tensor/arch/avx2/avx2_int64.hpp | 1034 +++---- .../RAJA/policy/tensor/arch/avx2/traits.hpp | 105 +- include/RAJA/policy/tensor/arch/avx512.hpp | 12 +- .../tensor/arch/avx512/avx512_double.hpp | 713 ++--- .../tensor/arch/avx512/avx512_float.hpp | 736 ++--- .../tensor/arch/avx512/avx512_int32.hpp | 866 +++--- .../tensor/arch/avx512/avx512_int64.hpp | 759 ++--- .../RAJA/policy/tensor/arch/avx512/traits.hpp | 84 +- include/RAJA/policy/tensor/arch/cuda.hpp | 6 +- .../policy/tensor/arch/cuda/cuda_warp.hpp | 1945 ++++++------ .../RAJA/policy/tensor/arch/cuda/traits.hpp | 38 +- include/RAJA/policy/tensor/arch/hip.hpp | 6 +- .../RAJA/policy/tensor/arch/hip/hip_wave.hpp | 1945 ++++++------ .../RAJA/policy/tensor/arch/hip/traits.hpp | 35 +- include/RAJA/policy/tensor/arch/scalar.hpp | 8 +- .../RAJA/policy/tensor/arch/scalar/scalar.hpp | 886 +++--- .../RAJA/policy/tensor/arch/scalar/traits.hpp | 81 +- include/RAJA/policy/tensor/arch_impl.hpp | 14 +- include/RAJA/policy/tensor/policy.hpp | 30 +- include/RAJA/util/BitMask.hpp | 96 +- include/RAJA/util/CombiningAdapter.hpp | 89 +- include/RAJA/util/EnableIf.hpp | 14 +- include/RAJA/util/IndexLayout.hpp | 111 +- include/RAJA/util/KokkosPluginLoader.hpp | 53 +- include/RAJA/util/Layout.hpp | 87 +- include/RAJA/util/LocalArray.hpp | 91 +- include/RAJA/util/OffsetLayout.hpp | 98 +- include/RAJA/util/OffsetOperators.hpp | 61 +- include/RAJA/util/Operators.hpp | 59 +- include/RAJA/util/Permutations.hpp | 51 +- include/RAJA/util/PermutedLayout.hpp | 6 +- include/RAJA/util/PluginContext.hpp | 27 +- include/RAJA/util/PluginLinker.hpp | 23 +- include/RAJA/util/PluginOptions.hpp | 21 +- include/RAJA/util/PluginStrategy.hpp | 28 +- include/RAJA/util/Registry.hpp | 236 +- include/RAJA/util/RepeatView.hpp | 126 +- include/RAJA/util/RuntimePluginLoader.hpp | 41 +- include/RAJA/util/SoAPtr.hpp | 100 +- include/RAJA/util/Span.hpp | 46 +- include/RAJA/util/StaticLayout.hpp | 143 +- include/RAJA/util/Timer.hpp | 1 + include/RAJA/util/TypeConvert.hpp | 5 +- include/RAJA/util/TypedViewBase.hpp | 1277 ++++---- include/RAJA/util/View.hpp | 185 +- include/RAJA/util/align.hpp | 10 +- include/RAJA/util/basic_mempool.hpp | 8 +- include/RAJA/util/camp_aliases.hpp | 3 +- include/RAJA/util/concepts.hpp | 6 +- include/RAJA/util/for_each.hpp | 40 +- include/RAJA/util/macros.hpp | 25 +- include/RAJA/util/math.hpp | 47 +- include/RAJA/util/plugins.hpp | 84 +- include/RAJA/util/reduce.hpp | 164 +- include/RAJA/util/resource.hpp | 279 +- include/RAJA/util/sort.hpp | 396 ++- include/RAJA/util/types.hpp | 138 +- include/RAJA/util/zip.hpp | 91 +- include/RAJA/util/zip_tuple.hpp | 438 +-- scripts/lc-builds/toss4_clang-format.sh | 43 + src/AlignedRangeIndexSetBuilders.cpp | 13 +- src/DepGraphNode.cpp | 4 +- src/KokkosPluginLoader.cpp | 102 +- src/LockFreeIndexSetBuilders.cpp | 21 +- src/MemUtils_CUDA.cpp | 1 - src/MemUtils_HIP.cpp | 1 - src/PluginStrategy.cpp | 22 +- src/RuntimePluginLoader.cpp | 101 +- src/TensorStats.cpp | 15 +- 289 files changed, 35855 insertions(+), 32126 deletions(-) create mode 100755 scripts/lc-builds/toss4_clang-format.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 3eb0dbc8d2..f31e3b1795 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC # and RAJA project contributors. See the RAJA/LICENSE file for details. # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### @@ -41,7 +41,7 @@ project(RAJA LANGUAGES CXX C VERSION ${RAJA_LOADED}) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/thirdparty" ${CMAKE_MODULE_PATH}) - +set(BLT_REQUIRED_CLANGFORMAT_VERSION "14" CACHE STRING "") include(cmake/SetupRajaOptions.cmake) cmake_minimum_required(VERSION 3.23) @@ -136,6 +136,9 @@ include(cmake/SetupCompilers.cmake) # Macros for building executables and libraries include (cmake/RAJAMacros.cmake) +# Configure `style` target for enforcing code style +raja_add_code_checks() + set (raja_sources src/AlignedRangeIndexSetBuilders.cpp src/DepGraphNode.cpp diff --git a/cmake/RAJAMacros.cmake b/cmake/RAJAMacros.cmake index c412593db7..848f5779e4 100644 --- a/cmake/RAJAMacros.cmake +++ b/cmake/RAJAMacros.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC # and other RAJA project contributors. See the RAJA/LICENSE file for details. # # SPDX-License-Identifier: (BSD-3-Clause) @@ -204,3 +204,62 @@ macro(raja_add_benchmark) NUM_OMP_THREADS ${arg_NUM_OMP_THREADS} COMMAND ${TEST_DRIVER} ${arg_NAME}) endmacro(raja_add_benchmark) + +##------------------------------------------------------------------------------ +## raja_add_code_checks() +## +## Adds code checks for all source files recursively in the RAJA repository. +## +## This creates the following parent build targets: +## check - Runs a non file changing style check and CppCheck +## style - In-place code formatting +## +## Creates various child build targets that follow this pattern: +## raja_ +## raja__ +##------------------------------------------------------------------------------ +macro(raja_add_code_checks) + + set(options) + set(singleValueArgs) + set(multiValueArgs) + + # Parse the arguments to the macro + cmake_parse_arguments(arg + "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Only do code checks if building raja by itself and not included in + # another project + if ("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") + # Create file globbing expressions that only include directories that contain source + # TODO(bowen) Add examples, exercises, test, and benchmark to the list below + set(_base_dirs "RAJA" "include" "src") + set(_ext_expressions "*.cpp" "*.hpp" "*.inl" + "*.cxx" "*.hxx" "*.cc" "*.c" "*.h" "*.hh") + + set(_glob_expressions) + foreach(_exp ${_ext_expressions}) + foreach(_base_dir ${_base_dirs}) + list(APPEND _glob_expressions "${PROJECT_SOURCE_DIR}/${_base_dir}/${_exp}") + endforeach() + endforeach() + + # Glob for list of files to run code checks on + set(_sources) + file(GLOB_RECURSE _sources ${_glob_expressions}) + + blt_add_code_checks(PREFIX RAJA + SOURCES ${_sources} + CLANGFORMAT_CFG_FILE ${PROJECT_SOURCE_DIR}/.clang-format + CPPCHECK_FLAGS --enable=all --inconclusive) + + # Set FOLDER property for code check targets + foreach(_suffix clangformat_check clangformat_style clang_tidy_check clang_tidy_style) + set(_tgt ${arg_PREFIX}_${_suffix}) + if(TARGET ${_tgt}) + set_target_properties(${_tgt} PROPERTIES FOLDER "RAJA/code_checks") + endif() + endforeach() + endif() + +endmacro(raja_add_code_checks) diff --git a/include/RAJA/RAJA.hpp b/include/RAJA/RAJA.hpp index 59cca4bf22..3fe99418a8 100644 --- a/include/RAJA/RAJA.hpp +++ b/include/RAJA/RAJA.hpp @@ -27,16 +27,15 @@ #define RAJA_HPP #include "RAJA/config.hpp" - #include "RAJA/util/Operators.hpp" +#include "RAJA/util/Registry.hpp" #include "RAJA/util/basic_mempool.hpp" #include "RAJA/util/camp_aliases.hpp" +#include "RAJA/util/for_each.hpp" #include "RAJA/util/macros.hpp" -#include "RAJA/util/types.hpp" #include "RAJA/util/math.hpp" #include "RAJA/util/plugins.hpp" -#include "RAJA/util/Registry.hpp" -#include "RAJA/util/for_each.hpp" +#include "RAJA/util/types.hpp" // @@ -88,7 +87,7 @@ #endif #if defined(RAJA_ENABLE_DESUL_ATOMICS) - #include "RAJA/policy/desul.hpp" +#include "RAJA/policy/desul.hpp" #endif #include "RAJA/index/IndexSet.hpp" @@ -105,18 +104,17 @@ // #include "RAJA/pattern/forall.hpp" #include "RAJA/pattern/region.hpp" - #include "RAJA/policy/MultiPolicy.hpp" // // Multidimensional layouts and views // +#include "RAJA/util/IndexLayout.hpp" #include "RAJA/util/Layout.hpp" #include "RAJA/util/OffsetLayout.hpp" #include "RAJA/util/PermutedLayout.hpp" #include "RAJA/util/StaticLayout.hpp" -#include "RAJA/util/IndexLayout.hpp" #include "RAJA/util/View.hpp" @@ -158,14 +156,14 @@ // // WorkPool, WorkGroup, WorkSite objects // -#include "RAJA/policy/WorkGroup.hpp" #include "RAJA/pattern/WorkGroup.hpp" +#include "RAJA/policy/WorkGroup.hpp" // // Reduction objects // -#include "RAJA/pattern/reduce.hpp" #include "RAJA/pattern/multi_reduce.hpp" +#include "RAJA/pattern/reduce.hpp" // @@ -186,9 +184,8 @@ ////////////////////////////////////////////////////////////////////// // -#include "RAJA/index/IndexSetUtils.hpp" #include "RAJA/index/IndexSetBuilders.hpp" - +#include "RAJA/index/IndexSetUtils.hpp" #include "RAJA/pattern/scan.hpp" #if defined(RAJA_ENABLE_RUNTIME_PLUGINS) @@ -197,11 +194,14 @@ #include "RAJA/pattern/sort.hpp" -namespace RAJA { -namespace expt{} +namespace RAJA +{ +namespace expt +{ +} // // provide a RAJA::expt namespace for experimental work, but bring alias // // it into RAJA so it doesn't affect user code // using namespace expt; -} +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/index/IndexSet.hpp b/include/RAJA/index/IndexSet.hpp index 1a467c8341..2e12bdb707 100644 --- a/include/RAJA/index/IndexSet.hpp +++ b/include/RAJA/index/IndexSet.hpp @@ -19,15 +19,11 @@ #define RAJA_IndexSet_HPP #include "RAJA/config.hpp" - #include "RAJA/index/ListSegment.hpp" #include "RAJA/index/RangeSegment.hpp" - #include "RAJA/internal/Iterators.hpp" #include "RAJA/internal/RAJAVec.hpp" - #include "RAJA/policy/PolicyBase.hpp" - #include "RAJA/util/Operators.hpp" #include "RAJA/util/concepts.hpp" @@ -91,7 +87,7 @@ class TypedIndexSet : public TypedIndexSet //! Construct empty index set #if _MSC_VER < 1910 - // this one instance of constexpr does not work on VS2012 or VS2015 + // this one instance of constexpr does not work on VS2012 or VS2015 RAJA_INLINE TypedIndexSet() : PARENT() {} #else RAJA_INLINE constexpr TypedIndexSet() : PARENT() {} @@ -240,11 +236,11 @@ class TypedIndexSet : public TypedIndexSet if (pend == PUSH_BACK) { for (Index_type i = 0; i < num; ++i) { segment_push_into(i, c, pend, pcopy); - } + } } else { - for (Index_type i = num-1; i > -1; --i) { + for (Index_type i = num - 1; i > -1; --i) { segment_push_into(i, c, pend, pcopy); - } + } } } @@ -301,14 +297,18 @@ class TypedIndexSet : public TypedIndexSet template RAJA_INLINE void push_back(Tnew &&val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_BACK, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_BACK, + PUSH_COPY); } //! Add copy of segment to front end of index set. template RAJA_INLINE void push_front(Tnew &&val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_FRONT, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_FRONT, + PUSH_COPY); } //! Return total length -- sum of lengths of all segments @@ -341,7 +341,7 @@ class TypedIndexSet : public TypedIndexSet template RAJA_HOST_DEVICE void segmentCall(size_t segid, BODY &&body, - ARGS &&... args) const + ARGS &&...args) const { if (getSegmentTypes()[segid] != T0_TypeId) { PARENT::segmentCall(segid, @@ -762,12 +762,14 @@ namespace type_traits template struct is_index_set - : ::RAJA::type_traits::SpecializationOf::type> { + : ::RAJA::type_traits::SpecializationOf::type> { }; template struct is_indexset_policy - : ::RAJA::type_traits::SpecializationOf::type> { + : ::RAJA::type_traits::SpecializationOf::type> { }; } // namespace type_traits diff --git a/include/RAJA/index/IndexSetBuilders.hpp b/include/RAJA/index/IndexSetBuilders.hpp index 543524be01..6db86ce3bb 100644 --- a/include/RAJA/index/IndexSetBuilders.hpp +++ b/include/RAJA/index/IndexSetBuilders.hpp @@ -19,13 +19,10 @@ #define RAJA_IndexSetBuilders_HPP #include "RAJA/config.hpp" - #include "RAJA/index/IndexSet.hpp" #include "RAJA/index/ListSegment.hpp" #include "RAJA/index/RangeSegment.hpp" - #include "RAJA/util/types.hpp" - #include "camp/resource.hpp" namespace RAJA @@ -37,13 +34,13 @@ namespace RAJA * \brief Generate an index set with aligned Range segments and List segments, * as needed, from given array of indices. * - * Routine does no error-checking on argements and assumes + * Routine does no error-checking on argements and assumes * RAJA::Index_type array contains valid indices. * - * \param iset reference to index set generated with aligned range segments + * \param iset reference to index set generated with aligned range segments * and list segments. Method assumes index set is empty (no segments). - * \param work_res camp resource object that identifies the memory space in - * which list segment index data will live (passed to list segment + * \param work_res camp resource object that identifies the memory space in + * which list segment index data will live (passed to list segment * ctor). * \param indices_in pointer to start of input array of indices. * \param length size of input index array. @@ -79,37 +76,36 @@ void RAJASHAREDDLL_API buildIndexSetAligned( ****************************************************************************** * * \brief Generate a lock-free "block" index set (planar division) containing - * range segments. + * range segments. * - * The method chunks a fastDim x midDim x slowDim mesh into blocks that + * The method chunks a fastDim x midDim x slowDim mesh into blocks that * can be dependency-scheduled, removing need for lock constructs. * * \param iset reference to index set generated with range segments. - * Method assumes index set is empty (no segments). + * Method assumes index set is empty (no segments). * \param fastDim "fast" block dimension (see above). * \param midDim "mid" block dimension (see above). * \param slowDim "slow" block dimension (see above). * ****************************************************************************** */ -void buildLockFreeBlockIndexset( - RAJA::TypedIndexSet& iset, - int fastDim, - int midDim, - int slowDim); +void buildLockFreeBlockIndexset(RAJA::TypedIndexSet& iset, + int fastDim, + int midDim, + int slowDim); /*! ****************************************************************************** * * \brief Generate a lock-free "color" index set containing range and list * segments. - * - * TThe domain-set is colored based on connectivity to the range-set. - * All elements in each segment are independent, and no two segments + * + * TThe domain-set is colored based on connectivity to the range-set. + * All elements in each segment are independent, and no two segments * can be executed in parallel. * - * \param iset reference to index set generated. Method assumes index set - * is empty (no segments). + * \param iset reference to index set generated. Method assumes index set + * is empty (no segments). * \param work_res camp resource object that identifies the memory space in * which list segment index data will live (passed to list segment * ctor). diff --git a/include/RAJA/index/IndexSetUtils.hpp b/include/RAJA/index/IndexSetUtils.hpp index 4baea450fc..739ab818fb 100644 --- a/include/RAJA/index/IndexSetUtils.hpp +++ b/include/RAJA/index/IndexSetUtils.hpp @@ -20,9 +20,7 @@ #define RAJA_IndexSetUtils_HPP #include "RAJA/config.hpp" - #include "RAJA/pattern/forall.hpp" - #include "RAJA/policy/sequential.hpp" namespace RAJA @@ -31,10 +29,10 @@ namespace RAJA //@{ //! @name Methods to gather indices of segment or index set into a container. //! -//! For each method, the given container must be templated on a data type, -//! have default and copy ctors, push_back method, and value_type. Is is -//! assumed that the container data type and segment or index set data type -//! are compatible in the sense that the index set type can be converted to +//! For each method, the given container must be templated on a data type, +//! have default and copy ctors, push_back method, and value_type. Is is +//! assumed that the container data type and segment or index set data type +//! are compatible in the sense that the index set type can be converted to //! the container data type. /*! @@ -49,11 +47,8 @@ RAJA_INLINE void getIndices(CONTAINER_T& con, const TypedIndexSet& iset) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall >( + iset, [&](typename CONTAINER_T::value_type idx) { tcon.push_back(idx); }); con = tcon; } @@ -68,11 +63,9 @@ template RAJA_INLINE void getIndices(CONTAINER_T& con, const SEGMENT_T& seg) { CONTAINER_T tcon; - forall(seg, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall(seg, [&](typename CONTAINER_T::value_type idx) { + tcon.push_back(idx); + }); con = tcon; } @@ -90,11 +83,10 @@ RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, CONDITIONAL conditional) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + forall >( + iset, [&](typename CONTAINER_T::value_type idx) { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } @@ -112,11 +104,9 @@ RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, CONDITIONAL conditional) { CONTAINER_T tcon; - forall(seg, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + forall(seg, [&](typename CONTAINER_T::value_type idx) { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } diff --git a/include/RAJA/index/IndexValue.hpp b/include/RAJA/index/IndexValue.hpp index 44fa143445..98fffb104e 100644 --- a/include/RAJA/index/IndexValue.hpp +++ b/include/RAJA/index/IndexValue.hpp @@ -18,10 +18,9 @@ #ifndef RAJA_INDEXVALUE_HPP #define RAJA_INDEXVALUE_HPP -#include "RAJA/config.hpp" - #include +#include "RAJA/config.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/types.hpp" @@ -334,18 +333,21 @@ constexpr RAJA_HOST_DEVICE RAJA_INLINE return val; } -namespace internal{ -template +namespace internal +{ +template struct StripIndexTypeT { - using type = FROM; + using type = FROM; }; -template -struct StripIndexTypeT::value>::type> -{ - using type = typename FROM::value_type; +template +struct StripIndexTypeT< + FROM, + typename std::enable_if< + std::is_base_of::value>::type> { + using type = typename FROM::value_type; }; -} // namespace internal +} // namespace internal /*! * \brief Strips a strongly typed index to its underlying type @@ -353,7 +355,7 @@ struct StripIndexTypeT +template using strip_index_type_t = typename internal::StripIndexTypeT::type; /*! @@ -362,12 +364,11 @@ using strip_index_type_t = typename internal::StripIndexTypeT::type; * * \param FROM the original type */ -template -using make_signed_t = typename std::conditional < - std::is_floating_point::value, - std::common_type, - std::make_signed - >::type::type; +template +using make_signed_t = + typename std::conditional::value, + std::common_type, + std::make_signed >::type::type; } // namespace RAJA @@ -397,17 +398,19 @@ using make_signed_t = typename std::conditional < * \param IDXT the index types value type * \param NAME a string literal to identify this index type */ -#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ - class TYPE : public ::RAJA::IndexValue \ - { \ - public: \ - RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ - : RAJA::IndexValue::IndexValue() {} \ - RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ - : RAJA::IndexValue::IndexValue(v) \ - { \ - } \ - static inline std::string getName() { return NAME; } \ +#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ + class TYPE : public ::RAJA::IndexValue \ + { \ + public: \ + RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ + : RAJA::IndexValue::IndexValue() \ + { \ + } \ + RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ + : RAJA::IndexValue::IndexValue(v) \ + { \ + } \ + static inline std::string getName() { return NAME; } \ }; #endif diff --git a/include/RAJA/index/ListSegment.hpp b/include/RAJA/index/ListSegment.hpp index adee46053c..4eacc1a55f 100644 --- a/include/RAJA/index/ListSegment.hpp +++ b/include/RAJA/index/ListSegment.hpp @@ -18,18 +18,16 @@ #ifndef RAJA_ListSegment_HPP #define RAJA_ListSegment_HPP -#include "RAJA/config.hpp" - #include #include #include -#include "camp/resource.hpp" - +#include "RAJA/config.hpp" +#include "RAJA/util/Span.hpp" #include "RAJA/util/concepts.hpp" #include "RAJA/util/macros.hpp" -#include "RAJA/util/Span.hpp" #include "RAJA/util/types.hpp" +#include "camp/resource.hpp" namespace RAJA { @@ -85,7 +83,6 @@ template class TypedListSegment { public: - //@{ //! @name Types used in implementation based on template parameter. @@ -111,7 +108,7 @@ class TypedListSegment * \param values array of indices defining iteration space of segment * \param length number of indices * \param resource camp resource defining memory space where index data live - * \param owned optional enum value indicating whether segment owns indices + * \param owned optional enum value indicating whether segment owns indices * (Owned or Unowned). Default is Owned. * * If 'Unowned' is passed as last argument, the segment will not own its @@ -121,7 +118,7 @@ class TypedListSegment Index_type length, camp::resources::Resource resource, IndexOwnership owned = Owned) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(0) + : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(0) { initIndexData(values, length, resource, owned); } @@ -141,7 +138,10 @@ class TypedListSegment template TypedListSegment(const Container& container, camp::resources::Resource resource) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(container.size()) + : m_resource(nullptr), + m_owned(Unowned), + m_data(nullptr), + m_size(container.size()) { if (m_size > 0) { @@ -164,7 +164,6 @@ class TypedListSegment m_owned = Owned; host_res.deallocate(tmp); - } } @@ -175,8 +174,10 @@ class TypedListSegment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment(const TypedListSegment& other) - : m_resource(nullptr), - m_owned(Unowned), m_data(other.m_data), m_size(other.m_size) + : m_resource(nullptr), + m_owned(Unowned), + m_data(other.m_data), + m_size(other.m_size) { } @@ -192,7 +193,7 @@ class TypedListSegment m_size = other.m_size; } - //! move assignment for list segment + //! move assignment for list segment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment& operator=(TypedListSegment&& rhs) @@ -211,8 +212,10 @@ class TypedListSegment //! Move constructor for list segment RAJA_HOST_DEVICE TypedListSegment(TypedListSegment&& rhs) - : m_resource(rhs.m_resource), - m_owned(rhs.m_owned), m_data(rhs.m_data), m_size(rhs.m_size) + : m_resource(rhs.m_resource), + m_owned(rhs.m_owned), + m_data(rhs.m_data), + m_size(rhs.m_size) { rhs.m_owned = Unowned; rhs.m_resource = nullptr; @@ -221,10 +224,7 @@ class TypedListSegment } //! List segment destructor - RAJA_HOST_DEVICE ~TypedListSegment() - { - clear(); - } + RAJA_HOST_DEVICE ~TypedListSegment() { clear(); } //! Clear method to be called RAJA_HOST_DEVICE void clear() @@ -357,20 +357,20 @@ class TypedListSegment m_owned = container_own; if (m_owned == Owned) { - m_resource = new camp::resources::Resource(resource_); + m_resource = new camp::resources::Resource(resource_); - camp::resources::Resource host_res{camp::resources::Host()}; + camp::resources::Resource host_res{camp::resources::Host()}; - value_type* tmp = host_res.allocate(m_size); + value_type* tmp = host_res.allocate(m_size); - for (Index_type i = 0; i < m_size; ++i) { - tmp[i] = container[i]; - } + for (Index_type i = 0; i < m_size; ++i) { + tmp[i] = container[i]; + } - m_data = m_resource->allocate(m_size); - m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); + m_data = m_resource->allocate(m_size); + m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); - host_res.deallocate(tmp); + host_res.deallocate(tmp); return; } @@ -382,7 +382,7 @@ class TypedListSegment // Copy of camp resource passed to ctor - camp::resources::Resource *m_resource; + camp::resources::Resource* m_resource; // Ownership flag to guide data copying/management IndexOwnership m_owned; diff --git a/include/RAJA/index/RangeSegment.hpp b/include/RAJA/index/RangeSegment.hpp index a41959c583..6da88b570a 100644 --- a/include/RAJA/index/RangeSegment.hpp +++ b/include/RAJA/index/RangeSegment.hpp @@ -18,16 +18,13 @@ #ifndef RAJA_RangeSegment_HPP #define RAJA_RangeSegment_HPP -#include "RAJA/config.hpp" - #include +#include "RAJA/config.hpp" +#include "RAJA/index/IndexValue.hpp" #include "RAJA/internal/Iterators.hpp" - #include "RAJA/util/concepts.hpp" -#include "RAJA/index/IndexValue.hpp" - namespace RAJA { @@ -50,10 +47,10 @@ namespace RAJA * * NOTE: TypedRangeSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of + * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of * indices [-5, 3). * - * NOTE: Proper handling of indices strides requires that StorageT is a + * NOTE: Proper handling of indices strides requires that StorageT is a * signed type. * * Usage: @@ -92,15 +89,18 @@ namespace RAJA * ****************************************************************************** */ -template >> +template >> struct TypedRangeSegment { - // + // // Static asserts to provide some useful error messages during compilation // for incorrect usage. - // - static_assert(std::is_signed::value, "TypedRangeSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeSegment Type must be non floating point."); + // + static_assert(std::is_signed::value, + "TypedRangeSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -117,18 +117,18 @@ struct TypedRangeSegment { //@} //@{ - //! @name Constructors, destructor, and copy assignment. + //! @name Constructors, destructor, and copy assignment. /*! * \brief Construct a range segment repreenting the interval [begin, end) - * + * * \param begin start value (inclusive) for the range * \param end end value (exclusive) for the range */ using StripStorageT = strip_index_type_t; - RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, StripStorageT end) - : m_begin(iterator(begin)), - m_end(begin > end ? m_begin : iterator(end)) + RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, + StripStorageT end) + : m_begin(iterator(begin)), m_end(begin > end ? m_begin : iterator(end)) { } @@ -187,7 +187,7 @@ struct TypedRangeSegment { * \brief Compare this segment to another for inequality * * \return true if begin or end does not match, else false - */ + */ RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeSegment const& o) const { return !(operator==(o)); @@ -198,9 +198,9 @@ struct TypedRangeSegment { /*! * \brief Get a new TypedRangeSegment instance representing a slice of * existing segment - * - * \param begin start iterate of new range - * \param length maximum length of new range + * + * \param begin start iterate of new range + * \param length maximum length of new range * \return TypedRangeSegment representing the interval * [ *begin() + begin, min( *begin() + begin + length, *end() ) ) * @@ -213,7 +213,7 @@ struct TypedRangeSegment { * auto r = RAJA::TypedRangeSegment(-4, 4); * * // s repreents the subinterval [-3, 2) - * auto s = r.slice(1, 5); + * auto s = r.slice(1, 5); * * \endverbatim */ @@ -247,8 +247,8 @@ struct TypedRangeSegment { /*! ****************************************************************************** * - * \class TypedRangeStrideSegment - * + * \class TypedRangeStrideSegment + * * \brief Segment class representing a strided range of typed indices * * \tparam StorageT underlying data type for the segment indices (required) @@ -264,9 +264,9 @@ struct TypedRangeSegment { * * NOTE: TypedRangeStrideSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeStrideSegment allows for positive or negative strides and - * indices. This allows for forward (stride > 0) or backward (stride < 0) - * traversal of the iteration space. A stride of zero is undefined and + * NOTE: TypedRangeStrideSegment allows for positive or negative strides and + * indices. This allows for forward (stride > 0) or backward (stride < 0) + * traversal of the iteration space. A stride of zero is undefined and * will cause divide-by-zero errors. * * As with RangeSegment, the iteration space is inclusive of begin() and @@ -275,7 +275,7 @@ struct TypedRangeSegment { * For positive strides, begin() > end() implies size()==0 * For negative strides, begin() < end() implies size()==0 * - * NOTE: Proper handling of negative strides and indices requires that + * NOTE: Proper handling of negative strides and indices requires that * StorageT is a signed type. * * Usage: @@ -321,15 +321,18 @@ struct TypedRangeSegment { * ****************************************************************************** */ -template >> +template >> struct TypedRangeStrideSegment { // // Static asserts to provide some useful error messages during compilation // for incorrect usage. // - static_assert(std::is_signed::value, "TypedRangeStrideSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeStrideSegment Type must be non floating point."); + static_assert(std::is_signed::value, + "TypedRangeStrideSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeStrideSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -349,7 +352,7 @@ struct TypedRangeStrideSegment { //! @name Constructors, destructor, and copy assignment. /*! - * \brief Construct a range segment for the interval [begin, end) with + * \brief Construct a range segment for the interval [begin, end) with * given stride * * \param begin start value (inclusive) for the range @@ -408,8 +411,8 @@ struct TypedRangeStrideSegment { /*! * \brief Get size of this segment - * - * The size is the number of iterates in the + * + * The size is the number of iterates in the * interval [begin, end) when striding over it */ RAJA_HOST_DEVICE DiffT size() const { return m_size; } @@ -435,7 +438,8 @@ struct TypedRangeStrideSegment { * * \return true if begin, end, or size does not match, else false */ - RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeStrideSegment const& o) const + RAJA_HOST_DEVICE RAJA_INLINE bool operator!=( + TypedRangeStrideSegment const& o) const { return !(operator==(o)); } @@ -450,7 +454,7 @@ struct TypedRangeStrideSegment { * \param length maximum length of new range * * \return TypedRangeStrideSegment representing the interval - * [ *begin() + begin * stride, + * [ *begin() + begin * stride, * min( *begin() + (begin + length) * stride, *end() ) * * Here's an example of a slice operation on a range segment with a negative @@ -466,7 +470,7 @@ struct TypedRangeStrideSegment { * // 5 indices in r starting at the 6th entry * auto s = r.slice(6, 6); * - * \endverbatim + * \endverbatim */ RAJA_HOST_DEVICE TypedRangeStrideSegment slice(StorageT begin, DiffT length) const @@ -549,7 +553,7 @@ RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, } /*! - * \brief Function to make a TypedRangeStride Segment for the interval + * \brief Function to make a TypedRangeStride Segment for the interval * [begin, end) with given stride * * \return a newly constructed TypedRangeStrideSegment where @@ -566,8 +570,11 @@ RAJA_HOST_DEVICE TypedRangeStrideSegment make_strided_range( EndT&& end, StrideT&& stride) { - static_assert(std::is_signed::value, "make_strided_segment : stride must be signed."); - static_assert(std::is_same, StrideT>::value, "make_stride_segment : stride and end must be of similar types."); + static_assert(std::is_signed::value, + "make_strided_segment : stride must be signed."); + static_assert(std::is_same, StrideT>::value, + "make_stride_segment : stride and end must be of similar " + "types."); return {begin, end, stride}; } diff --git a/include/RAJA/internal/DepGraphNode.hpp b/include/RAJA/internal/DepGraphNode.hpp index 8feceae22f..3b0fd8b016 100644 --- a/include/RAJA/internal/DepGraphNode.hpp +++ b/include/RAJA/internal/DepGraphNode.hpp @@ -19,13 +19,12 @@ #ifndef RAJA_DepGraphNode_HPP #define RAJA_DepGraphNode_HPP -#include "RAJA/config.hpp" - #include #include #include #include +#include "RAJA/config.hpp" #include "RAJA/util/types.hpp" namespace RAJA diff --git a/include/RAJA/internal/Iterators.hpp b/include/RAJA/internal/Iterators.hpp index 6f32a56e6d..c127c96def 100644 --- a/include/RAJA/internal/Iterators.hpp +++ b/include/RAJA/internal/Iterators.hpp @@ -287,10 +287,14 @@ class strided_numeric_iterator using iterator_category = std::random_access_iterator_tag; constexpr strided_numeric_iterator() noexcept = default; - constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = default; - constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = default; - strided_numeric_iterator& operator=(const strided_numeric_iterator&) noexcept = default; - strided_numeric_iterator& operator=(strided_numeric_iterator&&) noexcept = default; + constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = + default; + constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = + default; + strided_numeric_iterator& operator=( + const strided_numeric_iterator&) noexcept = default; + strided_numeric_iterator& operator=(strided_numeric_iterator&&) noexcept = + default; RAJA_HOST_DEVICE constexpr strided_numeric_iterator( stripped_value_type rhs, diff --git a/include/RAJA/internal/MemUtils_CPU.hpp b/include/RAJA/internal/MemUtils_CPU.hpp index 55015f9ab7..19df756191 100644 --- a/include/RAJA/internal/MemUtils_CPU.hpp +++ b/include/RAJA/internal/MemUtils_CPU.hpp @@ -19,12 +19,11 @@ #ifndef RAJA_MemUtils_CPU_HPP #define RAJA_MemUtils_CPU_HPP -#include "RAJA/config.hpp" - #include #include #include +#include "RAJA/config.hpp" #include "RAJA/util/types.hpp" #if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ @@ -95,27 +94,22 @@ inline void free_aligned(void* ptr) /// /// Deleter function object for memory allocated with allocate_aligned /// -struct FreeAligned -{ - void operator()(void* ptr) - { - free_aligned(ptr); - } +struct FreeAligned { + void operator()(void* ptr) { free_aligned(ptr); } }; /// /// Deleter function object for memory allocated with allocate_aligned_type /// that calls the destructor for the fist size objects in the storage. /// -template < typename T, typename index_type > -struct FreeAlignedType : FreeAligned -{ +template +struct FreeAlignedType : FreeAligned { index_type size = 0; void operator()(T* ptr) { - for ( index_type i = size; i > 0; --i ) { - ptr[i-1].~T(); + for (index_type i = size; i > 0; --i) { + ptr[i - 1].~T(); } FreeAligned::operator()(ptr); } diff --git a/include/RAJA/internal/RAJAVec.hpp b/include/RAJA/internal/RAJAVec.hpp index 1d0ec0cbeb..a97ceb5b7b 100644 --- a/include/RAJA/internal/RAJAVec.hpp +++ b/include/RAJA/internal/RAJAVec.hpp @@ -19,13 +19,12 @@ #ifndef RAJAVec_HPP #define RAJAVec_HPP -#include "RAJA/config.hpp" - #include #include #include #include +#include "RAJA/config.hpp" #include "RAJA/internal/MemUtils_CPU.hpp" namespace RAJA @@ -57,8 +56,9 @@ class RAJAVec typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; + public: using value_type = T; using allocator_type = Allocator; @@ -86,7 +86,9 @@ class RAJAVec /// RAJAVec(const RAJAVec& other) : m_data(nullptr), - m_allocator(allocator_traits_type::select_on_container_copy_construction(other.m_allocator)), + m_allocator( + allocator_traits_type::select_on_container_copy_construction( + other.m_allocator)), m_capacity(0), m_size(0) { @@ -125,7 +127,8 @@ class RAJAVec RAJAVec& operator=(RAJAVec&& rhs) { if (&rhs != this) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment{}); } return *this; } @@ -150,25 +153,25 @@ class RAJAVec /// /// Get a pointer to the beginning of the contiguous vector /// - pointer data() { return m_data; } + pointer data() { return m_data; } /// const_pointer data() const { return m_data; } /// /// Get an iterator to the end. /// - iterator end() { return m_data + m_size; } + iterator end() { return m_data + m_size; } /// - const_iterator end() const { return m_data + m_size; } + const_iterator end() const { return m_data + m_size; } /// const_iterator cend() const { return m_data + m_size; } /// /// Get an iterator to the beginning. /// - iterator begin() { return m_data; } + iterator begin() { return m_data; } /// - const_iterator begin() const { return m_data; } + const_iterator begin() const { return m_data; } /// const_iterator cbegin() const { return m_data; } @@ -200,18 +203,12 @@ class RAJAVec /// /// Shrink the capacity of the vector to the current size. /// - void shrink_to_fit() - { - shrink_cap(m_size); - } + void shrink_to_fit() { shrink_cap(m_size); } /// /// Empty vector of all data. /// - void clear() - { - destroy_items_after(0); - } + void clear() { destroy_items_after(0); } /// /// Change the size of the vector, @@ -248,23 +245,23 @@ class RAJAVec /// /// Bracket operator accessor. /// - reference operator[](difference_type i) { return m_data[i]; } + reference operator[](difference_type i) { return m_data[i]; } /// const_reference operator[](difference_type i) const { return m_data[i]; } /// /// Access the last item of the vector. /// - reference front() { return m_data[0]; } + reference front() { return m_data[0]; } /// const_reference front() const { return m_data[0]; } /// /// Access the last item of the vector. /// - reference back() { return m_data[m_size-1]; } + reference back() { return m_data[m_size - 1]; } /// - const_reference back() const { return m_data[m_size-1]; } + const_reference back() const { return m_data[m_size - 1]; } /// /// Add item to front end of vector. Note that this operation is unique to @@ -272,28 +269,31 @@ class RAJAVec /// void push_front(const_reference item) { emplace_front_private(item); } /// - void push_front( value_type&& item) { emplace_front_private(std::move(item)); } + void push_front(value_type&& item) { emplace_front_private(std::move(item)); } /// - template < typename ... Os > - void emplace_front(Os&&... os) { emplace_front_private(std::forward(os)...); } + template + void emplace_front(Os&&... os) + { + emplace_front_private(std::forward(os)...); + } /// /// Add item to back end of vector. /// void push_back(const_reference item) { emplace_back_private(item); } /// - void push_back( value_type&& item) { emplace_back_private(std::move(item)); } + void push_back(value_type&& item) { emplace_back_private(std::move(item)); } /// - template < typename ... Os > - void emplace_back(Os&&... os) { emplace_back_private(std::forward(os)...); } + template + void emplace_back(Os&&... os) + { + emplace_back_private(std::forward(os)...); + } /// /// Remove the last item of the vector. /// - void pop_back() - { - destroy_items_after(m_size-1); - } + void pop_back() { destroy_items_after(m_size - 1); } private: pointer m_data; @@ -386,10 +386,10 @@ class RAJAVec void swap_private(RAJAVec& other, std::true_type) { using std::swap; - swap(m_data, other.m_data); + swap(m_data, other.m_data); swap(m_allocator, other.m_allocator); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } /// @@ -398,9 +398,9 @@ class RAJAVec void swap_private(RAJAVec& other, std::false_type) { using std::swap; - swap(m_data, other.m_data); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_data, other.m_data); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } // @@ -426,11 +426,13 @@ class RAJAVec // // Construct items [m_size, new_size) from args. // - template < typename ... Os > + template void construct_items_back(size_type new_size, Os&&... os) { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, + m_data + m_size, + std::forward(os)...); } } @@ -440,7 +442,9 @@ class RAJAVec void copy_construct_items_back(size_type new_size, const_pointer o_data) { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, o_data[m_size]); + allocator_traits_type::construct(m_allocator, + m_data + m_size, + o_data[m_size]); } } @@ -450,7 +454,9 @@ class RAJAVec void move_construct_items_back(size_type new_size, pointer o_data) { for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::move(o_data[m_size])); + allocator_traits_type::construct(m_allocator, + m_data + m_size, + std::move(o_data[m_size])); } } @@ -460,38 +466,44 @@ class RAJAVec void destroy_items_after(size_type new_end) { for (; m_size > new_end; --m_size) { - allocator_traits_type::destroy(m_allocator, m_data+m_size-1); + allocator_traits_type::destroy(m_allocator, m_data + m_size - 1); } } // // Add an item to the front, shifting all existing items back one. // - template < typename ... Os > + template void emplace_front_private(Os&&... os) { reserve(m_size + 1); if (m_size > 0) { size_type i = m_size; - allocator_traits_type::construct(m_allocator, m_data+i, std::move(m_data[i - 1])); + allocator_traits_type::construct(m_allocator, + m_data + i, + std::move(m_data[i - 1])); for (--i; i > 0; --i) { m_data[i] = std::move(m_data[i - 1]); } allocator_traits_type::destroy(m_allocator, m_data); } - allocator_traits_type::construct(m_allocator, m_data, std::forward(os)...); + allocator_traits_type::construct(m_allocator, + m_data, + std::forward(os)...); m_size++; } // // Add an item to the back. // - template < typename ... Os > + template void emplace_back_private(Os&&... os) { reserve(m_size + 1); - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, + m_data + m_size, + std::forward(os)...); m_size++; } @@ -548,8 +560,10 @@ class RAJAVec if (m_data) { for (size_type i = 0; i < m_size; ++i) { - allocator_traits_type::construct(m_allocator, tdata+i, std::move(m_data[i])); - allocator_traits_type::destroy(m_allocator, m_data+i); + allocator_traits_type::construct(m_allocator, + tdata + i, + std::move(m_data[i])); + allocator_traits_type::destroy(m_allocator, m_data + i); } allocator_traits_type::deallocate(m_allocator, m_data, m_capacity); } diff --git a/include/RAJA/internal/fault_tolerance.hpp b/include/RAJA/internal/fault_tolerance.hpp index cf3a86cede..baa4941b38 100644 --- a/include/RAJA/internal/fault_tolerance.hpp +++ b/include/RAJA/internal/fault_tolerance.hpp @@ -35,6 +35,7 @@ #if defined(RAJA_REPORT_FT) #include + #include "cycle.h" #define RAJA_FT_BEGIN \ @@ -80,17 +81,20 @@ do { \ repeat = false; -#define RAJA_FT_END \ - if (fault_type > 0) { \ - /* invalidate cache */ \ - repeat = true; \ - fault_type = 0; \ - } \ - } \ - while (repeat == true) \ - ; \ - } \ - else { fault_type = 0; /* ignore for the simulation */ } +#define RAJA_FT_END \ + if (fault_type > 0) { \ + /* invalidate cache */ \ + repeat = true; \ + fault_type = 0; \ + } \ + } \ + while (repeat == true) \ + ; \ + } \ + else \ + { \ + fault_type = 0; /* ignore for the simulation */ \ + } #endif // RAJA_REPORT_FT diff --git a/include/RAJA/internal/foldl.hpp b/include/RAJA/internal/foldl.hpp index af65c05392..02a5fa3a2b 100644 --- a/include/RAJA/internal/foldl.hpp +++ b/include/RAJA/internal/foldl.hpp @@ -18,17 +18,15 @@ #ifndef RAJA_foldl_HPP #define RAJA_foldl_HPP -#include "RAJA/config.hpp" - #include #include #include #include #include -#include "camp/camp.hpp" - +#include "RAJA/config.hpp" #include "RAJA/util/macros.hpp" +#include "camp/camp.hpp" namespace RAJA @@ -61,11 +59,13 @@ template struct foldl_impl { - using Ret = typename foldl_impl< - Op, - typename std::invoke_result::type, - Arg3>::type, - Rest...>::Ret; + using Ret = + typename foldl_impl::type, + Arg3>::type, + Rest...>::Ret; }; #else @@ -90,7 +90,7 @@ struct foldl_impl { #endif -} // namespace detail +} // namespace detail template RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl( diff --git a/include/RAJA/internal/get_platform.hpp b/include/RAJA/internal/get_platform.hpp index 0354d04bfd..ccf233c72a 100644 --- a/include/RAJA/internal/get_platform.hpp +++ b/include/RAJA/internal/get_platform.hpp @@ -1,22 +1,24 @@ #ifndef RAJA_get_platform_HPP #define RAJA_get_platform_HPP -#include "RAJA/util/Operators.hpp" #include "RAJA/internal/foldl.hpp" #include "RAJA/pattern/kernel/internal.hpp" +#include "RAJA/util/Operators.hpp" namespace RAJA { -namespace policy { -namespace multi { +namespace policy +{ +namespace multi +{ template class MultiPolicy; } -} +} // namespace policy -namespace detail +namespace detail { struct max_platform { @@ -66,11 +68,11 @@ struct get_platform_from_list<> { * (not for MultiPolicy or nested::Policy) */ template -struct get_platform::value - && !RAJA::type_traits::is_indexset_policy:: - value>::type> { +struct get_platform< + T, + typename std::enable_if< + std::is_base_of::value && + !RAJA::type_traits::is_indexset_policy::value>::type> { static constexpr Platform value = T::platform; }; @@ -124,7 +126,7 @@ struct get_platform> { static constexpr Platform value = Platform::undefined; }; -} // closing brace for detail namespace -} // closing brace for RAJA namespace +} // namespace detail +} // namespace RAJA -#endif // RAJA_get_platform_HPP +#endif // RAJA_get_platform_HPP diff --git a/include/RAJA/pattern/WorkGroup.hpp b/include/RAJA/pattern/WorkGroup.hpp index 767821b8d8..dd1fff40c6 100644 --- a/include/RAJA/pattern/WorkGroup.hpp +++ b/include/RAJA/pattern/WorkGroup.hpp @@ -19,11 +19,9 @@ #define RAJA_PATTERN_WorkGroup_HPP #include "RAJA/config.hpp" - -#include "RAJA/pattern/WorkGroup/WorkStorage.hpp" -#include "RAJA/pattern/WorkGroup/WorkRunner.hpp" - #include "RAJA/internal/get_platform.hpp" +#include "RAJA/pattern/WorkGroup/WorkRunner.hpp" +#include "RAJA/pattern/WorkGroup/WorkStorage.hpp" #include "RAJA/util/plugins.hpp" namespace RAJA @@ -38,38 +36,42 @@ namespace RAJA * * \verbatim - WorkPool, Allocator> pool(allocator); + WorkPool, Allocator> + pool(allocator); pool.enqueue(..., [=] (Index_type i, int* xarg0, int xarg1) { xarg0[i] = xarg1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); int* xarg0 = ...; int xarg1 = ...; - WorkSite, Allocator> site = group.run(xarg0, xarg1); + WorkSite, Allocator> site = + group.run(xarg0, xarg1); * \endverbatim * ****************************************************************************** */ -template < typename ... Args > +template using xargs = camp::list; -namespace detail { +namespace detail +{ -template < typename T > +template struct is_xargs { static constexpr bool value = false; }; -template < typename ... Args > +template struct is_xargs> { static constexpr bool value = true; }; -} +} // namespace detail // @@ -102,7 +104,8 @@ struct is_xargs> { data[i] = 1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); * \endverbatim * @@ -113,10 +116,11 @@ template struct WorkPool { - static_assert(RAJA::pattern_is::value, + static_assert( + RAJA::pattern_is::value, "WorkPool: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! @@ -135,9 +139,11 @@ struct WorkPool { * * \verbatim - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); * \endverbatim * @@ -148,10 +154,11 @@ template struct WorkGroup { - static_assert(RAJA::pattern_is::value, + static_assert( + RAJA::pattern_is::value, "WorkGroup: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! @@ -170,7 +177,8 @@ struct WorkGroup { * * \verbatim - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); site.synchronize(); @@ -183,10 +191,11 @@ template struct WorkSite { - static_assert(RAJA::pattern_is::value, + static_assert( + RAJA::pattern_is::value, "WorkSite: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; @@ -195,7 +204,7 @@ template struct WorkPool, INDEX_T, xargs, - ALLOCATOR_T> -{ + ALLOCATOR_T> { using exec_policy = EXEC_POLICY_T; using order_policy = ORDER_POLICY_T; using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; + using policy = WorkGroupPolicy; using index_type = INDEX_T; using xarg_type = xargs; using Allocator = ALLOCATOR_T; @@ -218,10 +229,16 @@ struct WorkPool; private: - using workrunner_type = detail::WorkRunner< - exec_policy, order_policy, dispatch_policy, Allocator, index_type, Args...>; - using storage_type = detail::WorkStorage< - storage_policy, Allocator, typename workrunner_type::dispatcher_type>; + using workrunner_type = detail::WorkRunner; + using storage_type = + detail::WorkStorage; friend workgroup_type; friend worksite_type; @@ -229,9 +246,7 @@ struct WorkPool + template inline void enqueue(segment_T&& seg, loop_T&& loop_body) { { // ignore zero length loops - using std::begin; using std::end; + using std::begin; + using std::end; if (begin(seg) == end(seg)) return; } if (m_storage.begin() == m_storage.end()) { @@ -273,8 +283,7 @@ struct WorkPool(seg), std::move(body)); + m_runner.enqueue(m_storage, std::forward(seg), std::move(body)); util::callPostCapturePlugins(context); } @@ -289,10 +298,7 @@ struct WorkPool struct WorkGroup, INDEX_T, xargs, - ALLOCATOR_T> -{ + ALLOCATOR_T> { using exec_policy = EXEC_POLICY_T; using order_policy = ORDER_POLICY_T; using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; + using policy = WorkGroupPolicy; using index_type = INDEX_T; using xarg_type = xargs; using Allocator = ALLOCATOR_T; @@ -347,7 +355,8 @@ struct WorkGroup struct WorkSite, INDEX_T, xargs, - ALLOCATOR_T> -{ + ALLOCATOR_T> { using exec_policy = EXEC_POLICY_T; using order_policy = ORDER_POLICY_T; using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; + using policy = WorkGroupPolicy; using index_type = INDEX_T; using xarg_type = xargs; using Allocator = ALLOCATOR_T; @@ -418,10 +426,7 @@ struct WorkSite -inline -typename WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::workgroup_type -WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::instantiate() +inline typename WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::workgroup_type +WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::instantiate() { // update max sizes to auto-reserve on reuse m_max_num_loops = std::max(m_storage.size(), m_max_num_loops); @@ -477,30 +482,37 @@ template -inline -typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::worksite_type +inline typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::worksite_type WorkGroup< - WorkGroupPolicy, + WorkGroupPolicy, INDEX_T, xargs, - ALLOCATOR_T>::run(typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::resource_type r, + ALLOCATOR_T>::run(typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::resource_type r, Args... args) { util::PluginContext context{util::make_context()}; util::callPreLaunchPlugins(context); // move any per run storage into worksite - worksite_type site(r, m_runner.run(m_storage, r, std::forward(args)...)); + worksite_type site(r, + m_runner.run(m_storage, r, std::forward(args)...)); util::callPostLaunchPlugins(context); diff --git a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp index 1eac283f4b..67ed9bccb3 100644 --- a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp +++ b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp @@ -19,15 +19,13 @@ #define RAJA_PATTERN_WORKGROUP_Dispatcher_HPP -#include "RAJA/config.hpp" +#include +#include "RAJA/config.hpp" #include "RAJA/policy/WorkGroup.hpp" - -#include "camp/number.hpp" -#include "camp/list.hpp" #include "camp/helpers.hpp" - -#include +#include "camp/list.hpp" +#include "camp/number.hpp" namespace RAJA @@ -36,35 +34,34 @@ namespace RAJA namespace detail { -template < typename > -struct DispatcherVoidPtrWrapper -{ +template +struct DispatcherVoidPtrWrapper { void* ptr; DispatcherVoidPtrWrapper() = default; // implicit constructor from void* - RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) {} }; -template < typename > -struct DispatcherVoidConstPtrWrapper -{ +template +struct DispatcherVoidConstPtrWrapper { const void* ptr; DispatcherVoidConstPtrWrapper() = default; // implicit constructor from const void* - RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) {} }; -constexpr bool dispatcher_use_host_invoke(Platform platform) { +constexpr bool dispatcher_use_host_invoke(Platform platform) +{ return !(platform == Platform::cuda || platform == Platform::hip); } // Transforms one dispatch policy into another by creating a dispatch policy // of holder_type objects. See usage in WorkRunner for more explanation. -template < typename dispatch_policy, typename holder_type > +template struct dispatcher_transform_types; /// -template < typename dispatch_policy, typename holder_type > +template using dispatcher_transform_types_t = typename dispatcher_transform_types::type; @@ -75,12 +72,16 @@ using dispatcher_transform_types_t = * DispatcherID is used to differentiate function pointers based on their * function signature. */ -template < Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > +template struct Dispatcher; -template < typename holder_type > -struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holder_type> { +template +struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, + holder_type> { using type = ::RAJA::indirect_function_call_dispatch; }; @@ -93,8 +94,11 @@ struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holde * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher { +template +struct Dispatcher { static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); using dispatch_policy = ::RAJA::indirect_function_call_dispatch; using void_ptr_wrapper = DispatcherVoidPtrWrapper; @@ -104,27 +108,29 @@ struct Dispatcher - static void s_move_construct_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) + template + static void s_move_construct_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) { T* dest_as_T = static_cast(dest.ptr); T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } /// /// invoke the call operator of the object of type T in obj with args /// - template < typename T > + template static void s_host_invoke(void_cptr_wrapper obj, CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } /// - template < typename T > - static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, CallArgs... args) + template + static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, + CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); @@ -133,22 +139,25 @@ struct Dispatcher + template static void s_destroy(void_ptr_wrapper obj) { T* obj_as_T = static_cast(obj.ptr); (*obj_as_T).~T(); } - using mover_type = void(*)(void_ptr_wrapper /*dest*/, void_ptr_wrapper /*src*/); - using invoker_type = void(*)(void_cptr_wrapper /*obj*/, CallArgs... /*args*/); - using destroyer_type = void(*)(void_ptr_wrapper /*obj*/); + using mover_type = void (*)(void_ptr_wrapper /*dest*/, + void_ptr_wrapper /*src*/); + using invoker_type = void (*)(void_cptr_wrapper /*obj*/, + CallArgs... /*args*/); + using destroyer_type = void (*)(void_ptr_wrapper /*obj*/); // This can't be a cuda device lambda due to compiler limitations - template < typename T > + template struct DeviceInvokerFactory { using value_type = invoker_type; - RAJA_DEVICE value_type operator()() { + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -160,14 +169,15 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { - return { mover_type{&s_move_construct_destroy}, - invoker_type{&s_host_invoke}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + return {mover_type{&s_move_construct_destroy}, + invoker_type{&s_host_invoke}, + destroyer_type{&s_destroy}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -179,14 +189,17 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { - return { mover_type{&s_move_construct_destroy}, - invoker_type{std::forward(createOnDevice)(DeviceInvokerFactory{})}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { + return {mover_type{&s_move_construct_destroy}, + invoker_type{std::forward(createOnDevice)( + DeviceInvokerFactory{})}, + destroyer_type{&s_destroy}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -196,8 +209,9 @@ struct Dispatcher -struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, holder_type> { +template +struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, + holder_type> { using type = ::RAJA::indirect_virtual_function_dispatch; }; @@ -210,15 +224,19 @@ struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, ho * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher { +template +struct Dispatcher { static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); using dispatch_policy = ::RAJA::indirect_virtual_function_dispatch; using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; struct impl_base { - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const = 0; + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const = 0; virtual void destroy(void_ptr_wrapper obj) const = 0; }; @@ -227,21 +245,22 @@ struct Dispatcher - struct base_impl_type : impl_base - { + template + struct base_impl_type : impl_base { /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const override + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const override { T* dest_as_T = static_cast(dest.ptr); T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } @@ -255,9 +274,8 @@ struct Dispatcher - struct host_impl_type : host_impl_base - { + template + struct host_impl_type : host_impl_base { /// /// invoke the call operator of the object of type T in obj with args /// @@ -268,13 +286,13 @@ struct Dispatcher - struct device_impl_type : device_impl_base - { + template + struct device_impl_type : device_impl_base { /// /// invoke the call operator of the object of type T in obj with args /// - virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, CallArgs... args) const override + virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, + CallArgs... args) const override { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); @@ -304,23 +322,20 @@ struct Dispatcherinvoke(obj, std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; struct destroyer_type { impl_base* m_impl; - void operator()(void_ptr_wrapper obj) const - { - m_impl->destroy(obj); - } + void operator()(void_ptr_wrapper obj) const { m_impl->destroy(obj); } }; // This can't be a cuda device lambda due to compiler limitations - template < typename T > + template struct DeviceImplTypeFactory { using value_type = device_impl_type*; - RAJA_DEVICE value_type operator()() { + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -333,16 +348,17 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { + template * = nullptr> + static inline Dispatcher makeDispatcher() + { static base_impl_type s_base_impl; static host_impl_type s_host_impl; - return { mover_type{&s_base_impl}, - host_invoker_type{&s_host_impl}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + return {mover_type{&s_base_impl}, + host_invoker_type{&s_host_impl}, + destroyer_type{&s_base_impl}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -354,17 +370,19 @@ struct Dispatcher* = nullptr> - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { static base_impl_type s_base_impl; - static device_impl_type* s_device_impl_ptr{ - std::forward(createOnDevice)(DeviceImplTypeFactory{}) }; - return { mover_type{&s_base_impl}, - device_invoker_type{s_device_impl_ptr}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + static device_impl_type* s_device_impl_ptr{std::forward( + createOnDevice)(DeviceImplTypeFactory{})}; + return {mover_type{&s_base_impl}, + device_invoker_type{s_device_impl_ptr}, + destroyer_type{&s_base_impl}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -375,17 +393,21 @@ struct Dispatcher +template struct dispatcher_transform_types<::RAJA::direct_dispatch, holder_type> { - using type = ::RAJA::direct_dispatch...>; + using type = + ::RAJA::direct_dispatch...>; }; /*! * Version of Dispatcher that does direct dispatch to zero callable types. * It implements the interface with callable objects. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> { static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); using dispatch_policy = ::RAJA::direct_dispatch<>; using void_ptr_wrapper = DispatcherVoidPtrWrapper; @@ -396,39 +418,36 @@ struct Dispatcher, DispatcherID, CallArgs... /// destroy the T obj in src /// struct mover_type { - void operator()(void_ptr_wrapper, void_ptr_wrapper) const - { } + void operator()(void_ptr_wrapper, void_ptr_wrapper) const {} }; /// /// invoke the call operator of the object of type T in obj with args /// struct host_invoker_type { - void operator()(void_cptr_wrapper, CallArgs...) const - { } + void operator()(void_cptr_wrapper, CallArgs...) const {} }; struct device_invoker_type { - RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) const - { } + RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) const {} }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// struct destroyer_type { - void operator()(void_ptr_wrapper) const - { } + void operator()(void_ptr_wrapper) const {} }; /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { + template * = nullptr> + static inline Dispatcher makeDispatcher() + { return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; } /// @@ -437,9 +456,12 @@ struct Dispatcher, DispatcherID, CallArgs... /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; } @@ -453,8 +475,14 @@ struct Dispatcher, DispatcherID, CallArgs... * Version of Dispatcher that does direct dispatch to a single callable type. * It implements the interface with callable objects. */ -template < Platform platform, typename T, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> { static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); using dispatch_policy = ::RAJA::direct_dispatch; using void_ptr_wrapper = DispatcherVoidPtrWrapper; @@ -469,7 +497,7 @@ struct Dispatcher, DispatcherID, CallArgs.. { T* dest_as_T = static_cast(dest.ptr); T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -491,9 +519,8 @@ struct Dispatcher, DispatcherID, CallArgs.. (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj @@ -509,10 +536,13 @@ struct Dispatcher, DispatcherID, CallArgs.. /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename U, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; } /// @@ -521,10 +551,14 @@ struct Dispatcher, DispatcherID, CallArgs.. /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename U, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; } @@ -538,17 +572,23 @@ struct Dispatcher, DispatcherID, CallArgs.. * Version of Dispatcher that does direct dispatch to multiple callable types. * It implements the interface with callable objects. */ -template < typename T0, typename T1, typename ... TNs, - Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, - DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> { static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); using dispatch_policy = ::RAJA::direct_dispatch; using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; using id_type = int; - using callable_indices = camp::make_int_seq_t; + using callable_indices = camp::make_int_seq_t; using callable_types = camp::list; /// @@ -560,24 +600,25 @@ struct Dispatcher, void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { - impl_helper(callable_indices{}, callable_types{}, - dest, src); + impl_helper(callable_indices{}, callable_types{}, dest, src); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper dest, void_ptr_wrapper src) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper dest, + void_ptr_wrapper src) const { camp::sink(((id_types == id) ? (impl(dest, src), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper dest, void_ptr_wrapper src) const { T* dest_as_T = static_cast(dest.ptr); T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -590,19 +631,25 @@ struct Dispatcher, void operator()(void_cptr_wrapper obj, CallArgs... args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices{}, + callable_types{}, + obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template void impl(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); @@ -614,28 +661,33 @@ struct Dispatcher, RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices{}, + callable_types{}, + obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - RAJA_DEVICE void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + RAJA_DEVICE void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template RAJA_DEVICE void impl(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj @@ -645,19 +697,19 @@ struct Dispatcher, void operator()(void_ptr_wrapper obj) const { - impl_helper(callable_indices{}, callable_types{}, - obj); + impl_helper(callable_indices{}, callable_types{}, obj); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper obj) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper obj) const { camp::sink(((id_types == id) ? (impl(obj), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper obj) const { T* obj_as_T = static_cast(obj.ptr); @@ -671,25 +723,32 @@ struct Dispatcher, /// The id is just the index of T in the list of callable_types. /// If T is not in Ts return -1. /// - template < typename T, int ... id_types, typename ... Ts > - static constexpr id_type get_id(camp::int_seq, camp::list) + template + static constexpr id_type get_id(camp::int_seq, + camp::list) { id_type id{-1}; // quiet UB warning by sequencing assignment to id with list initialization - int unused[] {0, (std::is_same::value ? ((id = id_types), 0) : 0)...}; - camp::sink(unused); // quiet unused var warning + int unused[]{0, (std::is_same::value ? ((id = id_types), 0) : 0)...}; + camp::sink(unused); // quiet unused var warning return id; } /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + static constexpr id_type id = + get_id(callable_indices{}, callable_types{}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, host_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type{id}, + host_invoker_type{id}, + destroyer_type{id}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -697,12 +756,19 @@ struct Dispatcher, /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static constexpr id_type id = + get_id(callable_indices{}, callable_types{}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, device_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type{id}, + device_invoker_type{id}, + destroyer_type{id}, + sizeof(T)}; } mover_type move_construct_destroy; diff --git a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp index 9645f73050..d1945555a6 100644 --- a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp @@ -18,17 +18,14 @@ #ifndef RAJA_PATTERN_WORKGROUP_WorkRunner_HPP #define RAJA_PATTERN_WORKGROUP_WorkRunner_HPP -#include "RAJA/config.hpp" - -#include #include +#include -#include "RAJA/policy/sequential/policy.hpp" - -#include "RAJA/pattern/forall.hpp" - +#include "RAJA/config.hpp" #include "RAJA/pattern/WorkGroup/Dispatcher.hpp" +#include "RAJA/pattern/forall.hpp" #include "RAJA/policy/WorkGroup.hpp" +#include "RAJA/policy/sequential/policy.hpp" namespace RAJA @@ -40,18 +37,18 @@ namespace detail /*! * A body and args holder for storing loops that are being executed in foralls */ -template -struct HoldBodyArgs_base -{ +template +struct HoldBodyArgs_base { // NOTE: This constructor is disabled when body_in is not LoopBody // to avoid it conflicting with the copy and move constructors - template < typename body_in, - typename = typename std::enable_if< - std::is_same>::value>::type > + template >::value>::type> HoldBodyArgs_base(body_in&& body, Args... args) - : m_body(std::forward(body)) - , m_arg_tuple(std::forward(args)...) - { } + : m_body(std::forward(body)), + m_arg_tuple(std::forward(args)...) + { + } protected: LoopBody m_body; @@ -62,9 +59,8 @@ struct HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the host */ -template -struct HoldBodyArgs_host : HoldBodyArgs_base -{ +template +struct HoldBodyArgs_host : HoldBodyArgs_base { using base = HoldBodyArgs_base; using base::base; @@ -73,7 +69,7 @@ struct HoldBodyArgs_host : HoldBodyArgs_base invoke(i, camp::make_idx_seq_t{}); } - template < camp::idx_t ... Is > + template RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -84,9 +80,8 @@ struct HoldBodyArgs_host : HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the device */ -template -struct HoldBodyArgs_device : HoldBodyArgs_base -{ +template +struct HoldBodyArgs_device : HoldBodyArgs_base { using base = HoldBodyArgs_base; using base::base; @@ -95,7 +90,7 @@ struct HoldBodyArgs_device : HoldBodyArgs_base invoke(i, camp::make_idx_seq_t{}); } - template < camp::idx_t ... Is > + template RAJA_DEVICE RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -105,21 +100,24 @@ struct HoldBodyArgs_device : HoldBodyArgs_base /*! * A body and segment holder for storing loops that will be executed as foralls */ -template -struct HoldForall -{ +template +struct HoldForall { using resource_type = typename resources::get_resource::type; using HoldBodyArgs = typename std::conditional< !type_traits::is_device_exec_policy::value, HoldBodyArgs_host, - HoldBodyArgs_device >::type; + HoldBodyArgs_device>::type; - template < typename segment_in, typename body_in > + template HoldForall(segment_in&& segment, body_in&& body) - : m_segment(std::forward(segment)) - , m_body(std::forward(body)) - { } + : m_segment(std::forward(segment)), + m_body(std::forward(body)) + { + } RAJA_INLINE void operator()(resource_type r, Args... args) const { @@ -143,7 +141,7 @@ template + typename... Args> struct WorkRunner; @@ -156,28 +154,30 @@ template -struct WorkRunnerForallOrdered_base -{ + typename... Args> +struct WorkRunnerForallOrdered_base { using exec_policy = EXEC_POLICY_T; using order_policy = ORDER_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; using Allocator = ALLOCATOR_T; using index_type = INDEX_T; - using resource_type = typename resources::get_resource::type; + using resource_type = + typename resources::get_resource::type; using forall_exec_policy = FORALL_EXEC_POLICY; // The type that will hold the segment and loop body in work storage struct holder_type { - template < typename T > - using type = HoldForall>::type, // segment_type - typename camp::at>::type, // loop_type - index_type, Args...>; + template + using type = + HoldForall>::type, // segment_type + typename camp::at>::type, // loop_type + index_type, + Args...>; }; /// - template < typename T > + template using holder_type_t = typename holder_type::template type; // The policy indicating where the call function is invoked @@ -186,33 +186,41 @@ struct WorkRunnerForallOrdered_base // The Dispatcher policy with holder_types used internally to handle the // ranges and callables passed in by the user. - using dispatcher_holder_policy = dispatcher_transform_types_t; + using dispatcher_holder_policy = + dispatcher_transform_types_t; - using dispatcher_type = Dispatcher; + using dispatcher_type = Dispatcher; WorkRunnerForallOrdered_base() = default; WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base const&) = delete; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base const&) = delete; + WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base const&) = + delete; - WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base &&) = default; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base &&) = default; + WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base&&) = default; + WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base&&) = + default; // runner interfaces with storage to enqueue so the runner can get // information from the segment and loop at enqueue time - template < typename WorkContainer, typename segment_T, typename loop_T > + template inline void enqueue(WorkContainer& storage, segment_T&& seg, loop_T&& loop) { - using holder = holder_type_t, camp::decay>>; + using holder = + holder_type_t, camp::decay>>; - storage.template emplace( - get_Dispatcher(dispatcher_exec_policy{}), - std::forward(seg), std::forward(loop)); + storage.template emplace(get_Dispatcher( + dispatcher_exec_policy{}), + std::forward(seg), + std::forward(loop)); } // clear any state so ready to be destroyed or reused - void clear() - { } + void clear() {} // no extra storage required here using per_run_storage = int; @@ -227,29 +235,26 @@ template + typename... Args> struct WorkRunnerForallOrdered - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> -{ - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + : WorkRunnerForallOrdered_base { + using base = WorkRunnerForallOrdered_base; using base::base; // run the loops using forall in the order that they were enqueued - template < typename WorkContainer > + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const @@ -276,29 +281,27 @@ template + typename... Args> struct WorkRunnerForallReverse - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> -{ - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + : WorkRunnerForallOrdered_base { + using base = WorkRunnerForallOrdered_base; using base::base; - // run the loops using forall in the reverse order to the order they were enqueued - template < typename WorkContainer > + // run the loops using forall in the reverse order to the order they were + // enqueued + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const @@ -309,7 +312,7 @@ struct WorkRunnerForallReverse auto begin = storage.begin(); for (auto iter = storage.end(); iter != begin; --iter) { - value_type::host_call(&*(iter-1), r, args...); + value_type::host_call(&*(iter - 1), r, args...); } return run_storage; diff --git a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp index 52631d108f..6e0216e72d 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp @@ -18,19 +18,16 @@ #ifndef RAJA_PATTERN_WORKGROUP_WorkStorage_HPP #define RAJA_PATTERN_WORKGROUP_WorkStorage_HPP -#include "RAJA/config.hpp" - #include #include -#include #include +#include -#include "RAJA/util/Operators.hpp" -#include "RAJA/util/macros.hpp" - +#include "RAJA/config.hpp" #include "RAJA/internal/RAJAVec.hpp" - #include "RAJA/pattern/WorkGroup/WorkStruct.hpp" +#include "RAJA/util/Operators.hpp" +#include "RAJA/util/macros.hpp" namespace RAJA @@ -46,9 +43,8 @@ namespace detail // operator - ( iterator_base const& ) // operator == ( iterator_base const& ) // operator < ( iterator_base const& ) -template < typename iterator_base > -struct random_access_iterator : iterator_base -{ +template +struct random_access_iterator : iterator_base { using base = iterator_base; using value_type = const typename base::value_type; using pointer = typename base::pointer; @@ -59,10 +55,10 @@ struct random_access_iterator : iterator_base using base::base; random_access_iterator(random_access_iterator const&) = default; - random_access_iterator(random_access_iterator &&) = default; + random_access_iterator(random_access_iterator&&) = default; random_access_iterator& operator=(random_access_iterator const&) = default; - random_access_iterator& operator=(random_access_iterator &&) = default; + random_access_iterator& operator=(random_access_iterator&&) = default; RAJA_HOST_DEVICE reference operator*() const @@ -70,10 +66,7 @@ struct random_access_iterator : iterator_base return *static_cast(*this); } - RAJA_HOST_DEVICE pointer operator->() const - { - return &(*(*this)); - } + RAJA_HOST_DEVICE pointer operator->() const { return &(*(*this)); } RAJA_HOST_DEVICE reference operator[](difference_type i) const { @@ -121,7 +114,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - random_access_iterator const& lhs, difference_type rhs) + random_access_iterator const& lhs, + difference_type rhs) { random_access_iterator copy = lhs; copy += rhs; @@ -129,7 +123,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - difference_type lhs, random_access_iterator const& rhs) + difference_type lhs, + random_access_iterator const& rhs) { random_access_iterator copy = rhs; copy += lhs; @@ -137,7 +132,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator-( - random_access_iterator const& lhs, difference_type rhs) + random_access_iterator const& lhs, + difference_type rhs) { random_access_iterator copy = lhs; copy -= rhs; @@ -145,43 +141,50 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline difference_type operator-( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) - static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator==( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) == static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator!=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs == rhs); } RAJA_HOST_DEVICE friend inline bool operator<( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) < static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator<=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(rhs < lhs); } RAJA_HOST_DEVICE friend inline bool operator>( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return rhs < lhs; } RAJA_HOST_DEVICE friend inline bool operator>=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs < rhs); } @@ -191,10 +194,12 @@ struct random_access_iterator : iterator_base /*! * A storage container for work groups */ -template < typename STORAGE_POLICY_T, typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -202,15 +207,17 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: using storage_policy = RAJA::array_of_pointers; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; using value_type = GenericWorkStruct; @@ -224,31 +231,24 @@ class WorkStorage private: // struct used in storage vector to retain pointer and allocation size - struct pointer_and_size - { + struct pointer_and_size { pointer ptr; size_type size; }; public: - - // iterator base class for accessing stored WorkStructs outside of the container - struct const_iterator_base - { + // iterator base class for accessing stored WorkStructs outside of the + // container + struct const_iterator_base { using value_type = const typename WorkStorage::value_type; using pointer = typename WorkStorage::const_pointer; using reference = typename WorkStorage::const_reference; using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; - const_iterator_base(const pointer_and_size* ptrptr) - : m_ptrptr(ptrptr) - { } + const_iterator_base(const pointer_and_size* ptrptr) : m_ptrptr(ptrptr) {} - RAJA_HOST_DEVICE reference operator*() const - { - return *(m_ptrptr->ptr); - } + RAJA_HOST_DEVICE reference operator*() const { return *(m_ptrptr->ptr); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) { @@ -257,19 +257,22 @@ class WorkStorage } RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr - rhs_iter.m_ptrptr; } RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr == rhs_iter.m_ptrptr; } RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr < rhs_iter.m_ptrptr; } @@ -282,22 +285,23 @@ class WorkStorage explicit WorkStorage(allocator_type const& aloc) - : m_vec(0, aloc) - , m_aloc(aloc) - { } + : m_vec(0, aloc), m_aloc(aloc) + { + } WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_vec(std::move(rhs.m_vec)) - , m_aloc(std::move(rhs.m_aloc)) - { } + : m_vec(std::move(rhs.m_vec)), m_aloc(std::move(rhs.m_aloc)) + { + } WorkStorage& operator=(WorkStorage&& rhs) { if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment{}); } return *this; } @@ -312,20 +316,11 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_vec.size(); - } + size_type size() const { return m_vec.size(); } - const_iterator begin() const - { - return const_iterator(m_vec.begin()); - } + const_iterator begin() const { return const_iterator(m_vec.begin()); } - const_iterator end() const - { - return const_iterator(m_vec.end()); - } + const_iterator end() const { return const_iterator(m_vec.end()); } // number of bytes used for storage of loops size_type storage_size() const @@ -337,11 +332,13 @@ class WorkStorage return storage_size_nbytes; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { - m_vec.emplace_back(create_value( - dispatcher, std::forward(ctor_args)...)); + m_vec.emplace_back( + create_value(dispatcher, + std::forward(ctor_args)...)); } // destroy all stored loops, deallocates all storage @@ -354,13 +351,13 @@ class WorkStorage m_vec.shrink_to_fit(); } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_vec; + RAJAVec< + pointer_and_size, + typename allocator_traits_type::template rebind_alloc> + m_vec; allocator_type m_aloc; // move assignment if allocator propagates on move assignment @@ -389,7 +386,7 @@ class WorkStorage } // allocate and construct value in storage - template < typename holder, typename ... holder_ctor_args > + template pointer_and_size create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { @@ -415,7 +412,9 @@ class WorkStorage value_type::move_destroy(value_ptr, other_value_and_size.ptr); allocator_traits_type::deallocate(rhs.m_aloc, - reinterpret_cast(other_value_and_size.ptr), other_value_and_size.size); + reinterpret_cast( + other_value_and_size.ptr), + other_value_and_size.size); return pointer_and_size{value_ptr, other_value_and_size.size}; } @@ -425,11 +424,13 @@ class WorkStorage { value_type::destroy(value_and_size_ptr.ptr); allocator_traits_type::deallocate(m_aloc, - reinterpret_cast(value_and_size_ptr.ptr), value_and_size_ptr.size); + reinterpret_cast( + value_and_size_ptr.ptr), + value_and_size_ptr.size); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -437,15 +438,17 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: using storage_policy = RAJA::ragged_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; using value_type = GenericWorkStruct; @@ -457,9 +460,9 @@ class WorkStorage using pointer = value_type*; using const_pointer = const value_type*; - // iterator base class for accessing stored WorkStructs outside of the container - struct const_iterator_base - { + // iterator base class for accessing stored WorkStructs outside of the + // container + struct const_iterator_base { using value_type = const typename WorkStorage::value_type; using pointer = typename WorkStorage::const_pointer; using reference = typename WorkStorage::const_reference; @@ -467,14 +470,13 @@ class WorkStorage using iterator_category = std::random_access_iterator_tag; const_iterator_base(const char* array_begin, const size_type* offset_iter) - : m_array_begin(array_begin) - , m_offset_iter(offset_iter) - { } + : m_array_begin(array_begin), m_offset_iter(offset_iter) + { + } RAJA_HOST_DEVICE reference operator*() const { - return *reinterpret_cast( - m_array_begin + *m_offset_iter); + return *reinterpret_cast(m_array_begin + *m_offset_iter); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) @@ -484,19 +486,22 @@ class WorkStorage } RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter - rhs_iter.m_offset_iter; } RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter == rhs_iter.m_offset_iter; } RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter < rhs_iter.m_offset_iter; } @@ -510,19 +515,19 @@ class WorkStorage explicit WorkStorage(allocator_type const& aloc) - : m_offsets(0, aloc) - , m_aloc(aloc) - { } + : m_offsets(0, aloc), m_aloc(aloc) + { + } WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_offsets(std::move(rhs.m_offsets)) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) - , m_aloc(std::move(rhs.m_aloc)) + : m_offsets(std::move(rhs.m_offsets)), + m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap), + m_aloc(std::move(rhs.m_aloc)) { rhs.m_array_begin = nullptr; rhs.m_array_end = nullptr; @@ -532,7 +537,8 @@ class WorkStorage WorkStorage& operator=(WorkStorage&& rhs) { if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment{}); } return *this; } @@ -546,10 +552,7 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_offsets.size(); - } + size_type size() const { return m_offsets.size(); } const_iterator begin() const { @@ -562,17 +565,17 @@ class WorkStorage } // number of bytes used for storage of loops - size_type storage_size() const - { - return m_array_end - m_array_begin; - } + size_type storage_size() const { return m_array_end - m_array_begin; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { size_type value_offset = storage_size(); - size_type value_size = create_value(value_offset, - dispatcher, std::forward(ctor_args)...); + size_type value_size = + create_value(value_offset, + dispatcher, + std::forward(ctor_args)...); m_offsets.emplace_back(value_offset); m_array_end += value_size; } @@ -582,23 +585,24 @@ class WorkStorage { array_clear(); if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + allocator_traits_type::deallocate(m_aloc, + m_array_begin, + storage_capacity()); m_array_begin = nullptr; - m_array_end = nullptr; - m_array_cap = nullptr; + m_array_end = nullptr; + m_array_cap = nullptr; } } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_offsets; + RAJAVec> + m_offsets; char* m_array_begin = nullptr; - char* m_array_end = nullptr; - char* m_array_cap = nullptr; + char* m_array_end = nullptr; + char* m_array_cap = nullptr; allocator_type m_aloc; // move assignment if allocator propagates on move assignment @@ -606,15 +610,15 @@ class WorkStorage { clear(); - m_offsets = std::move(rhs.m_offsets); + m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; - m_aloc = std::move(rhs.m_aloc); + m_array_end = rhs.m_array_end; + m_array_cap = rhs.m_array_cap; + m_aloc = std::move(rhs.m_aloc); rhs.m_array_begin = nullptr; - rhs.m_array_end = nullptr; - rhs.m_array_cap = nullptr; + rhs.m_array_end = nullptr; + rhs.m_array_cap = nullptr; } // move assignment if allocator does not propagate on move assignment @@ -623,14 +627,14 @@ class WorkStorage clear(); if (m_aloc == rhs.m_aloc) { - m_offsets = std::move(rhs.m_offsets); + m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; + m_array_end = rhs.m_array_end; + m_array_cap = rhs.m_array_cap; rhs.m_array_begin = nullptr; - rhs.m_array_end = nullptr; - rhs.m_array_cap = nullptr; + rhs.m_array_end = nullptr; + rhs.m_array_cap = nullptr; } else { array_reserve(rhs.storage_size()); @@ -647,16 +651,10 @@ class WorkStorage } // get loop storage capacity, used and unused in bytes - size_type storage_capacity() const - { - return m_array_cap - m_array_begin; - } + size_type storage_capacity() const { return m_array_cap - m_array_begin; } // get unused loop storage capacity in bytes - size_type storage_unused() const - { - return m_array_cap - m_array_end; - } + size_type storage_unused() const { return m_array_cap - m_array_end; } // reserve space for loop_storage_size bytes of loop storage void array_reserve(size_type loop_storage_size) @@ -665,21 +663,23 @@ class WorkStorage char* new_array_begin = allocator_traits_type::allocate(m_aloc, loop_storage_size); - char* new_array_end = new_array_begin + storage_size(); - char* new_array_cap = new_array_begin + loop_storage_size; + char* new_array_end = new_array_begin + storage_size(); + char* new_array_cap = new_array_begin + loop_storage_size; for (size_type i = 0; i < size(); ++i) { move_destroy_value(new_array_begin + m_offsets[i], - m_array_begin + m_offsets[i]); + m_array_begin + m_offsets[i]); } if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + allocator_traits_type::deallocate(m_aloc, + m_array_begin, + storage_capacity()); } m_array_begin = new_array_begin; - m_array_end = new_array_end ; - m_array_cap = new_array_cap ; + m_array_end = new_array_end; + m_array_cap = new_array_cap; } } @@ -696,7 +696,7 @@ class WorkStorage // ensure there is enough storage to hold the next loop body at value offset // and store the loop body - template < typename holder, typename ... holder_ctor_args > + template size_type create_value(size_type value_offset, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) @@ -704,7 +704,8 @@ class WorkStorage const size_type value_size = sizeof(true_value_type); if (value_size > storage_unused()) { - array_reserve(std::max(storage_size() + value_size, 2*storage_capacity())); + array_reserve( + std::max(storage_size() + value_size, 2 * storage_capacity())); } pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); @@ -726,13 +727,12 @@ class WorkStorage // destroy the loop body at value offset void destroy_value(size_type value_offset) { - pointer value_ptr = - reinterpret_cast(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage @@ -742,15 +742,17 @@ class WorkStorage::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: using storage_policy = RAJA::constant_stride_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; using value_type = GenericWorkStruct; @@ -762,9 +764,9 @@ class WorkStorage; - explicit WorkStorage(allocator_type const& aloc) - : m_aloc(aloc) - { } + explicit WorkStorage(allocator_type const& aloc) : m_aloc(aloc) {} WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_aloc(std::move(rhs.m_aloc)) - , m_stride(rhs.m_stride) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) + : m_aloc(std::move(rhs.m_aloc)), + m_stride(rhs.m_stride), + m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap) { // do not reset stride, leave it for reuse rhs.m_array_begin = nullptr; - rhs.m_array_end = nullptr; - rhs.m_array_cap = nullptr; + rhs.m_array_end = nullptr; + rhs.m_array_cap = nullptr; } WorkStorage& operator=(WorkStorage&& rhs) { if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment{}); } return *this; } @@ -847,35 +851,28 @@ class WorkStorage - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { - create_value(dispatcher, std::forward(ctor_args)...); + create_value(dispatcher, + std::forward(ctor_args)...); m_array_end += m_stride; } @@ -884,40 +881,39 @@ class WorkStorage 0; value_offset -= m_stride) { + for (size_type value_offset = storage_size(); value_offset > 0; + value_offset -= m_stride) { destroy_value(value_offset - m_stride); m_array_end -= m_stride; } @@ -1002,18 +995,17 @@ class WorkStorage + template void create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { const size_type value_size = sizeof(true_value_type); if (value_size > storage_unused() && value_size <= m_stride) { - array_reserve(std::max(storage_size() + m_stride, 2*storage_capacity()), + array_reserve(std::max(storage_size() + m_stride, 2 * storage_capacity()), m_stride); } else if (value_size > m_stride) { - array_reserve((size()+1)*value_size, - value_size); + array_reserve((size() + 1) * value_size, value_size); } size_type value_offset = storage_size(); @@ -1025,8 +1017,7 @@ class WorkStorage(value_ptr), reinterpret_cast(other_value_ptr)); @@ -1035,8 +1026,7 @@ class WorkStorage(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index 72e1540c54..39a35c5176 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -18,11 +18,10 @@ #ifndef RAJA_PATTERN_WORKGROUP_WorkStruct_HPP #define RAJA_PATTERN_WORKGROUP_WorkStruct_HPP -#include "RAJA/config.hpp" - -#include #include +#include +#include "RAJA/config.hpp" #include "RAJA/pattern/WorkGroup/Dispatcher.hpp" @@ -35,7 +34,7 @@ namespace detail /*! * A struct that gives a generic way to layout memory for different loops */ -template < size_t size, typename Dispatcher_T > +template struct WorkStruct; /*! @@ -44,67 +43,74 @@ struct WorkStruct; * offsetof(GenericWorkStruct<>, obj) == offsetof(WorkStruct, obj) * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ -template < typename Dispatcher_T > +template using GenericWorkStruct = WorkStruct; -template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > -struct WorkStruct> -{ - using dispatcher_type = Dispatcher; +template +struct WorkStruct< + size, + Dispatcher> { + using dispatcher_type = + Dispatcher; // construct a WorkStruct with a value of type holder from the args and // check a variety of constraints at compile time - template < typename holder, typename ... holder_ctor_args > - static RAJA_INLINE - void construct(void* ptr, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + static RAJA_INLINE void construct(void* ptr, + const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { using true_value_type = WorkStruct; using value_type = GenericWorkStruct; static_assert(sizeof(holder) <= sizeof(true_value_type::obj), - "holder must fit in WorkStruct::obj"); + "holder must fit in WorkStruct::obj"); static_assert(std::is_standard_layout::value, - "WorkStruct must be a standard layout type"); + "WorkStruct must be a standard layout type"); static_assert(std::is_standard_layout::value, - "GenericWorkStruct must be a standard layout type"); + "GenericWorkStruct must be a standard layout type"); static_assert(offsetof(value_type, obj) == offsetof(true_value_type, obj), - "WorkStruct and GenericWorkStruct must have obj at the same offset"); + "WorkStruct and GenericWorkStruct must have obj at the same " + "offset"); static_assert(sizeof(value_type) <= sizeof(true_value_type), - "WorkStruct must not be smaller than GenericWorkStruct"); + "WorkStruct must not be smaller than GenericWorkStruct"); true_value_type* value_ptr = static_cast(ptr); value_ptr->dispatcher = dispatcher; value_ptr->invoke = dispatcher->invoke; - new(&value_ptr->obj) holder(std::forward(ctor_args)...); + new (&value_ptr->obj) holder(std::forward(ctor_args)...); } // move construct in dst from the value in src and destroy the value in src - static RAJA_INLINE - void move_destroy(WorkStruct* value_dst, - WorkStruct* value_src) + static RAJA_INLINE void move_destroy(WorkStruct* value_dst, + WorkStruct* value_src) { value_dst->dispatcher = value_src->dispatcher; value_dst->invoke = value_src->invoke; - value_dst->dispatcher->move_construct_destroy(&value_dst->obj, &value_src->obj); + value_dst->dispatcher->move_construct_destroy(&value_dst->obj, + &value_src->obj); } // destroy the value ptr - static RAJA_INLINE - void destroy(WorkStruct* value_ptr) + static RAJA_INLINE void destroy(WorkStruct* value_ptr) { value_ptr->dispatcher->destroy(&value_ptr->obj); } // invoke the call operator of the value ptr with args - static RAJA_INLINE - void host_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_INLINE void host_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } /// // invoke the call operator of the value ptr with args - static RAJA_DEVICE RAJA_INLINE - void device_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_DEVICE RAJA_INLINE void device_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } diff --git a/include/RAJA/pattern/atomic.hpp b/include/RAJA/pattern/atomic.hpp index d5905f7928..e0fbe00451 100644 --- a/include/RAJA/pattern/atomic.hpp +++ b/include/RAJA/pattern/atomic.hpp @@ -19,10 +19,8 @@ #define RAJA_pattern_atomic_HPP #include "RAJA/config.hpp" - #include "RAJA/policy/atomic_auto.hpp" #include "RAJA/policy/atomic_builtin.hpp" - #include "RAJA/util/macros.hpp" namespace RAJA @@ -317,22 +315,19 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - constexpr explicit AtomicRef(value_type *value_ptr) - : m_value_ptr(value_ptr) {} + constexpr explicit AtomicRef(value_type *value_ptr) : m_value_ptr(value_ptr) + { + } RAJA_INLINE RAJA_HOST_DEVICE - constexpr AtomicRef(AtomicRef const &c) - : m_value_ptr(c.m_value_ptr) {} + constexpr AtomicRef(AtomicRef const &c) : m_value_ptr(c.m_value_ptr) {} - AtomicRef& operator=(AtomicRef const&) = delete; + AtomicRef &operator=(AtomicRef const &) = delete; RAJA_INLINE RAJA_HOST_DEVICE - value_type * getPointer() const - { - return m_value_ptr; - } + value_type *getPointer() const { return m_value_ptr; } RAJA_INLINE RAJA_HOST_DEVICE @@ -351,17 +346,11 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - value_type load() const - { - return RAJA::atomicLoad(m_value_ptr); - } + value_type load() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE RAJA_HOST_DEVICE - operator value_type() const - { - return RAJA::atomicLoad(m_value_ptr); - } + operator value_type() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE RAJA_HOST_DEVICE @@ -379,7 +368,7 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - bool compare_exchange_strong(value_type& expect, value_type rhs) const + bool compare_exchange_strong(value_type &expect, value_type rhs) const { value_type compare = expect; value_type old = RAJA::atomicCAS(m_value_ptr, compare, rhs); @@ -393,7 +382,7 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - bool compare_exchange_weak(value_type& expect, value_type rhs) const + bool compare_exchange_weak(value_type &expect, value_type rhs) const { return this->compare_exchange_strong(expect, rhs); } diff --git a/include/RAJA/pattern/detail/algorithm.hpp b/include/RAJA/pattern/detail/algorithm.hpp index 21d266bd21..ff2fe33f00 100644 --- a/include/RAJA/pattern/detail/algorithm.hpp +++ b/include/RAJA/pattern/detail/algorithm.hpp @@ -20,12 +20,12 @@ #ifndef RAJA_pattern_detail_algorithm_HPP #define RAJA_pattern_detail_algorithm_HPP +#include + #include "RAJA/config.hpp" #include "RAJA/util/macros.hpp" #include "camp/helpers.hpp" -#include - namespace RAJA { @@ -49,16 +49,17 @@ using ContainerVal = camp::decay>())>; template -using ContainerRef = - decltype(*camp::val>()); +using ContainerRef = decltype(*camp::val>()); template using ContainerDiff = - camp::decay>()-camp::val>())>; + camp::decay>() - + camp::val>())>; template -RAJA_INLINE -DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id) +RAJA_INLINE DiffType firstIndex(DiffType n, + CountType num_threads, + CountType thread_id) { return (static_cast(n) * thread_id) / num_threads; } @@ -70,9 +71,7 @@ DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id) \brief swap values at iterators lhs and rhs */ template -RAJA_HOST_DEVICE RAJA_INLINE -void -safe_iter_swap(Iter lhs, Iter rhs) +RAJA_HOST_DEVICE RAJA_INLINE void safe_iter_swap(Iter lhs, Iter rhs) { #ifdef RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE using camp::safe_swap; @@ -87,9 +86,7 @@ safe_iter_swap(Iter lhs, Iter rhs) \brief returns iterator to next item */ template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -next(Iter it) +RAJA_HOST_DEVICE RAJA_INLINE Iter next(Iter it) { ++it; return it; @@ -99,9 +96,7 @@ next(Iter it) \brief returns iterator to next item */ template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -prev(Iter it) +RAJA_HOST_DEVICE RAJA_INLINE Iter prev(Iter it) { --it; return it; diff --git a/include/RAJA/pattern/detail/multi_reduce.hpp b/include/RAJA/pattern/detail/multi_reduce.hpp index 884b9aa989..1c8edcf3f2 100644 --- a/include/RAJA/pattern/detail/multi_reduce.hpp +++ b/include/RAJA/pattern/detail/multi_reduce.hpp @@ -19,39 +19,34 @@ #define RAJA_PATTERN_DETAIL_MULTI_REDUCE_HPP #include "RAJA/pattern/detail/forall.hpp" - -#include "RAJA/util/macros.hpp" #include "RAJA/util/Operators.hpp" -#include "RAJA/util/types.hpp" #include "RAJA/util/RepeatView.hpp" +#include "RAJA/util/macros.hpp" +#include "RAJA/util/types.hpp" -#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ - template \ - struct MultiReduce##OP_NAME, T> \ - : reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>> \ - { \ - using policy = POL; \ - using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>>; \ - using Base::Base; \ - using typename Base::value_type; \ - using typename Base::reference; \ - \ - RAJA_SUPPRESS_HD_WARN \ - RAJA_HOST_DEVICE \ - reference operator[](size_t bin) const \ - { \ - return reference(*this, bin); \ - } \ +#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ + template \ + struct MultiReduce##OP_NAME, T> \ + : reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>> { \ + using policy = POL; \ + using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>>; \ + using Base::Base; \ + using typename Base::value_type; \ + using typename Base::reference; \ + \ + RAJA_SUPPRESS_HD_WARN \ + RAJA_HOST_DEVICE \ + reference operator[](size_t bin) const { return reference(*this, bin); } \ }; -#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ +#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ RAJA_DECLARE_MULTI_REDUCER(BitAnd, and_bit, POL, DATA) namespace RAJA @@ -64,34 +59,40 @@ namespace detail { template -struct BaseMultiReduce -{ +struct BaseMultiReduce { using MultiReduceData = t_MultiReduceData; using MultiReduceOp = typename t_MultiReduceData::MultiReduceOp; using value_type = typename t_MultiReduceData::value_type; - BaseMultiReduce() : BaseMultiReduce{RepeatView(MultiReduceOp::identity(), 0)} {} + BaseMultiReduce() + : BaseMultiReduce{RepeatView(MultiReduceOp::identity(), 0)} + { + } explicit BaseMultiReduce(size_t num_bins, value_type init_val = MultiReduceOp::identity(), value_type identity = MultiReduceOp::identity()) : BaseMultiReduce{RepeatView(init_val, num_bins), identity} - { } + { + } - template < typename Container, - concepts::enable_if_t, - concepts::negate>, - concepts::negate>>* = nullptr > - explicit BaseMultiReduce(Container const& container, + template , + concepts::negate>, + concepts::negate>> + * = nullptr> + explicit BaseMultiReduce(Container const &container, value_type identity = MultiReduceOp::identity()) : data{container, identity} - { } + { + } RAJA_SUPPRESS_HD_WARN - BaseMultiReduce(BaseMultiReduce const&) = default; + BaseMultiReduce(BaseMultiReduce const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduce(BaseMultiReduce &&) = default; - BaseMultiReduce &operator=(BaseMultiReduce const&) = delete; + BaseMultiReduce &operator=(BaseMultiReduce const &) = delete; BaseMultiReduce &operator=(BaseMultiReduce &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduce() = default; @@ -108,13 +109,13 @@ struct BaseMultiReduce reset(RepeatView(init_val, num_bins), identity); } - template < typename Container, - concepts::enable_if_t>* = nullptr > - void reset(Container const& container, + template > * = nullptr> + void reset(Container const &container, value_type identity = MultiReduceOp::identity()) { for (size_t bin = 0; bin < data.num_bins(); ++bin) { - RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset + RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset } data.reset(container, identity); } @@ -125,7 +126,7 @@ struct BaseMultiReduce RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - BaseMultiReduce const& combine(size_t bin, value_type const &other) const + BaseMultiReduce const &combine(size_t bin, value_type const &other) const { data.combine(bin, other); return *this; @@ -135,16 +136,18 @@ struct BaseMultiReduce value_type get(size_t bin) const { return data.get(bin); } //! Get the calculated reduced value for each bin and store it in container - template < typename Container, - concepts::enable_if_t>* = nullptr > - void get_all(Container& container) const + template > * = nullptr> + void get_all(Container &container) const { RAJA_EXTRACT_BED_IT(container); if (size_t(distance_it) != data.num_bins()) { - RAJA_ABORT_OR_THROW("MultiReduce::get_all container has different size than multi reducer"); + RAJA_ABORT_OR_THROW( + "MultiReduce::get_all container has different size than multi " + "reducer"); } size_t bin = 0; - for (auto& val : container) { + for (auto &val : container) { val = data.get(bin); ++bin; } @@ -167,42 +170,39 @@ class BaseMultiReduceMin : public BaseMultiReduce { public: using Base = BaseMultiReduce; - using typename Base::value_type; using Base::Base; + using typename Base::value_type; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin(BaseMultiReduceMin const&) = default; + BaseMultiReduceMin(BaseMultiReduceMin const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduceMin(BaseMultiReduceMin &&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin &operator=(BaseMultiReduceMin const&) = delete; + BaseMultiReduceMin &operator=(BaseMultiReduceMin const &) = delete; RAJA_SUPPRESS_HD_WARN BaseMultiReduceMin &operator=(BaseMultiReduceMin &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMin() = default; - struct reference - { + struct reference { RAJA_HOST_DEVICE - reference(BaseMultiReduceMin const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + reference(BaseMultiReduceMin const &base, size_t bin) + : m_base(base), m_bin(bin) + { + } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - reference const& min(value_type rhs) const + reference const &min(value_type rhs) const { m_base.combine(m_bin, rhs); return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: - BaseMultiReduceMin const& m_base; + BaseMultiReduceMin const &m_base; size_t m_bin; }; }; @@ -224,36 +224,33 @@ class BaseMultiReduceMax : public BaseMultiReduce using Base::Base; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMax(BaseMultiReduceMax const&) = default; + BaseMultiReduceMax(BaseMultiReduceMax const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduceMax(BaseMultiReduceMax &&) = default; - BaseMultiReduceMax &operator=(BaseMultiReduceMax const&) = delete; + BaseMultiReduceMax &operator=(BaseMultiReduceMax const &) = delete; BaseMultiReduceMax &operator=(BaseMultiReduceMax &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMax() = default; - struct reference - { + struct reference { RAJA_HOST_DEVICE - reference(BaseMultiReduceMax const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + reference(BaseMultiReduceMax const &base, size_t bin) + : m_base(base), m_bin(bin) + { + } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - reference const& max(value_type rhs) const + reference const &max(value_type rhs) const { m_base.combine(m_bin, rhs); return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: - BaseMultiReduceMax const& m_base; + BaseMultiReduceMax const &m_base; size_t m_bin; }; }; @@ -275,36 +272,33 @@ class BaseMultiReduceSum : public BaseMultiReduce using Base::Base; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceSum(BaseMultiReduceSum const&) = default; + BaseMultiReduceSum(BaseMultiReduceSum const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduceSum(BaseMultiReduceSum &&) = default; - BaseMultiReduceSum &operator=(BaseMultiReduceSum const&) = delete; + BaseMultiReduceSum &operator=(BaseMultiReduceSum const &) = delete; BaseMultiReduceSum &operator=(BaseMultiReduceSum &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceSum() = default; - struct reference - { + struct reference { RAJA_HOST_DEVICE - reference(BaseMultiReduceSum const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + reference(BaseMultiReduceSum const &base, size_t bin) + : m_base(base), m_bin(bin) + { + } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - reference const& operator+=(value_type rhs) const + reference const &operator+=(value_type rhs) const { m_base.combine(m_bin, rhs); return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: - BaseMultiReduceSum const& m_base; + BaseMultiReduceSum const &m_base; size_t m_bin; }; }; @@ -326,36 +320,33 @@ class BaseMultiReduceBitOr : public BaseMultiReduce using Base::Base; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitOr(BaseMultiReduceBitOr const&) = default; + BaseMultiReduceBitOr(BaseMultiReduceBitOr const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitOr(BaseMultiReduceBitOr &&) = default; - BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr const&) = delete; + BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr const &) = delete; BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitOr() = default; - struct reference - { + struct reference { RAJA_HOST_DEVICE - reference(BaseMultiReduceBitOr const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + reference(BaseMultiReduceBitOr const &base, size_t bin) + : m_base(base), m_bin(bin) + { + } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - reference const& operator|=(value_type rhs) const + reference const &operator|=(value_type rhs) const { m_base.combine(m_bin, rhs); return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: - BaseMultiReduceBitOr const& m_base; + BaseMultiReduceBitOr const &m_base; size_t m_bin; }; }; @@ -377,36 +368,33 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce using Base::Base; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitAnd(BaseMultiReduceBitAnd const&) = default; + BaseMultiReduceBitAnd(BaseMultiReduceBitAnd const &) = default; RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitAnd(BaseMultiReduceBitAnd &&) = default; - BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd const&) = delete; + BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd const &) = delete; BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd &&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitAnd() = default; - struct reference - { + struct reference { RAJA_HOST_DEVICE - reference(BaseMultiReduceBitAnd const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + reference(BaseMultiReduceBitAnd const &base, size_t bin) + : m_base(base), m_bin(bin) + { + } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - reference const& operator&=(value_type rhs) const + reference const &operator&=(value_type rhs) const { m_base.combine(m_bin, rhs); return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: - BaseMultiReduceBitAnd const& m_base; + BaseMultiReduceBitAnd const &m_base; size_t m_bin; }; }; diff --git a/include/RAJA/pattern/detail/reduce.hpp b/include/RAJA/pattern/detail/reduce.hpp index 788f3c698d..0fa1369763 100644 --- a/include/RAJA/pattern/detail/reduce.hpp +++ b/include/RAJA/pattern/detail/reduce.hpp @@ -41,13 +41,13 @@ using Base::Base; \ }; -#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ +#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ RAJA_DECLARE_REDUCER(BitAnd, POL, COMBINER) namespace RAJA @@ -107,7 +107,8 @@ namespace detail { template ::value> -struct DefaultLoc {}; +struct DefaultLoc { +}; template struct DefaultLoc // any non-integral type @@ -116,8 +117,7 @@ struct DefaultLoc // any non-integral type }; template -struct DefaultLoc -{ +struct DefaultLoc { RAJA_HOST_DEVICE constexpr T value() const { return -1; } }; @@ -128,18 +128,30 @@ class ValueLoc T val = doing_min ? operators::limits::max() : operators::limits::min(); IndexType loc = DefaultLoc().value(); -#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || defined(__HIPCC__) +#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || \ + defined(__HIPCC__) RAJA_HOST_DEVICE constexpr ValueLoc() {} - RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const &other) : val{other.val}, loc{other.loc} {} + RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const &other) + : val{other.val}, loc{other.loc} + { + } RAJA_HOST_DEVICE - ValueLoc &operator=(ValueLoc const &other) { val = other.val; loc = other.loc; return *this;} + ValueLoc &operator=(ValueLoc const &other) + { + val = other.val; + loc = other.loc; + return *this; + } #else constexpr ValueLoc() = default; constexpr ValueLoc(ValueLoc const &) = default; ValueLoc &operator=(ValueLoc const &) = default; #endif - RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_) : val{val_}, loc{DefaultLoc().value()} {} + RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_) + : val{val_}, loc{DefaultLoc().value()} + { + } RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_, IndexType const &loc_) : val{val_}, loc{loc_} { @@ -165,13 +177,15 @@ namespace operators { template struct limits<::RAJA::reduce::detail::ValueLoc> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr - ::RAJA::reduce::detail::ValueLoc min() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + min() { return ::RAJA::reduce::detail::ValueLoc(limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr - ::RAJA::reduce::detail::ValueLoc max() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + max() { return ::RAJA::reduce::detail::ValueLoc(limits::max()); } @@ -215,7 +229,7 @@ class BaseReduce RAJA_HOST_DEVICE void reset(T val, T identity_ = Reduce::identity()) { - operator T(); // automatic get() before reset + operator T(); // automatic get() before reset c.reset(val, identity_); } @@ -350,7 +364,10 @@ class BaseReduceMin : public BaseReduce * ************************************************************************** */ -template class Combiner> +template + class Combiner> class BaseReduceMinLoc : public BaseReduce, RAJA::reduce::min, Combiner> { @@ -362,19 +379,24 @@ class BaseReduceMinLoc constexpr BaseReduceMinLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMinLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) + constexpr BaseReduceMinLoc( + T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) { } - void reset(T init_val, IndexType init_idx, + void reset(T init_val, + IndexType init_idx, T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } /// \brief reducer function; updates the current instance's state @@ -495,31 +517,41 @@ class BaseReduceBitAnd : public BaseReduce * ************************************************************************** */ -template class Combiner> -class BaseReduceMaxLoc - : public BaseReduce, RAJA::reduce::max, Combiner> +template + class Combiner> +class BaseReduceMaxLoc : public BaseReduce, + RAJA::reduce::max, + Combiner> { public: - using Base = BaseReduce, RAJA::reduce::max, Combiner>; + using Base = + BaseReduce, RAJA::reduce::max, Combiner>; using value_type = typename Base::value_type; using reduce_type = typename Base::reduce_type; using Base::Base; constexpr BaseReduceMaxLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMaxLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) + constexpr BaseReduceMaxLoc( + T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) { } - void reset(T init_val, IndexType init_idx, + void reset(T init_val, + IndexType init_idx, T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } //! reducer function; updates the current instance's state diff --git a/include/RAJA/pattern/forall.hpp b/include/RAJA/pattern/forall.hpp index e75cc43af7..bcd8afd8c6 100644 --- a/include/RAJA/pattern/forall.hpp +++ b/include/RAJA/pattern/forall.hpp @@ -52,36 +52,27 @@ #ifndef RAJA_forall_generic_HPP #define RAJA_forall_generic_HPP -#include "RAJA/config.hpp" - #include #include #include -#include "RAJA/internal/Iterators.hpp" - -#include "RAJA/policy/PolicyBase.hpp" -#include "RAJA/policy/MultiPolicy.hpp" - +#include "RAJA/config.hpp" #include "RAJA/index/IndexSet.hpp" #include "RAJA/index/ListSegment.hpp" #include "RAJA/index/RangeSegment.hpp" - +#include "RAJA/internal/Iterators.hpp" #include "RAJA/internal/fault_tolerance.hpp" - -#include "RAJA/util/concepts.hpp" -#include "RAJA/util/Span.hpp" -#include "RAJA/util/types.hpp" - -#include "RAJA/policy/sequential/forall.hpp" - +#include "RAJA/internal/get_platform.hpp" #include "RAJA/pattern/detail/forall.hpp" #include "RAJA/pattern/detail/privatizer.hpp" - -#include "RAJA/internal/get_platform.hpp" +#include "RAJA/policy/MultiPolicy.hpp" +#include "RAJA/policy/PolicyBase.hpp" +#include "RAJA/policy/sequential/forall.hpp" +#include "RAJA/util/Span.hpp" +#include "RAJA/util/concepts.hpp" #include "RAJA/util/plugins.hpp" - #include "RAJA/util/resource.hpp" +#include "RAJA/util/types.hpp" namespace RAJA { @@ -120,15 +111,31 @@ struct icount_adapter { }; struct CallForall { - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; + template + RAJA_INLINE camp::resources::EventProxy operator()(T const&, + ExecPol, + Body, + Res, + ForallParams) const; }; struct CallForallIcount { constexpr CallForallIcount(int s); - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; + template + RAJA_INLINE camp::resources::EventProxy operator()(T const&, + ExecPol, + Body, + Res, + ForallParams) const; const int start; }; @@ -152,12 +159,20 @@ namespace wrap * ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, type_traits::is_range> -forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body, ForallParams&& f_params) +forall(Res r, + ExecutionPolicy&& p, + Container&& c, + LoopBody&& loop_body, + ForallParams&& f_params) { RAJA_FORCEINLINE_RECURSIVE return forall_impl(r, @@ -167,7 +182,10 @@ forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body, ForallPa std::forward(f_params)); } -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, @@ -197,11 +215,11 @@ template RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecutionPolicy&& p, - Container&& c, - IndexType&& icount, - LoopBody&& loop_body, - ForallParams&& f_params) + ExecutionPolicy&& p, + Container&& c, + IndexType&& icount, + LoopBody&& loop_body, + ForallParams&& f_params) { using std::begin; using std::distance; @@ -212,7 +230,11 @@ RAJA_INLINE resources::EventProxy forall_Icount(Res r, icount); using policy::sequential::forall_impl; RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, std::forward(p), range, adapted, std::forward(f_params)); + return forall_impl(r, + std::forward(p), + range, + adapted, + std::forward(f_params)); } /*! @@ -230,15 +252,16 @@ template -RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) +RAJA_INLINE resources::EventProxy forall_Icount( + Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) { // no need for icount variant here - auto segIterRes = resources::get_resource::type::get_default(); + auto segIterRes = + resources::get_resource::type::get_default(); wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { iset.segmentCall(segID, detail::CallForallIcount(iset.getStartingIcount(segID)), @@ -256,16 +279,22 @@ template -RAJA_INLINE resources::EventProxy forall(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) +RAJA_INLINE resources::EventProxy forall( + Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) { - auto segIterRes = resources::get_resource::type::get_default(); + auto segIterRes = + resources::get_resource::type::get_default(); wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { - iset.segmentCall(segID, detail::CallForall{}, SegmentExecPolicy(), loop_body, r, f_params); + iset.segmentCall(segID, + detail::CallForall{}, + SegmentExecPolicy(), + loop_body, + r, + f_params); }); return RAJA::resources::EventProxy(r); } @@ -273,13 +302,12 @@ RAJA_INLINE resources::EventProxy forall(Res r, } // end namespace wrap - /*! ****************************************************************************** * - * \brief The RAJA::policy_by_value_interface forall functions provide an interface with - * value-based policies. It also enforces the interface and performs - * static checks as well as triggering plugins and loop body updates. + * \brief The RAJA::policy_by_value_interface forall functions provide an + *interface with value-based policies. It also enforces the interface and + *performs static checks as well as triggering plugins and loop body updates. * ****************************************************************************** */ @@ -294,7 +322,10 @@ inline namespace policy_by_value_interface * ****************************************************************************** */ -template +template RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, Res r, IdxSet&& c, @@ -306,9 +337,10 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, auto f_params = expt::make_forall_param_pack(std::forward(params)...); auto&& loop_body = expt::get_lambda(std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context{ + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -318,18 +350,21 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); - RAJA::resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + RAJA::resources::EventProxy e = + wrap::forall_Icount(r, + std::forward(p), + std::forward(c), + std::move(body), + f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename IdxSet, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) @@ -349,7 +384,10 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, * ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_indexset_policy> @@ -363,7 +401,8 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context{ + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -373,18 +412,20 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = wrap::forall(r, + std::forward(p), + std::forward(c), + std::move(body), + f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename IdxSet, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_indexset_policy> @@ -405,12 +446,14 @@ forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) * ****************************************************************************** */ -template ::type > -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_multi_policy, - type_traits::is_range> +template < + typename ExecutionPolicy, + typename Container, + typename LoopBody, + typename Res = typename resources::get_resource::type> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_multi_policy, + type_traits::is_range> forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { static_assert(type_traits::is_random_access_range::value, @@ -420,9 +463,9 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) // plugins handled in multipolicy policy_invoker return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body)); + std::forward(p), + std::forward(c), + std::forward(loop_body)); } /*! @@ -438,10 +481,9 @@ template -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_range, - type_traits::is_integral> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_range, + type_traits::is_integral> forall_Icount(ExecutionPolicy&& p, Res r, Container&& c, @@ -452,11 +494,14 @@ forall_Icount(ExecutionPolicy&& p, static_assert(type_traits::is_random_access_range::value, "Container does not model RandomAccessIterator"); - auto f_params = expt::make_forall_param_pack(std::forward(first), std::forward(params)...); - auto&& loop_body = expt::get_lambda(std::forward(first), std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + auto f_params = expt::make_forall_param_pack(std::forward(first), + std::forward(params)...); + auto&& loop_body = expt::get_lambda(std::forward(first), + std::forward(params)...); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context{ + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -466,22 +511,23 @@ forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - icount, - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall_Icount(r, + std::forward(p), + std::forward(c), + icount, + std::move(body), + f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename Container, + typename IndexType, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_range, @@ -509,7 +555,10 @@ forall_Icount(ExecutionPolicy&& p, ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -524,7 +573,8 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context{ + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -534,19 +584,21 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = wrap::forall(r, + std::forward(p), + std::forward(c), + std::move(body), + f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename Container, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -562,7 +614,7 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) std::forward(loop_body)); } -} // end inline namespace policy_by_value_interface +} // namespace policy_by_value_interface /*! @@ -570,20 +622,25 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template < + typename ExecutionPolicy, + typename... Args, + typename Res = typename resources::get_resource::type> RAJA_INLINE resources::EventProxy forall(Args&&... args) { Res r = Res::get_default(); - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), + r, + std::forward(args)...); } template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall(Res r, Args&&... args) { - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), + r, + std::forward(args)...); } /*! @@ -592,8 +649,10 @@ forall(Res r, Args&&... args) * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template < + typename ExecutionPolicy, + typename... Args, + typename Res = typename resources::get_resource::type> RAJA_INLINE resources::EventProxy forall_Icount(Args&&... args) { Res r = Res::get_default(); @@ -601,7 +660,8 @@ RAJA_INLINE resources::EventProxy forall_Icount(Args&&... args) ExecutionPolicy(), r, std::forward(args)...); } template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall_Icount(Res r, Args&&... args) { return ::RAJA::policy_by_value_interface::forall_Icount( @@ -611,12 +671,17 @@ forall_Icount(Res r, Args&&... args) namespace detail { -template -RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy CallForall::operator()( + T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // this is only called inside a region, use impl using policy::sequential::forall_impl; @@ -626,15 +691,21 @@ RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& seg constexpr CallForallIcount::CallForallIcount(int s) : start(s) {} -template -RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()( + T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // go through wrap to unwrap icount - return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, f_params); + return wrap::forall_Icount( + r, ExecutionPolicy(), segment, start, body, f_params); } } // namespace detail @@ -647,98 +718,112 @@ RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T cons // - Returns a generic event proxy only if a resource is provided // avoids overhead of constructing a typed erased resource // -template -struct dynamic_helper -{ - template - static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) +template +struct dynamic_helper { + template + static void invoke_forall(const int pol, + SEGMENT const& seg, + PARAMS&&... params) { - if(IDX==pol){ - using t_pol = typename camp::at>::type; + if (IDX == pol) { + using t_pol = typename camp::at>::type; RAJA::forall(seg, params...); return; } - dynamic_helper::invoke_forall(pol, seg, params...); + dynamic_helper::invoke_forall(pol, seg, params...); } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) + template + static resources::EventProxy invoke_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + PARAMS&&... params) { - using t_pol = typename camp::at>::type; + using t_pol = typename camp::at>::type; using resource_type = typename resources::get_resource::type; - if(IDX==pol){ + if (IDX == pol) { RAJA::forall(r.get(), seg, params...); - //Return a generic event proxy from r, - //because forall returns a typed event proxy + // Return a generic event proxy from r, + // because forall returns a typed event proxy return {r}; } - return dynamic_helper::invoke_forall(r, pol, seg, params...); + return dynamic_helper::invoke_forall(r, + pol, + seg, + params...); } - }; -template -struct dynamic_helper<0, POLICY_LIST> -{ - template - static void - invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) +template +struct dynamic_helper<0, POLICY_LIST> { + template + static void invoke_forall(const int pol, + SEGMENT const& seg, + PARAMS&&... params) { - if(0==pol){ - using t_pol = typename camp::at>::type; + if (0 == pol) { + using t_pol = typename camp::at>::type; RAJA::forall(seg, params...); return; } RAJA_ABORT_OR_THROW("Policy enum not supported "); } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) + template + static resources::EventProxy invoke_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + PARAMS&&... params) { - if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); + if (pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); - using t_pol = typename camp::at>::type; + using t_pol = typename camp::at>::type; using resource_type = typename resources::get_resource::type; RAJA::forall(r.get(), seg, params...); - //Return a generic event proxy from r, - //because forall returns a typed event proxy + // Return a generic event proxy from r, + // because forall returns a typed event proxy return {r}; } - }; -template -void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) +template +void dynamic_forall(const int pol, SEGMENT const& seg, PARAMS&&... params) { constexpr int N = camp::size::value; static_assert(N > 0, "RAJA policy list must not be empty"); - if(pol > N-1) { + if (pol > N - 1) { RAJA_ABORT_OR_THROW("Policy enum not supported"); } - dynamic_helper::invoke_forall(pol, seg, params...); + dynamic_helper::invoke_forall(pol, seg, params...); } -template -resources::EventProxy -dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) +template +resources::EventProxy dynamic_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + PARAMS&&... params) { constexpr int N = camp::size::value; static_assert(N > 0, "RAJA policy list must not be empty"); - if(pol > N-1) { + if (pol > N - 1) { RAJA_ABORT_OR_THROW("Policy value out of range"); } - return dynamic_helper::invoke_forall(r, pol, seg, params...); + return dynamic_helper::invoke_forall(r, + pol, + seg, + params...); } diff --git a/include/RAJA/pattern/kernel.hpp b/include/RAJA/pattern/kernel.hpp index 1875fe27d9..df045e3440 100644 --- a/include/RAJA/pattern/kernel.hpp +++ b/include/RAJA/pattern/kernel.hpp @@ -19,19 +19,15 @@ #define RAJA_pattern_kernel_HPP #include "RAJA/config.hpp" - #include "RAJA/internal/get_platform.hpp" +#include "RAJA/pattern/kernel/internal.hpp" +#include "RAJA/util/macros.hpp" #include "RAJA/util/plugins.hpp" - +#include "RAJA/util/types.hpp" #include "camp/camp.hpp" #include "camp/concepts.hpp" #include "camp/tuple.hpp" -#include "RAJA/util/macros.hpp" -#include "RAJA/util/types.hpp" - -#include "RAJA/pattern/kernel/internal.hpp" - namespace RAJA { @@ -57,9 +53,8 @@ struct IterableWrapperTuple; template struct IterableWrapperTuple> { - using type = - camp::tuple::iterator, - typename camp::decay::IndexType>...>; + using type = camp::tuple::iterator, + typename camp::decay::IndexType>...>; }; @@ -75,12 +70,12 @@ RAJA_INLINE constexpr auto make_wrapped_tuple_impl(Tuple &&t, camp::tuple_element_t>>::IndexType>...> { return camp::make_tuple( - RAJA::Span< - typename camp::decay< - camp::tuple_element_t>>::iterator, - typename camp::decay>>:: - IndexType>{camp::get(std::forward(t)).begin(), - camp::get(std::forward(t)).end()}...); + RAJA::Span>>::iterator, + typename camp::decay< + camp::tuple_element_t>>::IndexType>{ + camp::get(std::forward(t)).begin(), + camp::get(std::forward(t)).end()}...); } } // namespace internal @@ -101,10 +96,11 @@ template -RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple &&segments, - ParamTuple &¶ms, - Resource resource, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy kernel_param_resource( + SegmentTuple &&segments, + ParamTuple &¶ms, + Resource resource, + Bodies &&...bodies) { util::PluginContext context{util::make_context()}; @@ -133,9 +129,9 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & // and only copied to provide thread-private instances. loop_data_t loop_data(make_wrapped_tuple( std::forward(segments)), - std::forward(params), - resource, - std::forward(bodies)...); + std::forward(params), + resource, + std::forward(bodies)...); util::callPostCapturePlugins(context); @@ -156,40 +152,43 @@ template -RAJA_INLINE resources::EventProxy kernel_resource(SegmentTuple &&segments, - Resource resource, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy kernel_resource( + SegmentTuple &&segments, + Resource resource, + Bodies &&...bodies) { - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - resource, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), + RAJA::make_tuple(), + resource, + std::forward(bodies)...); } template -RAJA_INLINE resources::EventProxy> kernel_param(SegmentTuple &&segments, - ParamTuple &¶ms, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy> +kernel_param(SegmentTuple &&segments, ParamTuple &¶ms, Bodies &&...bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - std::forward(params), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), + std::forward(params), + res, + std::forward(bodies)...); } template -RAJA_INLINE resources::EventProxy> kernel(SegmentTuple &&segments, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy> +kernel(SegmentTuple &&segments, Bodies &&...bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), + RAJA::make_tuple(), + res, + std::forward(bodies)...); } diff --git a/include/RAJA/pattern/kernel/Conditional.hpp b/include/RAJA/pattern/kernel/Conditional.hpp index 6b7875c4c2..77eaef8d80 100644 --- a/include/RAJA/pattern/kernel/Conditional.hpp +++ b/include/RAJA/pattern/kernel/Conditional.hpp @@ -19,13 +19,12 @@ #define RAJA_pattern_kernel_Conditional_HPP -#include "RAJA/config.hpp" - -#include "RAJA/pattern/kernel/internal.hpp" - #include #include +#include "RAJA/config.hpp" +#include "RAJA/pattern/kernel/internal.hpp" + namespace RAJA { namespace statement diff --git a/include/RAJA/pattern/kernel/For.hpp b/include/RAJA/pattern/kernel/For.hpp index 539c451673..9ca9a80ed0 100644 --- a/include/RAJA/pattern/kernel/For.hpp +++ b/include/RAJA/pattern/kernel/For.hpp @@ -18,11 +18,10 @@ #ifndef RAJA_pattern_kernel_For_HPP #define RAJA_pattern_kernel_For_HPP -#include "RAJA/config.hpp" - #include #include +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/internal.hpp" namespace RAJA @@ -59,7 +58,10 @@ namespace internal * Assigns the loop index to offset ArgumentId * */ -template +template struct ForWrapper : public GenericWrapper { using Base = GenericWrapper; @@ -85,7 +87,8 @@ template struct StatementExecutor< - statement::For, Types> { + statement::For, + Types> { template @@ -103,7 +106,11 @@ struct StatementExecutor< auto r = data.res; - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, + ExecPolicy{}, + TypedRangeSegment(0, len), + for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; @@ -112,11 +119,9 @@ struct StatementExecutor< * * */ -template -struct StatementExecutor< - statement::For, Types> { +template +struct StatementExecutor, + Types> { template diff --git a/include/RAJA/pattern/kernel/ForICount.hpp b/include/RAJA/pattern/kernel/ForICount.hpp index 18515c7f59..e4b2b5b44c 100644 --- a/include/RAJA/pattern/kernel/ForICount.hpp +++ b/include/RAJA/pattern/kernel/ForICount.hpp @@ -18,13 +18,12 @@ #ifndef RAJA_pattern_kernel_ForICount_HPP #define RAJA_pattern_kernel_ForICount_HPP -#include "RAJA/config.hpp" - #include #include -#include "RAJA/pattern/kernel/internal.hpp" +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/Param.hpp" +#include "RAJA/pattern/kernel/internal.hpp" namespace RAJA { @@ -44,8 +43,8 @@ template struct ForICount : public internal::ForList, - public internal::ForTraitBase, - public internal::Statement { + public internal::ForTraitBase, + public internal::Statement { static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " @@ -64,7 +63,10 @@ namespace internal * Assigns the loop index to offset ArgumentId * Assigns the loop index to param ParamId */ -template struct ForICountWrapper : public GenericWrapper { @@ -93,7 +95,8 @@ template struct StatementExecutor< - statement::ForICount, Types> { + statement::ForICount, + Types> { template @@ -104,15 +107,19 @@ struct StatementExecutor< using NewTypes = setSegmentTypeFromData; // Create a wrapper, just in case forall_impl needs to thread_privatize - ForICountWrapper for_wrapper(data); + ForICountWrapper + for_wrapper(data); auto len = segment_length(data); using len_t = decltype(len); auto r = resources::get_resource::type::get_default(); - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, + ExecPolicy{}, + TypedRangeSegment(0, len), + for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/Hyperplane.hpp b/include/RAJA/pattern/kernel/Hyperplane.hpp index 955afcecc0..bc7e752a6d 100644 --- a/include/RAJA/pattern/kernel/Hyperplane.hpp +++ b/include/RAJA/pattern/kernel/Hyperplane.hpp @@ -18,16 +18,14 @@ #ifndef RAJA_pattern_kernel_Hyperplane_HPP #define RAJA_pattern_kernel_Hyperplane_HPP -#include "RAJA/config.hpp" - #include #include -#include "camp/camp.hpp" - +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/For.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/types.hpp" +#include "camp/camp.hpp" namespace RAJA { @@ -81,9 +79,7 @@ template -struct Hyperplane - : public internal::Statement { +struct Hyperplane : public internal::Statement { }; } // end namespace statement @@ -108,7 +104,8 @@ struct StatementExecutor, ExecPolicy, - EnclosedStmts...>, Types> { + EnclosedStmts...>, + Types> { template @@ -135,9 +132,9 @@ struct StatementExecutor(data) + - foldl(RAJA::operators::plus(), - segment_length(data)...); + idx_t hp_len = + segment_length(data) + + foldl(RAJA::operators::plus(), segment_length(data)...); /* Execute the outer loop over hyperplanes * @@ -146,7 +143,8 @@ struct StatementExecutor::type::get_default(); - forall_impl(r, HpExecPolicy{}, + forall_impl(r, + HpExecPolicy{}, TypedRangeSegment(0, hp_len), outer_wrapper, RAJA::expt::get_empty_forall_param_pack()); @@ -159,7 +157,8 @@ template struct StatementExecutor< - HyperplaneInner, EnclosedStmts...>, Types> { + HyperplaneInner, EnclosedStmts...>, + Types> { template @@ -173,7 +172,7 @@ struct StatementExecutor< // compute actual iterate for HpArgumentId // as: i0 = h - (i1 + i2 + i3 + ...) idx_t i = h - foldl(RAJA::operators::plus(), - camp::get(data.offset_tuple)...); + camp::get(data.offset_tuple)...); // get length of Hp indexed argument auto len = segment_length(data); diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index 21d9e3cd2a..c13e4c6fbf 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -18,15 +18,15 @@ #ifndef RAJA_pattern_kernel_InitLocalMem_HPP #define RAJA_pattern_kernel_InitLocalMem_HPP -#include "RAJA/config.hpp" - #include #include +#include "RAJA/config.hpp" + namespace RAJA { -//Policies for RAJA local arrays +// Policies for RAJA local arrays struct cpu_tile_mem; @@ -43,13 +43,15 @@ namespace statement * IntiLocalMem, statements...> * Will intialize the 0th array in the param tuple */ -template +template struct InitLocalMem : public internal::Statement { }; -//Policy Specialization -template -struct InitLocalMem, EnclosedStmts...> : public internal::Statement { +// Policy Specialization +template +struct InitLocalMem, + EnclosedStmts...> : public internal::Statement { }; @@ -58,23 +60,28 @@ struct InitLocalMem, EnclosedStmts namespace internal { -//Statement executor to initalize RAJA local array -template -struct StatementExecutor, EnclosedStmts...>, Types>{ - - //Execute statement list - template - static void RAJA_INLINE exec_expanded(Data && data) +// Statement executor to initalize RAJA local array +template +struct StatementExecutor, + EnclosedStmts...>, + Types> { + + // Execute statement list + template + static void RAJA_INLINE exec_expanded(Data &&data) { execute_statement_list, Types>(data); } - - //Intialize local array - //Identifies type + number of elements needed - template - static void RAJA_INLINE exec_expanded(Data && data) + + // Intialize local array + // Identifies type + number of elements needed + template + static void RAJA_INLINE exec_expanded(Data &&data) { - using varType = typename camp::tuple_element_t::param_tuple_t>::value_type; + using varType = typename camp::tuple_element_t< + Pos, + typename camp::decay::param_tuple_t>::value_type; // Initialize memory #ifdef RAJA_COMPILER_MSVC @@ -95,16 +102,14 @@ struct StatementExecutor + + template static RAJA_INLINE void exec(Data &&data) { - //Initalize local arrays + execute statements + cleanup + // Initalize local arrays + execute statements + cleanup exec_expanded(data); } - }; diff --git a/include/RAJA/pattern/kernel/Lambda.hpp b/include/RAJA/pattern/kernel/Lambda.hpp index 29d41b431e..affc4faaf8 100644 --- a/include/RAJA/pattern/kernel/Lambda.hpp +++ b/include/RAJA/pattern/kernel/Lambda.hpp @@ -18,56 +18,51 @@ #ifndef RAJA_pattern_kernel_Lambda_HPP #define RAJA_pattern_kernel_Lambda_HPP -#include "RAJA/config.hpp" - #include #include +#include "RAJA/config.hpp" +#include "RAJA/pattern/kernel/internal.hpp" +#include "RAJA/util/macros.hpp" +#include "RAJA/util/types.hpp" #include "camp/camp.hpp" #include "camp/concepts.hpp" #include "camp/tuple.hpp" -#include "RAJA/util/macros.hpp" -#include "RAJA/util/types.hpp" - -#include "RAJA/pattern/kernel/internal.hpp" - namespace RAJA { namespace internal { -struct lambda_arg_seg_t -{}; - -struct lambda_arg_param_t -{}; +struct lambda_arg_seg_t { +}; -struct lambda_arg_offset_t -{}; +struct lambda_arg_param_t { +}; -template -struct lambda_arg_value_t -{ - using type = T; +struct lambda_arg_offset_t { }; -template -struct LambdaArg -{ - static constexpr camp::idx_t value = V; +template +struct lambda_arg_value_t { + using type = T; }; -} +template +struct LambdaArg { + static constexpr camp::idx_t value = V; +}; +} // namespace internal /*! * Used in RAJA::statement::Lambda to specify that one or more segment values * should be passed into the lambda as an argument */ -template -using Segs = camp::list...>; +template +using Segs = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more segment offsets @@ -79,16 +74,18 @@ using Segs = camp::list... * In the case of tiling (with Tile) the offset is w.r.t. the beginning of the * current tile. */ -template -using Offsets = camp::list...>; +template +using Offsets = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more parameters that * should be passed into the lambda as an argument. */ -template -using Params = camp::list...>; +template +using Params = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more constant values @@ -103,8 +100,9 @@ using Params = camp::list> * invokes: lambda0( (double)3, (double) 4 ) */ -template -using ValuesT = camp::list, values>...>; +template +using ValuesT = + camp::list, values>...>; namespace statement @@ -119,7 +117,7 @@ namespace statement * RAJA::kernel(make_tuple{s0, s1, s2}, lambda0, lambda1); * */ -template +template struct Lambda : internal::Statement { static const camp::idx_t loop_body_index = BodyIdx; }; @@ -130,13 +128,6 @@ namespace internal { - - - - - - - /* * Helper that extracts a segment value for a lambda argument * @@ -146,26 +137,22 @@ namespace internal * This class allows specialization on the segment type in LoopTypes so that * fancier constructions can happen (ie vector_exec, etc.) */ -template -struct LambdaSegExtractor -{ +template +struct LambdaSegExtractor { static_assert(!std::is_same::value, - "Segment not assigned, but used in Lambda with Segs<> argument"); + "Segment not assigned, but used in Lambda with Segs<> " + "argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data &&data) { - return SegmentType(camp::get(data.segment_tuple).begin()[camp::get(data.offset_tuple)]); + return SegmentType(camp::get(data.segment_tuple) + .begin()[camp::get(data.offset_tuple)]); } - }; - /* * Helper that extracts a segment value for a lambda argument * @@ -175,26 +162,21 @@ struct LambdaSegExtractor * This class allows specialization on the segment type in LoopTypes so that * fancier constructions can happen (ie vector_exec, etc.) */ -template -struct LambdaOffsetExtractor -{ +template +struct LambdaOffsetExtractor { static_assert(!std::is_same::value, - "Segment not assigned, but used in Lambda with Offsets<> argument"); + "Segment not assigned, but used in Lambda with Offsets<> " + "argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data &&data) { return OffsetType(camp::get(data.offset_tuple)); } - }; - /* * Helper that provides first level of argument extraction * This acts as a switchboard between Segs, Offsets, and Params @@ -202,121 +184,118 @@ struct LambdaOffsetExtractor * It calls LambdaArgExtractor to perform the actual argument extraction. * This allows LambdaArgExtractor to be specialized */ -template +template struct LambdaArgSwitchboard; -template -struct LambdaArgSwitchboard> -{ +template +struct LambdaArgSwitchboard> { using OffsetType = camp::at_v; static_assert(!std::is_same::value, - "Offset not assigned, but used in Lambda with Offsets<> argument"); + "Offset not assigned, but used in Lambda with Offsets<> " + "argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data &&data) { - return LambdaOffsetExtractor::extract(std::forward(data)); + return LambdaOffsetExtractor::extract( + std::forward(data)); } - }; -template -struct LambdaArgSwitchboard> -{ +template +struct LambdaArgSwitchboard> { using SegmentType = camp::at_v; static_assert(!std::is_same::value, - "Segment not assigned, but used in Lambda with Segs<> argument"); + "Segment not assigned, but used in Lambda with Segs<> " + "argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data &&data) { - return LambdaSegExtractor::extract(std::forward(data)); + return LambdaSegExtractor::extract( + std::forward(data)); } - }; -template -struct LambdaArgSwitchboard> -{ - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static auto extract(Data &&data)-> - typename std::add_lvalue_reference::param_tuple_t>>::type +template +struct LambdaArgSwitchboard> { + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static auto extract(Data &&data) -> + typename std::add_lvalue_reference::param_tuple_t>>::type { return camp::get(data.param_tuple); } }; -template -struct LambdaArgSwitchboard, value>> -{ - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static T extract(Data &&) +template +struct LambdaArgSwitchboard, value>> { + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static T extract(Data &&) { return T(value); } }; - RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args(Data &&data, - camp::list const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args( + Data &&data, + camp::list const &) { camp::get(data.bodies)( LambdaArgSwitchboard::extract(data)...); } - - /*! * A RAJA::kernel statement that invokes a lambda function * with user specified arguments. */ -template +template struct StatementExecutor, Types> { template static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data &&data) { - //Convert SegList, ParamList into Seg, Param types, and store in a list + // Convert SegList, ParamList into Seg, Param types, and store in a list using targList = typename camp::flatten>::type; - invoke_lambda_with_args(std::forward(data), targList{}); + invoke_lambda_with_args(std::forward(data), + targList{}); } }; - -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda(Data &&data, camp::idx_seq const &, camp::idx_seq const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda( + Data &&data, + camp::idx_seq const &, + camp::idx_seq const &) { using AllSegs = Segs; using AllParams = Params; // invoke the expanded Lambda executor, passing in all segments and params - StatementExecutor, Types>::exec(std::forward(data)); + StatementExecutor, + Types>::exec(std::forward(data)); } @@ -335,7 +314,6 @@ struct StatementExecutor, Types> { std::forward(data), camp::make_idx_seq_t::value>{}, camp::make_idx_seq_t::value>{}); - } }; diff --git a/include/RAJA/pattern/kernel/Param.hpp b/include/RAJA/pattern/kernel/Param.hpp index 8e870ebe15..6e41382f5b 100644 --- a/include/RAJA/pattern/kernel/Param.hpp +++ b/include/RAJA/pattern/kernel/Param.hpp @@ -19,13 +19,12 @@ #define RAJA_pattern_kernel_Param_HPP -#include "RAJA/config.hpp" - -#include "RAJA/pattern/kernel/internal.hpp" - #include #include +#include "RAJA/config.hpp" +#include "RAJA/pattern/kernel/internal.hpp" + namespace RAJA { namespace internal @@ -34,7 +33,7 @@ namespace internal struct ParamBase { }; -}// end namespace internal +} // end namespace internal namespace statement { diff --git a/include/RAJA/pattern/kernel/Reduce.hpp b/include/RAJA/pattern/kernel/Reduce.hpp index 4de4922ea3..8c27efba9c 100644 --- a/include/RAJA/pattern/kernel/Reduce.hpp +++ b/include/RAJA/pattern/kernel/Reduce.hpp @@ -18,11 +18,10 @@ #ifndef RAJA_pattern_kernel_Reduce_HPP #define RAJA_pattern_kernel_Reduce_HPP -#include "RAJA/config.hpp" - #include #include +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/internal.hpp" namespace RAJA @@ -39,7 +38,8 @@ namespace statement * */ template class ReduceOperator, + template + class ReduceOperator, typename ParamId, typename... EnclosedStmts> struct Reduce : public internal::Statement { diff --git a/include/RAJA/pattern/kernel/Region.hpp b/include/RAJA/pattern/kernel/Region.hpp index 82b79ae775..33e56dde72 100644 --- a/include/RAJA/pattern/kernel/Region.hpp +++ b/include/RAJA/pattern/kernel/Region.hpp @@ -18,19 +18,19 @@ #ifndef RAJA_pattern_kernel_region_HPP #define RAJA_pattern_kernel_region_HPP -#include "RAJA/config.hpp" -#include "RAJA/pattern/region.hpp" - #include #include +#include "RAJA/config.hpp" +#include "RAJA/pattern/region.hpp" + namespace RAJA { namespace statement { -template +template struct Region : public internal::Statement { }; @@ -40,23 +40,23 @@ struct Region : public internal::Statement { namespace internal { -//Statement executor to create a region within kernel +// Statement executor to create a region within kernel -//Note: RAJA region's lambda must capture by reference otherwise -//internal function calls are undefined. -template -struct StatementExecutor, Types> { +// Note: RAJA region's lambda must capture by reference otherwise +// internal function calls are undefined. +template +struct StatementExecutor, + Types> { -template -static RAJA_INLINE void exec(Data &&data) -{ + template + static RAJA_INLINE void exec(Data &&data) + { - RAJA::region([&]() { + RAJA::region([&]() { using data_t = camp::decay; execute_statement_list, Types>(data_t(data)); }); -} - + } }; diff --git a/include/RAJA/pattern/kernel/Tile.hpp b/include/RAJA/pattern/kernel/Tile.hpp index 43f72e0545..54a6da0244 100644 --- a/include/RAJA/pattern/kernel/Tile.hpp +++ b/include/RAJA/pattern/kernel/Tile.hpp @@ -18,18 +18,16 @@ #ifndef RAJA_pattern_kernel_Tile_HPP #define RAJA_pattern_kernel_Tile_HPP -#include "RAJA/config.hpp" - #include #include -#include "camp/camp.hpp" -#include "camp/concepts.hpp" -#include "camp/tuple.hpp" - +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/internal.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/types.hpp" +#include "camp/camp.hpp" +#include "camp/concepts.hpp" +#include "camp/tuple.hpp" namespace RAJA { @@ -39,9 +37,7 @@ struct TileSize { RAJA_HOST_DEVICE RAJA_INLINE - constexpr TileSize(camp::idx_t size_) : size{size_} - { - } + constexpr TileSize(camp::idx_t size_) : size{size_} {} }; namespace statement @@ -75,7 +71,6 @@ struct tile_dynamic { }; - namespace internal { @@ -84,7 +79,10 @@ namespace internal * Assigns the tile segment to segment ArgumentId * */ -template +template struct TileWrapper : public GenericWrapper { using Base = GenericWrapper; @@ -107,8 +105,7 @@ template struct IterableTiler { using value_type = camp::decay; - struct iterate - { + struct iterate { value_type s; Index_type i; }; @@ -222,7 +219,8 @@ template struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement::Tile, EPol, EnclosedStmts...>, + Types> { template static RAJA_INLINE void exec(Data &data) @@ -238,24 +236,29 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, + EPol{}, + tiled_iterable, + tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; } }; -template +template struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement:: + Tile, EPol, EnclosedStmts...>, + Types> { template static RAJA_INLINE void exec(Data &data) @@ -265,20 +268,24 @@ struct StatementExecutor< // Get the tiling policies chunk size auto chunk_size = camp::get(data.param_tuple); - static_assert(camp::concepts::metalib::is_same::value, - "Extracted parameter must be of type TileSize."); + static_assert( + camp::concepts::metalib::is_same::value, + "Extracted parameter must be of type TileSize."); // Create a tile iterator IterableTiler tiled_iterable(segment, chunk_size.size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); - + forall_impl(r, + EPol{}, + tiled_iterable, + tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); + // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; } diff --git a/include/RAJA/pattern/kernel/TileTCount.hpp b/include/RAJA/pattern/kernel/TileTCount.hpp index 2653e992c7..2b91186da6 100644 --- a/include/RAJA/pattern/kernel/TileTCount.hpp +++ b/include/RAJA/pattern/kernel/TileTCount.hpp @@ -18,18 +18,16 @@ #ifndef RAJA_pattern_kernel_TileTCount_HPP #define RAJA_pattern_kernel_TileTCount_HPP -#include "RAJA/config.hpp" - #include #include -#include "camp/camp.hpp" -#include "camp/concepts.hpp" -#include "camp/tuple.hpp" - +#include "RAJA/config.hpp" #include "RAJA/pattern/kernel/internal.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/types.hpp" +#include "camp/camp.hpp" +#include "camp/concepts.hpp" +#include "camp/tuple.hpp" namespace RAJA { @@ -66,9 +64,13 @@ namespace internal * Assigns the tile segment to segment ArgumentId * Assigns the tile index to param ParamId */ -template -struct TileTCountWrapper : public GenericWrapper { +struct TileTCountWrapper + : public GenericWrapper { using Base = GenericWrapper; using Base::Base; @@ -79,17 +81,16 @@ struct TileTCountWrapper : public GenericWrapper { // Assign the tile's segment to the tuple camp::get(Base::data.segment_tuple) = si.s; - + // Assign the tile's index Base::data.template assign_param(si.i); - + // Execute enclosed statements Base::exec(); } }; - /*! * A generic RAJA::kernel forall_impl executor for statement::TileTCount * @@ -102,7 +103,8 @@ template struct StatementExecutor< - statement::TileTCount, Types> { + statement::TileTCount, + Types> { template @@ -119,12 +121,16 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileTCountWrapper tile_wrapper(data); + TileTCountWrapper + tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, + EPol{}, + tiled_iterable, + tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; diff --git a/include/RAJA/pattern/kernel/internal/LoopData.hpp b/include/RAJA/pattern/kernel/internal/LoopData.hpp index 9667a55538..86c2b67e96 100644 --- a/include/RAJA/pattern/kernel/internal/LoopData.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopData.hpp @@ -19,20 +19,17 @@ #ifndef RAJA_pattern_kernel_internal_LoopData_HPP #define RAJA_pattern_kernel_internal_LoopData_HPP -#include "RAJA/config.hpp" +#include +#include +#include "RAJA/config.hpp" #include "RAJA/index/IndexSet.hpp" -#include "RAJA/util/macros.hpp" -#include "RAJA/util/types.hpp" - -#include "camp/camp.hpp" - #include "RAJA/pattern/detail/privatizer.hpp" #include "RAJA/pattern/kernel/internal/StatementList.hpp" #include "RAJA/pattern/kernel/internal/Template.hpp" - -#include -#include +#include "RAJA/util/macros.hpp" +#include "RAJA/util/types.hpp" +#include "camp/camp.hpp" namespace RAJA { @@ -40,25 +37,21 @@ namespace internal { - - - // Universal base of all For wrappers for type traits - struct ForList { - }; - struct ForBase { - }; - struct CollapseBase { - }; - template - struct ForTraitBase : public ForBase { - constexpr static camp::idx_t index_val = ArgumentId; - using index = camp::num; - using index_type = camp::nil; // default to invalid type - using policy_type = Policy; - using type = ForTraitBase; // make camp::value compatible - }; - - +// Universal base of all For wrappers for type traits +struct ForList { +}; +struct ForBase { +}; +struct CollapseBase { +}; +template +struct ForTraitBase : public ForBase { + constexpr static camp::idx_t index_val = ArgumentId; + using index = camp::num; + using index_type = camp::nil; // default to invalid type + using policy_type = Policy; + using type = ForTraitBase; // make camp::value compatible +}; template @@ -100,8 +93,6 @@ using index_types_from_segments = value_type_list_from_segments>::type; - - template ::value>; vector_sizes_t vector_sizes; - RAJA_INLINE RAJA_HOST_DEVICE constexpr - LoopData(SegmentTuple const &s, ParamTuple const &p, Resource r, Bodies const &... b) + RAJA_INLINE RAJA_HOST_DEVICE constexpr LoopData(SegmentTuple const &s, + ParamTuple const &p, + Resource r, + Bodies const &...b) : segment_tuple(s), param_tuple(p), res(r), bodies(b...) { } @@ -155,50 +148,37 @@ struct LoopData { template RAJA_HOST_DEVICE RAJA_INLINE void assign_param(IndexT const &i) { - using param_t = camp::at_v; + using param_t = + camp::at_v; camp::get(param_tuple) = param_t(i); } template - RAJA_HOST_DEVICE RAJA_INLINE - auto get_param() -> - camp::at_v + RAJA_HOST_DEVICE RAJA_INLINE auto get_param() + -> camp::at_v { return camp::get(param_tuple); } - RAJA_HOST_DEVICE RAJA_INLINE - Resource get_resource() - { - return res; - } - - + RAJA_HOST_DEVICE RAJA_INLINE Resource get_resource() { return res; } }; - - template -using segment_diff_type = - typename std::iterator_traits< - typename camp::at_v::iterator>::difference_type; - - +using segment_diff_type = typename std::iterator_traits< + typename camp::at_v::iterator>::difference_type; template -RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const &data) -> - segment_diff_type +RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const &data) + -> segment_diff_type { return camp::get(data.segment_tuple).end() - camp::get(data.segment_tuple).begin(); } - - template struct GenericWrapper : GenericWrapperBase { using data_t = camp::decay; @@ -209,7 +189,10 @@ struct GenericWrapper : GenericWrapperBase { constexpr explicit GenericWrapper(data_t &d) : data{d} {} RAJA_INLINE - void exec() { execute_statement_list, Types>(data); } + void exec() + { + execute_statement_list, Types>(data); + } }; @@ -236,7 +219,6 @@ struct NestedPrivatizer { }; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp index 7f77df4214..271ffb7214 100644 --- a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp @@ -29,63 +29,69 @@ namespace internal { -template +template struct LoopTypes; -template +template struct LoopTypes, camp::list> { - using Self = LoopTypes, camp::list>; + using Self = + LoopTypes, camp::list>; static constexpr size_t s_num_segments = sizeof...(SegmentTypes); // This ensures that you don't double-loop over a segment within the same // loop nesting static_assert(s_num_segments == sizeof...(OffsetTypes), - "Number of segments and offsets must match"); + "Number of segments and offsets must match"); using segment_types_t = camp::list; using offset_types_t = camp::list; }; -template -using makeInitialLoopTypes = - LoopTypes::value>, - list_of_n::value>>; +template +using makeInitialLoopTypes = LoopTypes< + list_of_n::value>, + list_of_n::value>>; -template +template struct SetSegmentTypeHelper; -template -struct SetSegmentTypeHelper> -{ - using segment_list = typename Types::segment_types_t; - using offset_list = typename Types::offset_types_t; - - static_assert(std::is_same, void>::value, - "Segment was already assigned: Probably looping over same segment in loop nest"); - - using type = LoopTypes< - camp::list>::type...>, - camp::list>::type...>>; - +template +struct SetSegmentTypeHelper> { + using segment_list = typename Types::segment_types_t; + using offset_list = typename Types::offset_types_t; + + static_assert(std::is_same, void>::value, + "Segment was already assigned: Probably looping over same " + "segment in loop nest"); + + using type = LoopTypes< + camp::list< + typename std::conditional>::type...>, + camp::list< + typename std::conditional>::type...>>; }; -template -using setSegmentType = - typename SetSegmentTypeHelper>::type; +template +using setSegmentType = typename SetSegmentTypeHelper< + Types, + Segment, + T, + camp::make_idx_seq_t>::type; -template -using setSegmentTypeFromData = - setSegmentType::index_types_t, Segment>>; +template +using setSegmentTypeFromData = setSegmentType< + Types, + Segment, + camp::at_v::index_types_t, Segment>>; } // end namespace internal diff --git a/include/RAJA/pattern/kernel/internal/Statement.hpp b/include/RAJA/pattern/kernel/internal/Statement.hpp index 48ca828a68..e9dc91437f 100644 --- a/include/RAJA/pattern/kernel/internal/Statement.hpp +++ b/include/RAJA/pattern/kernel/internal/Statement.hpp @@ -18,9 +18,10 @@ #ifndef RAJA_pattern_kernel_internal_Statement_HPP #define RAJA_pattern_kernel_internal_Statement_HPP -#include "RAJA/pattern/kernel/internal/StatementList.hpp" -#include #include +#include + +#include "RAJA/pattern/kernel/internal/StatementList.hpp" namespace RAJA { @@ -28,11 +29,12 @@ namespace internal { - template struct Statement { - static_assert(std::is_same::value || sizeof...(EnclosedStmts) > 0, - "Executable statement with no enclosed statements, this is almost certainly a bug"); + static_assert(std::is_same::value || + sizeof...(EnclosedStmts) > 0, + "Executable statement with no enclosed statements, this is " + "almost certainly a bug"); Statement() = delete; using enclosed_statements_t = StatementList; @@ -40,13 +42,10 @@ struct Statement { }; - - template struct StatementExecutor; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/StatementList.hpp b/include/RAJA/pattern/kernel/internal/StatementList.hpp index 5c0d71afb4..2055a94674 100644 --- a/include/RAJA/pattern/kernel/internal/StatementList.hpp +++ b/include/RAJA/pattern/kernel/internal/StatementList.hpp @@ -18,12 +18,12 @@ #ifndef RAJA_pattern_kernel_internal_StatementList_HPP #define RAJA_pattern_kernel_internal_StatementList_HPP +#include + #include "RAJA/config.hpp" #include "RAJA/util/macros.hpp" #include "camp/camp.hpp" -#include - namespace RAJA { namespace internal @@ -35,8 +35,6 @@ template struct StatementExecutor; - - template using StatementList = camp::list; @@ -47,7 +45,8 @@ struct StatementListExecutor; template + typename StmtList, + typename Types> struct StatementListExecutor { template @@ -61,8 +60,10 @@ struct StatementListExecutor { StatementExecutor::exec(std::forward(data)); // call our next statement - StatementListExecutor::exec( - std::forward(data)); + StatementListExecutor::exec(std::forward(data)); } }; @@ -89,7 +90,6 @@ RAJA_INLINE void execute_statement_list(Data &&data) } - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/Template.hpp b/include/RAJA/pattern/kernel/internal/Template.hpp index c750b95986..b7ac5c864f 100644 --- a/include/RAJA/pattern/kernel/internal/Template.hpp +++ b/include/RAJA/pattern/kernel/internal/Template.hpp @@ -31,17 +31,15 @@ namespace detail // Helper class to convert a camp::idx_t into some type T // used in template expansion in ListOfNHelper template -struct SeqToType -{ +struct SeqToType { using type = T; }; template struct ListOfNHelper; -template -struct ListOfNHelper > -{ +template +struct ListOfNHelper> { using type = camp::list::type...>; }; @@ -49,13 +47,12 @@ struct ListOfNHelper > template struct TupleOfNHelper; -template -struct TupleOfNHelper > -{ +template +struct TupleOfNHelper> { using type = camp::tuple::type...>; }; -} // namespace detail +} // namespace detail /* * This creates a camp::list with N types, each one being T. @@ -64,7 +61,8 @@ struct TupleOfNHelper > * */ template -using list_of_n = typename detail::ListOfNHelper>::type; +using list_of_n = + typename detail::ListOfNHelper>::type; /* @@ -74,8 +72,8 @@ using list_of_n = typename detail::ListOfNHelper>::ty * */ template -using tuple_of_n = typename detail::TupleOfNHelper>::type; - +using tuple_of_n = + typename detail::TupleOfNHelper>::type; } // end namespace internal diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index ff10f04dae..f67073c25b 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -28,7 +28,7 @@ #include "camp/concepts.hpp" #include "camp/tuple.hpp" -//Odd dependecy with atomics is breaking CI builds +// Odd dependecy with atomics is breaking CI builds //#include "RAJA/util/View.hpp" #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) @@ -41,7 +41,7 @@ namespace RAJA { // GPU or CPU threads available -//strongly type the ExecPlace (guards agaist errors) +// strongly type the ExecPlace (guards agaist errors) enum struct ExecPlace : int { HOST, DEVICE, NUM_PLACES }; struct null_launch_t { @@ -138,8 +138,12 @@ struct LaunchParams { RAJA_INLINE LaunchParams() = default; - LaunchParams(Teams in_teams, Threads in_threads, size_t in_shared_mem_size = 0) - : teams(in_teams), threads(in_threads), shared_mem_size(in_shared_mem_size) {}; + LaunchParams(Teams in_teams, + Threads in_threads, + size_t in_shared_mem_size = 0) + : teams(in_teams), + threads(in_threads), + shared_mem_size(in_shared_mem_size){}; private: RAJA_HOST_DEVICE @@ -154,9 +158,8 @@ struct LaunchParams { class LaunchContext { public: - - //Bump style allocator used to - //get memory from the pool + // Bump style allocator used to + // get memory from the pool size_t shared_mem_offset; void *shared_mem_ptr; @@ -166,39 +169,41 @@ class LaunchContext #endif RAJA_HOST_DEVICE LaunchContext() - : shared_mem_offset(0), shared_mem_ptr(nullptr) + : shared_mem_offset(0), shared_mem_ptr(nullptr) { } - //TODO handle alignment - template - RAJA_HOST_DEVICE T* getSharedMemory(size_t bytes) + // TODO handle alignment + template + RAJA_HOST_DEVICE T *getSharedMemory(size_t bytes) { - //Calculate offset in bytes with a char pointer - void* mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; + // Calculate offset in bytes with a char pointer + void *mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; - shared_mem_offset += bytes*sizeof(T); + shared_mem_offset += bytes * sizeof(T); - //convert to desired type - return static_cast(mem_ptr); + // convert to desired type + return static_cast(mem_ptr); } /* //Odd dependecy with atomics is breaking CI builds - template - RAJA_HOST_DEVICE auto getSharedMemoryView(size_t bytes, arg idx, args... idxs) + template RAJA_HOST_DEVICE auto + getSharedMemoryView(size_t bytes, arg idx, args... idxs) { T * mem_ptr = &((T*) shared_mem_ptr)[shared_mem_offset]; shared_mem_offset += bytes*sizeof(T); - return RAJA::View>(mem_ptr, idx, idxs...); + return RAJA::View>(mem_ptr, idx, + idxs...); } */ RAJA_HOST_DEVICE void releaseSharedMemory() { - //On the cpu/gpu we want to restart the count + // On the cpu/gpu we want to restart the count shared_mem_offset = 0; } @@ -218,19 +223,24 @@ class LaunchContext template struct LaunchExecute; -//Policy based launch with support to new reducers... -template -void launch(LaunchParams const &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Policy based launch with support to new reducers... +template +void launch(LaunchParams const &launch_params, + const char *kernel_name, + ReduceParams &&...rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto &&launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context{ + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -242,29 +252,36 @@ void launch(LaunchParams const &launch_params, const char *kernel_name, ReducePa using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec( + Res::get_default(), launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); } -//Duplicate of code above on account that we need to support the case in which a kernel_name is not given -template -void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_args) +// Duplicate of code above on account that we need to support the case in which +// a kernel_name is not given +template +void launch(LaunchParams const &launch_params, + ReduceParams &&...rest_of_launch_args) { const char *kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto &&launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context{ + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -276,15 +293,17 @@ void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_ using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec( + Res::get_default(), launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); } //================================================= -//Run time based policy launch +// Run time based policy launch //================================================= template void launch(ExecPlace place, LaunchParams const ¶ms, BODY const &body) @@ -293,131 +312,174 @@ void launch(ExecPlace place, LaunchParams const ¶ms, BODY const &body) } template -void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name, BODY const &body) +void launch(ExecPlace place, + const LaunchParams ¶ms, + const char *kernel_name, + BODY const &body) { - //Forward to single policy launch API - simplifies testing of plugins + // Forward to single policy launch API - simplifies testing of plugins switch (place) { case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); + case ExecPlace::DEVICE: { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time API for new reducer interface +// Run-time API for new reducer interface template -void launch(ExecPlace place, const LaunchParams &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +void launch(ExecPlace place, + const LaunchParams &launch_params, + const char *kernel_name, + ReduceParams &&...rest_of_launch_args) { - //Forward to single policy launch API - simplifies testing of plugins + // Forward to single policy launch API - simplifies testing of plugins switch (place) { case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), + launch_params, + kernel_name, + std::forward(rest_of_launch_args)...); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + case ExecPlace::DEVICE: { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), + launch_params, + kernel_name, + std::forward(rest_of_launch_args)...); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time API for new reducer interface with support of the case without a new kernel name +// Run-time API for new reducer interface with support of the case without a new +// kernel name template -void launch(ExecPlace place, const LaunchParams &launch_params, ReduceParams&&... rest_of_launch_args) - //BODY const &body) +void launch(ExecPlace place, + const LaunchParams &launch_params, + ReduceParams &&...rest_of_launch_args) +// BODY const &body) { const char *kernel_name = nullptr; - //Forward to single policy launch API - simplifies testing of plugins + // Forward to single policy launch API - simplifies testing of plugins switch (place) { case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), + launch_params, + kernel_name, + std::forward(rest_of_launch_args)...); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + case ExecPlace::DEVICE: { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), + launch_params, + kernel_name, + std::forward(rest_of_launch_args)...); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -// Helper function to retrieve a resource based on the run-time policy - if a device is active -#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || defined(RAJA_ENABLE_SYCL) -template -RAJA::resources::Resource Get_Runtime_Resource(T host_res, U device_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {return RAJA::resources::Resource(device_res);} - else { return RAJA::resources::Resource(host_res); } +// Helper function to retrieve a resource based on the run-time policy - if a +// device is active +#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || \ + defined(RAJA_ENABLE_SYCL) +template +RAJA::resources::Resource Get_Runtime_Resource(T host_res, + U device_res, + RAJA::ExecPlace device) +{ + if (device == RAJA::ExecPlace::DEVICE) { + return RAJA::resources::Resource(device_res); + } else { + return RAJA::resources::Resource(host_res); + } } #endif -template -RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {RAJA_ABORT_OR_THROW("Device is not enabled");} +template +RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device) +{ + if (device == RAJA::ExecPlace::DEVICE) { + RAJA_ABORT_OR_THROW("Device is not enabled"); + } return RAJA::resources::Resource(host_res); } -//Launch API which takes team resource struct and supports new reducers -template -resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, - const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Launch API which takes team resource struct and supports new reducers +template +resources::EventProxy launch( + RAJA::resources::Resource res, + LaunchParams const &launch_params, + const char *kernel_name, + ReduceParams &&...rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto &&launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) { place = RAJA::ExecPlace::HOST; } else { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context{ + place == ExecPlace::HOST + ? util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context{ + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -432,14 +494,16 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } #if defined(RAJA_GPU_ACTIVE) case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } @@ -456,36 +520,42 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, } -//Duplicate of API above on account that we need to handle the case that a kernel name is not provided -template -resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, - ReduceParams&&... rest_of_launch_args) +// Duplicate of API above on account that we need to handle the case that a +// kernel name is not provided +template +resources::EventProxy launch( + RAJA::resources::Resource res, + LaunchParams const &launch_params, + ReduceParams &&...rest_of_launch_args) { const char *kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto &&launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) { place = RAJA::ExecPlace::HOST; } else { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context{ + place == ExecPlace::HOST + ? util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context{ + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -500,14 +570,16 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } #if defined(RAJA_GPU_ACTIVE) case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } @@ -523,7 +595,7 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, return resources::EventProxy(res); } -template +template #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) using loop_policy = typename POLICY_LIST::device_policy_t; #else @@ -546,8 +618,7 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, BODY const &body) { - LoopExecute, SEGMENT>::exec(ctx, - segment, body); + LoopExecute, SEGMENT>::exec(ctx, segment, body); } template RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment, - BODY const &body) + SEGMENT const &segment, + BODY const &body) { LoopICountExecute, SEGMENT>::exec(ctx, - segment, body); + segment, + body); } namespace expt @@ -578,7 +650,9 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, { LoopExecute, SEGMENT>::exec(ctx, - segment0, segment1, body); + segment0, + segment1, + body); } RAJA_SUPPRESS_HD_WARN @@ -587,13 +661,15 @@ template RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) { LoopICountExecute, SEGMENT>::exec(ctx, - segment0, segment1, body); + segment0, + segment1, + body); } RAJA_SUPPRESS_HD_WARN @@ -608,11 +684,8 @@ RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, BODY const &body) { - LoopExecute, SEGMENT>::exec(ctx, - segment0, - segment1, - segment2, - body); + LoopExecute, SEGMENT>::exec( + ctx, segment0, segment1, segment2, body); } RAJA_SUPPRESS_HD_WARN @@ -621,17 +694,17 @@ template RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) { - LoopICountExecute, SEGMENT>::exec(ctx, - segment0, segment1, segment2, body); + LoopICountExecute, SEGMENT>::exec( + ctx, segment0, segment1, segment2, body); } -} //namespace expt +} // namespace expt template struct TileExecute; @@ -651,7 +724,9 @@ RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, { TileExecute, SEGMENT>::exec(ctx, - tile_size, segment, body); + tile_size, + segment, + body); } template RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) { TileTCountExecute, SEGMENT>::exec(ctx, - tile_size, segment, body); + tile_size, + segment, + body); } namespace expt @@ -684,8 +761,8 @@ RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, BODY const &body) { - TileExecute, SEGMENT>::exec(ctx, - tile_size0, tile_size1, segment0, segment1, body); + TileExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } template RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size0, - TILE_T tile_size1, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) + TILE_T tile_size0, + TILE_T tile_size1, + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) { - TileTCountExecute, SEGMENT>::exec(ctx, - tile_size0, tile_size1, segment0, segment1, body); + TileTCountExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } template , SEGMENT>::exec(ctx, - tile_size0, tile_size1, tile_size2, - segment0, segment1, segment2, body); + tile_size0, + tile_size1, + tile_size2, + segment0, + segment1, + segment2, + body); } template RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size0, - TILE_T tile_size1, - TILE_T tile_size2, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) + TILE_T tile_size0, + TILE_T tile_size1, + TILE_T tile_size2, + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) { TileTCountExecute, SEGMENT>::exec(ctx, - tile_size0, tile_size1, tile_size2, - segment0, segment1, segment2, body); + tile_size0, + tile_size1, + tile_size2, + segment0, + segment1, + segment2, + body); } -} //namespace expt +} // namespace expt } // namespace RAJA #endif diff --git a/include/RAJA/pattern/multi_reduce.hpp b/include/RAJA/pattern/multi_reduce.hpp index 3fbe36877c..65e32c9656 100644 --- a/include/RAJA/pattern/multi_reduce.hpp +++ b/include/RAJA/pattern/multi_reduce.hpp @@ -19,7 +19,6 @@ #define RAJA_multi_reduce_HPP #include "RAJA/config.hpp" - #include "RAJA/util/Operators.hpp" #include "RAJA/util/macros.hpp" @@ -156,7 +155,7 @@ struct MultiReduceSum; */ template struct MultiReduceBitOr; - + /*! ****************************************************************************** @@ -171,7 +170,8 @@ struct MultiReduceBitOr; Index_ptr bins = ...; Real_ptr bit_vals = ...; - MultiReduceBitAnd my_bits(num_bins, init_val); + MultiReduceBitAnd my_bits(num_bins, + init_val); forall( ..., [=] (Index_type i) { my_bits[bins[i]] &= (data[i]); @@ -188,7 +188,7 @@ struct MultiReduceBitOr; template struct MultiReduceBitAnd; -} //namespace RAJA +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/pattern/params/forall.hpp b/include/RAJA/pattern/params/forall.hpp index 5a656206f5..eefcdf9652 100644 --- a/include/RAJA/pattern/params/forall.hpp +++ b/include/RAJA/pattern/params/forall.hpp @@ -1,19 +1,18 @@ #ifndef FORALL_PARAM_HPP #define FORALL_PARAM_HPP -#include "RAJA/policy/sequential/params/reduce.hpp" -#include "RAJA/policy/sequential/params/kernel_name.hpp" -#include "RAJA/policy/openmp/params/reduce.hpp" -#include "RAJA/policy/openmp/params/kernel_name.hpp" -#include "RAJA/policy/openmp_target/params/reduce.hpp" -#include "RAJA/policy/openmp_target/params/kernel_name.hpp" -#include "RAJA/policy/cuda/params/reduce.hpp" #include "RAJA/policy/cuda/params/kernel_name.hpp" -#include "RAJA/policy/hip/params/reduce.hpp" +#include "RAJA/policy/cuda/params/reduce.hpp" #include "RAJA/policy/hip/params/kernel_name.hpp" -#include "RAJA/policy/sycl/params/reduce.hpp" +#include "RAJA/policy/hip/params/reduce.hpp" +#include "RAJA/policy/openmp/params/kernel_name.hpp" +#include "RAJA/policy/openmp/params/reduce.hpp" +#include "RAJA/policy/openmp_target/params/kernel_name.hpp" +#include "RAJA/policy/openmp_target/params/reduce.hpp" +#include "RAJA/policy/sequential/params/kernel_name.hpp" +#include "RAJA/policy/sequential/params/reduce.hpp" #include "RAJA/policy/sycl/params/kernel_name.hpp" - +#include "RAJA/policy/sycl/params/reduce.hpp" #include "RAJA/util/CombiningAdapter.hpp" namespace RAJA @@ -21,348 +20,443 @@ namespace RAJA namespace expt { - // - // - // Forall Parameter Packing type - // - // - struct ParamMultiplexer; - - template - struct ForallParamPack { - - friend struct ParamMultiplexer; - - using Base = camp::tuple; - Base param_tup; - - static constexpr size_t param_tup_sz = camp::tuple_size::value; - using params_seq = camp::make_idx_seq_t< param_tup_sz >; - - private: - - // Init - template - static constexpr void detail_init(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(expt::detail::init( camp::get(f_params.param_tup), std::forward(args)... )); - } - - // Combine - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& out, const ForallParamPack& in ) { - CAMP_EXPAND(detail::combine( camp::get(out.param_tup), camp::get(in.param_tup))); - } - - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& f_params ) { - CAMP_EXPAND(detail::combine( camp::get(f_params.param_tup) )); - } - - // Resolve - template - static constexpr void detail_resolve(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(detail::resolve( camp::get(f_params.param_tup), std::forward(args)... )); - } - - // Used to construct the argument TYPES that will be invoked with the lambda. - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple<>{}; }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return typename First::ARG_TUP_T(); }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple_cat_pair(typename First::ARG_TUP_T(), LAMBDA_ARG_TUP_T()); }; - - using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - - //Use the size of param_tup to generate the argument list. - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) { return camp::make_tuple(); } - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) { return camp::get(param_tup).get_lambda_arg_tup(); } - template - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) { - return camp::tuple_cat_pair( camp::get(param_tup).get_lambda_arg_tup(), LAMBDA_ARG_TUP_V(camp::num()) ); - } - - public: - ForallParamPack(){} - - RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() {return LAMBDA_ARG_TUP_V(camp::num());} - - using lambda_arg_seq = camp::make_idx_seq_t::value>; - - template - ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)) {}; - }; // struct ForallParamPack - - - - //=========================================================================== - // - // - // ParamMultiplexer is how we hook into the individual calls within forall_impl. - // - // - struct ParamMultiplexer { - template> - static void constexpr init( ForallParamPack& f_params, Args&& ...args) { - FP::detail_init(EXEC_POL(),typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr combine(ForallParamPack& f_params, Args&& ...args){ - FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr resolve( ForallParamPack& f_params, Args&& ...args){ - FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - }; - //=========================================================================== +// +// +// Forall Parameter Packing type +// +// +struct ParamMultiplexer; +template +struct ForallParamPack { + friend struct ParamMultiplexer; - //=========================================================================== - // - // - // ForallParamPack generators. - // - // - RAJA_INLINE static auto get_empty_forall_param_pack(){ - static ForallParamPack<> p; - return p; - } + using Base = camp::tuple; + Base param_tup; - namespace detail { - // all_true trick to perform variadic expansion in static asserts. - // https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template - template struct bool_pack; - template - using all_true = std::is_same, bool_pack>; + static constexpr size_t param_tup_sz = camp::tuple_size::value; + using params_seq = camp::make_idx_seq_t; - template - using check_types_derive_base = all_true::value...>; - } // namespace detail +private: + // Init + template + static constexpr void detail_init(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(expt::detail::init(camp::get(f_params.param_tup), + std::forward(args)...)); + } + // Combine + template + RAJA_HOST_DEVICE static constexpr void detail_combine( + EXEC_POL, + camp::idx_seq, + ForallParamPack& out, + const ForallParamPack& in) + { + CAMP_EXPAND(detail::combine(camp::get(out.param_tup), + camp::get(in.param_tup))); + } + + template + RAJA_HOST_DEVICE static constexpr void detail_combine( + EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params) + { + CAMP_EXPAND(detail::combine(camp::get(f_params.param_tup))); + } - template - constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) { - static_assert(detail::check_types_derive_base...>::value, - "Forall optional arguments do not derive ForallParamBase. Please see Reducer, ReducerLoc and KernelName for examples.") ; - return ForallParamPack...>(std::move(tuple)); + // Resolve + template + static constexpr void detail_resolve(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(detail::resolve(camp::get(f_params.param_tup), + std::forward(args)...)); } - + // Used to construct the argument TYPES that will be invoked with the lambda. + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple<>{}; + }; + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return typename First::ARG_TUP_T(); + }; + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple_cat_pair(typename First::ARG_TUP_T(), + LAMBDA_ARG_TUP_T()); + }; - namespace detail { - // Maybe we should do a lot of these with structs... - template - constexpr auto tuple_from_seq (const camp::idx_seq&, TupleType&& tuple){ - return camp::forward_as_tuple( camp::get< Seq >(std::forward(tuple))... ); - }; + using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - template - constexpr auto strip_last_elem(camp::tuple&& tuple){ - return tuple_from_seq(camp::make_idx_seq_t{},std::move(tuple)); - }; - } // namespace detail + // Use the size of param_tup to generate the argument list. + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) + { + return camp::make_tuple(); + } + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) + { + return camp::get(param_tup).get_lambda_arg_tup(); + } + template + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) + { + return camp::tuple_cat_pair( + camp::get(param_tup).get_lambda_arg_tup(), + LAMBDA_ARG_TUP_V(camp::num())); + } +public: + ForallParamPack() {} - // Make a tuple of the param pack except the final element... - template - constexpr auto make_forall_param_pack(Args&&... args){ - // We assume the last element of the pack is the lambda so we need to strip it from the list. - auto stripped_arg_tuple = detail::strip_last_elem( camp::forward_as_tuple(std::forward(args)...) ); - return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); + RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() + { + return LAMBDA_ARG_TUP_V(camp::num()); } - //=========================================================================== - - - - //=========================================================================== - // - // - // Callable should be the last argument in the param pack, just extract it... - // - // - template - constexpr auto&& get_lambda(Args&&... args){ - return camp::get( camp::forward_as_tuple(std::forward(args)...) ); - } - //=========================================================================== - - - - //=========================================================================== - // - // - // Checking expected argument list against the assumed lambda. - // - // - namespace detail { - - // - // - // Lambda traits Utilities - // - // - template - struct lambda_traits; - - template - struct lambda_traits - { // non-const specialization - using arg_type = First; - }; - template - struct lambda_traits - { // const specialization - using arg_type = First; - }; - - template - typename lambda_traits::arg_type* lambda_arg_helper(T); - - - // - // - // List manipulation Utilities - // - // - template - constexpr auto list_remove_pointer(const camp::list&){ - return camp::list::type>...>{}; - } - - template - constexpr auto list_add_lvalue_ref(const camp::list&){ - return camp::list::type...>{}; - } - - template - constexpr auto tuple_to_list(const camp::tuple&) { - return camp::list{}; - } - - // TODO : Change to std::is_invocable at c++17 - template - struct is_invocable : - std::is_constructible< - std::function, - std::reference_wrapper::type> - >{}; - - template - using void_t = void; - - template - struct has_empty_op : std::false_type{}; - - template - struct has_empty_op)>> : std::true_type{}; - - template - struct get_lambda_index_type { - typedef typename std::remove_pointer< - decltype(lambda_arg_helper( - &camp::decay::operator()) - ) - >::type type; - }; - - // If LAMBDA::operator() is not available this probably isn't a generic lambda and we can't extract and check args. - template - constexpr concepts::enable_if>> check_invocable(LAMBDA&&, const camp::list&) {} - - template - constexpr concepts::enable_if> check_invocable(LAMBDA&&, const camp::list&) { -#if !defined(RAJA_ENABLE_HIP) - static_assert(is_invocable::type, EXPECTED_ARGS...>::value, "LAMBDA Not invocable w/ EXPECTED_ARGS. Ordering and types must match between RAJA::expt::Reduce() and ValOp arguments."); -#endif - } - } // namespace detail + using lambda_arg_seq = + camp::make_idx_seq_t::value>; + + template + ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)){}; +}; // struct ForallParamPack + + +//=========================================================================== +// +// +// ParamMultiplexer is how we hook into the individual calls within forall_impl. +// +// +struct ParamMultiplexer { + template > + static void constexpr init(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_init(EXEC_POL(), + typename FP::params_seq(), + f_params, + std::forward(args)...); + } + template > + static void constexpr combine(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_combine(EXEC_POL(), + typename FP::params_seq(), + f_params, + std::forward(args)...); + } + template > + static void constexpr resolve(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_resolve(EXEC_POL(), + typename FP::params_seq(), + f_params, + std::forward(args)...); + } +}; +//=========================================================================== + +//=========================================================================== +// +// +// ForallParamPack generators. +// +// +RAJA_INLINE static auto get_empty_forall_param_pack() +{ + static ForallParamPack<> p; + return p; +} - template - constexpr - void - check_forall_optional_args(Lambda&& l, ForallParams& fpp) { +namespace detail +{ +// all_true trick to perform variadic expansion in static asserts. +// https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template +template +struct bool_pack; +template +using all_true = std::is_same, bool_pack>; - using expected_arg_type_list = decltype( detail::list_add_lvalue_ref( - detail::list_remove_pointer( - detail::tuple_to_list( - fpp.lambda_args() - ) - ) - )); +template +using check_types_derive_base = + all_true::value...>; +} // namespace detail - detail::check_invocable(std::forward(l), expected_arg_type_list{}); - } - //=========================================================================== - +template +constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) +{ + static_assert(detail::check_types_derive_base...>::value, + "Forall optional arguments do not derive ForallParamBase. " + "Please see Reducer, ReducerLoc and KernelName for examples."); + return ForallParamPack...>(std::move(tuple)); +} - //=========================================================================== - // - // - // Type trailts for SFINAE work. - // - // - namespace type_traits - { - template struct is_ForallParamPack : std::false_type {}; - template struct is_ForallParamPack> : std::true_type {}; - template struct is_ForallParamPack_empty : std::true_type {}; - template struct is_ForallParamPack_empty> : std::false_type {}; - template <> struct is_ForallParamPack_empty> : std::true_type {}; - } - //=========================================================================== - - - - //=========================================================================== - // - // - // Invoke Forall with Params. - // - // - namespace detail { - template - RAJA_HOST_DEVICE - constexpr - auto get_lambda_args(FP& fpp) - -> decltype( *camp::get( fpp.lambda_args() ) ) { - return ( *camp::get( fpp.lambda_args() ) ); - } - - CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, - Fn&& f, - camp::idx_seq, - Ts&&... extra) - { - return f(std::forward(extra...), ( get_lambda_args(params) )...); - } - } // namespace detail - - //CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, Fn&& f, Ts&&... extra) - { - return detail::invoke_with_order( - camp::forward(params), - camp::forward(f), - typename camp::decay::lambda_arg_seq(), - camp::forward(extra)...); - } - //=========================================================================== +namespace detail +{ +// Maybe we should do a lot of these with structs... +template +constexpr auto tuple_from_seq(const camp::idx_seq&, TupleType&& tuple) +{ + return camp::forward_as_tuple( + camp::get(std::forward(tuple))...); +}; + +template +constexpr auto strip_last_elem(camp::tuple&& tuple) +{ + return tuple_from_seq(camp::make_idx_seq_t{}, + std::move(tuple)); +}; +} // namespace detail -} // namespace expt -} // namespace RAJA -#endif // FORALL_PARAM_HPP +// Make a tuple of the param pack except the final element... +template +constexpr auto make_forall_param_pack(Args&&... args) +{ + // We assume the last element of the pack is the lambda so we need to strip it + // from the list. + auto stripped_arg_tuple = detail::strip_last_elem( + camp::forward_as_tuple(std::forward(args)...)); + return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); +} +//=========================================================================== + + +//=========================================================================== +// +// +// Callable should be the last argument in the param pack, just extract it... +// +// +template +constexpr auto&& get_lambda(Args&&... args) +{ + return camp::get( + camp::forward_as_tuple(std::forward(args)...)); +} +//=========================================================================== + + +//=========================================================================== +// +// +// Checking expected argument list against the assumed lambda. +// +// +namespace detail +{ + +// +// +// Lambda traits Utilities +// +// +template +struct lambda_traits; + +template +struct lambda_traits { // non-const specialization + using arg_type = First; +}; +template +struct lambda_traits { // const specialization + using arg_type = First; +}; + +template +typename lambda_traits::arg_type* lambda_arg_helper(T); + + +// +// +// List manipulation Utilities +// +// +template +constexpr auto list_remove_pointer(const camp::list&) +{ + return camp::list::type>...>{}; +} + +template +constexpr auto list_add_lvalue_ref(const camp::list&) +{ + return camp::list::type...>{}; +} + +template +constexpr auto tuple_to_list(const camp::tuple&) +{ + return camp::list{}; +} + +// TODO : Change to std::is_invocable at c++17 +template +struct is_invocable + : std::is_constructible< + std::function, + std::reference_wrapper::type>> { +}; + +template +using void_t = void; + +template +struct has_empty_op : std::false_type { +}; + +template +struct has_empty_op)>> + : std::true_type { +}; + +template +struct get_lambda_index_type { + typedef typename std::remove_pointer::operator()))>::type type; +}; + +// If LAMBDA::operator() is not available this probably isn't a generic lambda +// and we can't extract and check args. +template +constexpr concepts::enable_if>> +check_invocable(LAMBDA&&, const camp::list&) +{ +} + +template +constexpr concepts::enable_if> check_invocable( + LAMBDA&&, + const camp::list&) +{ +#if !defined(RAJA_ENABLE_HIP) + static_assert(is_invocable::type, + EXPECTED_ARGS...>::value, + "LAMBDA Not invocable w/ EXPECTED_ARGS. Ordering and types " + "must match between RAJA::expt::Reduce() and ValOp arguments."); +#endif +} + +} // namespace detail + + +template +constexpr void check_forall_optional_args(Lambda&& l, ForallParams& fpp) +{ + + using expected_arg_type_list = decltype(detail::list_add_lvalue_ref( + detail::list_remove_pointer(detail::tuple_to_list(fpp.lambda_args())))); + + detail::check_invocable(std::forward(l), expected_arg_type_list{}); +} +//=========================================================================== + + +//=========================================================================== +// +// +// Type trailts for SFINAE work. +// +// +namespace type_traits +{ +template +struct is_ForallParamPack : std::false_type { +}; +template +struct is_ForallParamPack> : std::true_type { +}; + +template +struct is_ForallParamPack_empty : std::true_type { +}; +template +struct is_ForallParamPack_empty> + : std::false_type { +}; +template <> +struct is_ForallParamPack_empty> : std::true_type { +}; +} // namespace type_traits +//=========================================================================== + + +//=========================================================================== +// +// +// Invoke Forall with Params. +// +// +namespace detail +{ +template +RAJA_HOST_DEVICE constexpr auto get_lambda_args(FP& fpp) + -> decltype(*camp::get(fpp.lambda_args())) +{ + return (*camp::get(fpp.lambda_args())); +} + +CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, + Fn&& f, + camp::idx_seq, + Ts&&... extra) +{ + return f(std::forward(extra...), + (get_lambda_args(params))...); +} +} // namespace detail + +// CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, + Fn&& f, + Ts&&... extra) +{ + return detail::invoke_with_order( + camp::forward(params), + camp::forward(f), + typename camp::decay::lambda_arg_seq(), + camp::forward(extra)...); +} +//=========================================================================== + +} // namespace expt +} // namespace RAJA + +#endif // FORALL_PARAM_HPP diff --git a/include/RAJA/pattern/params/kernel_name.hpp b/include/RAJA/pattern/params/kernel_name.hpp index e768d8dd59..5b69c89a71 100644 --- a/include/RAJA/pattern/params/kernel_name.hpp +++ b/include/RAJA/pattern/params/kernel_name.hpp @@ -10,23 +10,19 @@ namespace expt namespace detail { - struct KernelName : public ForallParamBase { - RAJA_HOST_DEVICE KernelName() {} - KernelName(const char* name_in) : name(name_in) {} - const char* name; - }; +struct KernelName : public ForallParamBase { + RAJA_HOST_DEVICE KernelName() {} + KernelName(const char* name_in) : name(name_in) {} + const char* name; +}; -} // namespace detail - -inline auto KernelName(const char * n) -{ - return detail::KernelName(n); -} -} // namespace expt +} // namespace detail +inline auto KernelName(const char* n) { return detail::KernelName(n); } +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // KERNEL_NAME_HPP +#endif // KERNEL_NAME_HPP diff --git a/include/RAJA/pattern/params/params_base.hpp b/include/RAJA/pattern/params/params_base.hpp index 98380f6ffc..debfd66282 100644 --- a/include/RAJA/pattern/params/params_base.hpp +++ b/include/RAJA/pattern/params/params_base.hpp @@ -7,129 +7,253 @@ namespace RAJA namespace expt { - template - struct ValLoc { - using index_type = IndexType; - using value_type = T; - - ValLoc() = default; - RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) {} - - ValLoc(ValLoc const &) = default; - ValLoc(ValLoc &&) = default; - ValLoc& operator=(ValLoc const &) = default; - ValLoc& operator=(ValLoc &&) = default; - - RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const { return val > rhs.val; } - - RAJA_HOST_DEVICE constexpr const value_type& getVal() const {return val;} - RAJA_HOST_DEVICE constexpr const index_type& getLoc() const {return loc;} - - RAJA_HOST_DEVICE void set(T inval, IndexType inindex) {val = inval; loc = inindex;} - RAJA_HOST_DEVICE void setVal(T inval) {val = inval;} - RAJA_HOST_DEVICE void setLoc(IndexType inindex) {loc = inindex;} - - value_type val; - index_type loc = -1; - }; - - template class Op> - struct ValOp { - using value_type = T; - using op_type = Op; - - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator+=(const value_type& rhs) { val += rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator&=(const value_type& rhs) { val &= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator|=(const value_type& rhs) { val |= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator&=(value_type& rhs) { val &= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator|=(value_type& rhs) { val |= rhs; return *this; } - - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { val < rhs.val; return *this; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { val > rhs.val; return *this; } - - value_type val = op_type::identity(); - }; - - template class Op> - struct ValOp , Op> { - using index_type = IndexType; - using value_type = ValLoc; - using op_type = Op; - using valloc_value_type = typename value_type::value_type; - using valloc_index_type = typename value_type::index_type; - - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) : val(v, l) {} - - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & minloc(valloc_value_type v, valloc_index_type l) { return min(value_type(v,l)); } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & maxloc(valloc_value_type v, valloc_index_type l) { return max(value_type(v,l)); } - - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { return val > rhs.val; } - - value_type val = op_type::identity(); - }; - - template class Op> - using ValLocOp = ValOp, Op>; +template +struct ValLoc { + using index_type = IndexType; + using value_type = T; + + ValLoc() = default; + RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) + { + } + + ValLoc(ValLoc const&) = default; + ValLoc(ValLoc&&) = default; + ValLoc& operator=(ValLoc const&) = default; + ValLoc& operator=(ValLoc&&) = default; + + RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const + { + return val < rhs.val; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const + { + return val > rhs.val; + } + + RAJA_HOST_DEVICE constexpr const value_type& getVal() const { return val; } + RAJA_HOST_DEVICE constexpr const index_type& getLoc() const { return loc; } + + RAJA_HOST_DEVICE void set(T inval, IndexType inindex) + { + val = inval; + loc = inindex; + } + RAJA_HOST_DEVICE void setVal(T inval) { val = inval; } + RAJA_HOST_DEVICE void setLoc(IndexType inindex) { loc = inindex; } + + value_type val; + index_type loc = -1; +}; + +template class Op> +struct ValOp { + using value_type = T; + using op_type = Op; + + ValOp() = default; + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) { + val = v; + } + return *this; + } + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) { + val = v; + } + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator+=(const value_type& rhs) + { + val += rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator&=(const value_type& rhs) + { + val &= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator|=(const value_type& rhs) + { + val |= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator&=(value_type& rhs) + { + val &= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator|=(value_type& rhs) + { + val |= rhs; + return *this; + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + val < rhs.val; + return *this; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + val > rhs.val; + return *this; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +struct ValOp, Op> { + using index_type = IndexType; + using value_type = ValLoc; + using op_type = Op; + using valloc_value_type = typename value_type::value_type; + using valloc_index_type = typename value_type::index_type; + + ValOp() = default; + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) + : val(v, l) + { + } + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) { + val = v; + } + return *this; + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) { + val = v; + } + return *this; + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& minloc(valloc_value_type v, + valloc_index_type l) + { + return min(value_type(v, l)); + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& maxloc(valloc_value_type v, + valloc_index_type l) + { + return max(value_type(v, l)); + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + return val < rhs.val; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + return val > rhs.val; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +using ValLocOp = ValOp, Op>; namespace detail { - struct ForallParamBase { +struct ForallParamBase { - // Some of this can be made virtual in c++20, for now must be defined in each child class - // if any arguments to the forall lambda are needed (e.g. KernelName is excluded.) - using ARG_TUP_T = camp::tuple<>; - using ARG_LIST_T = typename ARG_TUP_T::TList; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } - static constexpr size_t num_lambda_args = camp::tuple_size::value; - - }; + // Some of this can be made virtual in c++20, for now must be defined in each + // child class if any arguments to the forall lambda are needed (e.g. + // KernelName is excluded.) + using ARG_TUP_T = camp::tuple<>; + using ARG_LIST_T = typename ARG_TUP_T::TList; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail -} // namespace expt +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // RAJA_PARAMS_BASE +#endif // RAJA_PARAMS_BASE diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 78b6d7714d..89e020ec55 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -20,19 +20,21 @@ namespace operators template struct limits> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc min() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + min() { return RAJA::expt::ValLoc(RAJA::operators::limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc max() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + max() { return RAJA::expt::ValLoc(RAJA::operators::limits::max()); } }; -} // namespace operators +} // namespace operators -} // namespace RAJA +} // namespace RAJA namespace RAJA { @@ -43,159 +45,197 @@ namespace detail { #if defined(RAJA_CUDA_ACTIVE) - using device_mem_pool_t = RAJA::cuda::device_mempool_type; +using device_mem_pool_t = RAJA::cuda::device_mempool_type; #elif defined(RAJA_HIP_ACTIVE) - using device_mem_pool_t = RAJA::hip::device_mempool_type; +using device_mem_pool_t = RAJA::hip::device_mempool_type; #elif defined(RAJA_SYCL_ACTIVE) - using device_mem_pool_t = RAJA::sycl::device_mempool_type; +using device_mem_pool_t = RAJA::sycl::device_mempool_type; #endif - // - // - // Basic Reducer - // - // - - // Basic data type Reducer - // T must be a basic data type - // VOp must be ValOp - template - struct Reducer : public ForallParamBase { - using op = Op; - using value_type = T; // This is a basic data type - - Reducer() = default; - - // Basic data type constructor - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target(target_in){} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // Internal ValOp object that is used within RAJA::forall/launch - VOp m_valop = VOp{}; - - // Points to the user specified result variable - value_type *target = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - value_type temp = op{}(*target, in); - *target = temp; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. - value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; +// +// +// Basic Reducer +// +// + +// Basic data type Reducer +// T must be a basic data type +// VOp must be ValOp +template +struct Reducer : public ForallParamBase { + using op = Op; + using value_type = T; // This is a basic data type + + Reducer() = default; + + // Basic data type constructor + RAJA_HOST_DEVICE Reducer(value_type *target_in) + : m_valop(VOp{}), target(target_in) + { + } + + Reducer(Reducer const &) = default; + Reducer(Reducer &&) = default; + Reducer &operator=(Reducer const &) = default; + Reducer &operator=(Reducer &&) = default; + + // Internal ValOp object that is used within RAJA::forall/launch + VOp m_valop = VOp{}; + + // Points to the user specified result variable + value_type *target = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + value_type temp = op{}(*target, in); + *target = temp; + } + + RAJA_HOST_DEVICE + value_type &getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type *devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int *device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. - using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } - - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; - - // Partial specialization of Reducer for ValLoc - // T is a deduced basic data type - // I is a deduced index type - template class Op> - struct Reducer, ValLoc, ValLoc>, ValLoc, ValOp, Op>> : public ForallParamBase { - using target_value_type = T; - using target_index_type = I; - using value_type = ValLoc; - using op = Op; - using VOp = ValOp, Op>; - - Reducer() = default; - - // ValLoc constructor - // Note that the target_ variables point to the val and loc within the user defined target ValLoc - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target_value(&target_in->val), target_index(&target_in->loc) {} - - // Dual input constructor for ReduceLoc<>(data, index) case - // The target_ variables point to vars defined by the user - RAJA_HOST_DEVICE Reducer(target_value_type *data_in, target_index_type *index_in) : m_valop(VOp{}), target_value(data_in), target_index(index_in) {} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // The ValLoc within m_valop is initialized with data and location values from either a ValLoc, or dual data and location values, passed into the constructor - VOp m_valop = VOp{}; - - // Points to either dual value and index defined by the user, or value and index within a ValLoc defined by the user - target_value_type *target_value = nullptr; - target_index_type *target_index = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - // Create a different temp ValLoc solely for combining - value_type temp(*target_value, *target_index); - temp = op{}(temp, in); - *target_value = temp.val; - *target_index = temp.loc; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. - value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } + + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; + +// Partial specialization of Reducer for ValLoc +// T is a deduced basic data type +// I is a deduced index type +template + class Op> +struct Reducer, ValLoc, ValLoc>, + ValLoc, + ValOp, Op>> : public ForallParamBase { + using target_value_type = T; + using target_index_type = I; + using value_type = ValLoc; + using op = Op; + using VOp = ValOp, Op>; + + Reducer() = default; + + // ValLoc constructor + // Note that the target_ variables point to the val and loc within the user + // defined target ValLoc + RAJA_HOST_DEVICE Reducer(value_type *target_in) + : m_valop(VOp{}), + target_value(&target_in->val), + target_index(&target_in->loc) + { + } + + // Dual input constructor for ReduceLoc<>(data, index) case + // The target_ variables point to vars defined by the user + RAJA_HOST_DEVICE Reducer(target_value_type *data_in, + target_index_type *index_in) + : m_valop(VOp{}), target_value(data_in), target_index(index_in) + { + } + + Reducer(Reducer const &) = default; + Reducer(Reducer &&) = default; + Reducer &operator=(Reducer const &) = default; + Reducer &operator=(Reducer &&) = default; + + // The ValLoc within m_valop is initialized with data and location values from + // either a ValLoc, or dual data and location values, passed into the + // constructor + VOp m_valop = VOp{}; + + // Points to either dual value and index defined by the user, or value and + // index within a ValLoc defined by the user + target_value_type *target_value = nullptr; + target_index_type *target_index = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + // Create a different temp ValLoc solely for combining + value_type temp(*target_value, *target_index); + temp = op{}(temp, in); + *target_value = temp.val; + *target_index = temp.loc; + } + + RAJA_HOST_DEVICE + value_type &getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type *devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int *device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. - using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail // Standard use case. template