// NOTE(review): this text is a unified git diff touching four files, not a compilable translation
// unit. Several same-line template parameter/argument lists appear to be missing (a bare
// `template` keyword, `std::array const&`, `std::forward(f)`, `#include` without a header name) —
// presumably lost during extraction; confirm against the repository before applying.
diff --git a/include/ddc/for_each.hpp b/include/ddc/for_each.hpp
index 4856a3888..c6fc358f7 100644
--- a/include/ddc/for_each.hpp
+++ b/include/ddc/for_each.hpp
@@ -33,6 +33,23 @@ void for_each_serial(
     }
 }
 
+template // NOTE(review): no parameter list follows `template` in this text — presumably stripped; confirm
+KOKKOS_FUNCTION void annotated_for_each_serial(
+        std::array const& begin,
+        std::array const& end,
+        Functor const& f,
+        Is const&... is) noexcept
+{
+    static constexpr std::size_t I = sizeof...(Is); // number of indices already bound by outer recursion levels
+    if constexpr (I == N) { // all N levels have supplied an index: call the functor
+        f(RetType(is...)); // the functor receives a RetType built from the bound indices
+    } else {
+        for (Element ii = begin[I]; ii < end[I]; ++ii) { // half-open range [begin[I], end[I]) for level I
+            annotated_for_each_serial(begin, end, f, is..., ii); // recurse with ii appended to the bound indices
+        }
+    }
+}
+
 } // namespace detail
 
 /** iterates over a nD domain in serial
@@ -49,4 +66,21 @@ void for_each(DiscreteDomain const& domain, Functor&& f) noexcept
     detail::for_each_serial>(begin, end, std::forward(f));
 }
 
+/** Iterates over an nD domain in serial. Can be called from a device kernel.
+ * @param[in] domain the domain over which to iterate
+ * @param[in] f      a functor taking an index as parameter; called through detail::annotated_for_each_serial
+ */
+template // NOTE(review): template parameter list missing in this text — presumably stripped; confirm
+KOKKOS_FUNCTION void annotated_for_each(
+        DiscreteDomain const& domain,
+        Functor&& f) noexcept
+{
+    DiscreteElement const ddc_begin = domain.front();
+    DiscreteElement const ddc_end = domain.front() + domain.extents(); // used as the exclusive upper bound of the loops
+    std::array const begin = detail::array(ddc_begin);
+    std::array const end = detail::array(ddc_end);
+    detail::annotated_for_each_serial<
+            DiscreteElement>(begin, end, std::forward(f));
+}
+
 } // namespace ddc
diff --git a/include/ddc/transform_reduce.hpp b/include/ddc/transform_reduce.hpp
index ac0747cf9..ab494a66c 100644
--- a/include/ddc/transform_reduce.hpp
+++ b/include/ddc/transform_reduce.hpp
@@ -52,6 +52,50 @@ T transform_reduce_serial(
     DDC_IF_NVCC_THEN_POP
 }
 
+/** A serial reduction over an nD domain, returning the reduced value. Can be called from a device kernel.
+ * @param[in] domain the range over which to apply the algorithm
+ * @param[in] neutral the neutral element of the reduction operation
+ * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
+ *            results of transform, the results of other reduce and neutral.
+ * @param[in] transform a unary FunctionObject that will be applied to each element of the input
+ *            range. The return type must be acceptable as input to reduce
+ * @param[in] dcoords discrete elements from dimensions already in a loop (empty on the initial call)
+ */
+template <
+        class... DDims,
+        class T,
+        class BinaryReductionOp,
+        class UnaryTransformOp,
+        class... DCoords>
+KOKKOS_FUNCTION T annotated_transform_reduce_serial(
+        DiscreteDomain const& domain,
+        [[maybe_unused]] T const neutral,
+        BinaryReductionOp const& reduce,
+        UnaryTransformOp const& transform,
+        DCoords const&... dcoords) noexcept
+{
+    DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function) // presumably silences an NVCC diagnostic about the if-constexpr returns — confirm
+    if constexpr (sizeof...(DCoords) == sizeof...(DDims)) { // a coordinate is bound for every dimension
+        return transform(DiscreteElement(dcoords...)); // leaf of the recursion: neutral is not used here
+    } else {
+        using CurrentDDim = type_seq_element_t>; // NOTE(review): template arguments missing in this text — confirm
+        T result = neutral; // partial result accumulated over the current dimension
+        for (DiscreteElement const ii : select(domain)) {
+            result = reduce(
+                    result,
+                    annotated_transform_reduce_serial( // reduce the remaining dimensions with ii bound
+                            domain,
+                            neutral,
+                            reduce,
+                            transform,
+                            dcoords...,
+                            ii));
+        }
+        return result;
+    }
+    DDC_IF_NVCC_THEN_POP
+}
+
 } // namespace detail
 
 /** A reduction over a nD domain in serial
@@ -76,4 +120,26 @@ T transform_reduce(
             std::forward(transform));
 }
 
+/** A reduction over an nD domain in serial. Can be called from a device kernel.
+ * @param[in] domain the range over which to apply the algorithm
+ * @param[in] neutral the neutral element of the reduction operation
+ * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
+ *            results of transform, the results of other reduce and neutral.
+ * @param[in] transform a unary FunctionObject that will be applied to each element of the input
+ *            range. The return type must be acceptable as input to reduce
+ */
+template // NOTE(review): template parameter list missing in this text — presumably stripped; confirm
+KOKKOS_FUNCTION T annotated_transform_reduce(
+        DiscreteDomain const& domain,
+        T neutral,
+        BinaryReductionOp&& reduce,
+        UnaryTransformOp&& transform) noexcept
+{
+    return detail::annotated_transform_reduce_serial( // starts the recursion with no coordinate bound yet
+            domain,
+            neutral,
+            std::forward(reduce),
+            std::forward(transform));
+}
+
 } // namespace ddc
diff --git a/tests/for_each.cpp b/tests/for_each.cpp
index 6c6555731..a9a2e7d16 100644
--- a/tests/for_each.cpp
+++ b/tests/for_each.cpp
@@ -10,6 +10,8 @@
 
 #include
 
+#include // NOTE(review): header name missing in this text — presumably stripped; confirm
+
 namespace DDC_HIP_5_7_ANONYMOUS_NAMESPACE_WORKAROUND(FOR_EACH_CPP) {
 
 using DElem0D = ddc::DiscreteElement<>;
@@ -81,3 +83,71 @@ TEST(ForEachSerialHost, TwoDimensions)
     ddc::for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; });
     EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size());
 }
+
+void TestAnnotatedForEachSerialDevice1D(ddc::ChunkSpan<
+                                        int,
+                                        DDomX,
+                                        Kokkos::layout_right,
+                                        typename Kokkos::DefaultExecutionSpace::memory_space> view)
+{
+    ddc::parallel_for_each(
+            Kokkos::DefaultExecutionSpace(),
+            DDom0D(), // 0D domain — presumably a single invocation of the lambda; confirm
+            KOKKOS_LAMBDA(DElem0D) {
+                ddc::annotated_for_each(view.domain(), [=](DElemX const ix) { view(ix) = 1; }); // serial loop run inside the kernel
+            });
+}
+
+TEST(AnnotatedForEachSerialDevice, OneDimension)
+{
+    DDomX const dom(lbound_x, nelems_x);
+    Kokkos::View const
+            storage("", dom.size());
+    ddc::ChunkSpan<
+            int,
+            DDomX,
+            Kokkos::layout_right,
+            typename Kokkos::DefaultExecutionSpace::memory_space> const view(storage.data(), dom);
+    TestAnnotatedForEachSerialDevice1D(view);
+    EXPECT_EQ( // the number of entries equal to 1 must match the domain size
+            Kokkos::Experimental::
+                    count(Kokkos::DefaultExecutionSpace(),
+                          Kokkos::Experimental::begin(storage),
+                          Kokkos::Experimental::end(storage),
+                          1),
+            dom.size());
+}
+
+void TestAnnotatedForEachSerialDevice2D(ddc::ChunkSpan<
+                                        int,
+                                        DDomXY,
+                                        Kokkos::layout_right,
+                                        typename Kokkos::DefaultExecutionSpace::memory_space> view)
+{
+    ddc::parallel_for_each(
+            Kokkos::DefaultExecutionSpace(),
+            DDom0D(), // 0D domain — presumably a single invocation of the lambda; confirm
+            KOKKOS_LAMBDA(DElem0D) {
+                ddc::annotated_for_each(view.domain(), [=](DElemXY const ixy) { view(ixy) = 1; }); // serial loop run inside the kernel
+            });
+}
+
+TEST(AnnotatedForEachSerialDevice, TwoDimensions)
+{
+    DDomXY const dom(lbound_x_y, nelems_x_y);
+    Kokkos::View const
+            storage("", dom.size());
+    ddc::ChunkSpan<
+            int,
+            DDomXY,
+            Kokkos::layout_right,
+            typename Kokkos::DefaultExecutionSpace::memory_space> const view(storage.data(), dom);
+    TestAnnotatedForEachSerialDevice2D(view);
+    EXPECT_EQ( // the number of entries equal to 1 must match the domain size
+            Kokkos::Experimental::
+                    count(Kokkos::DefaultExecutionSpace(),
+                          Kokkos::Experimental::begin(storage),
+                          Kokkos::Experimental::end(storage),
+                          1),
+            dom.size());
+}
diff --git a/tests/transform_reduce.cpp b/tests/transform_reduce.cpp
index f16d76f04..649fc1b33 100644
--- a/tests/transform_reduce.cpp
+++ b/tests/transform_reduce.cpp
@@ -8,6 +8,8 @@
 
 #include
 
+#include // NOTE(review): header name missing in this text — presumably stripped; confirm
+
 using DElem0D = ddc::DiscreteElement<>;
 using DVect0D = ddc::DiscreteVector<>;
 using DDom0D = ddc::DiscreteDomain<>;
@@ -74,3 +76,39 @@ TEST(TransformReduce, TwoDimensions)
             ddc::transform_reduce(dom, 0, ddc::reducer::sum(), chunk),
             dom.size() * (dom.size() - 1) / 2);
 }
+
+int TestAnnotatedTransformReduce(ddc::ChunkSpan<
+                                 int,
+                                 DDomXY,
+                                 Kokkos::layout_right,
+                                 typename Kokkos::DefaultExecutionSpace::memory_space> chunk)
+{
+    Kokkos::View const count(""); // receives the reduction result written inside the kernel
+    ddc::parallel_for_each(
+            Kokkos::DefaultExecutionSpace(),
+            DDom0D(), // 0D domain — presumably a single invocation of the lambda; confirm
+            KOKKOS_LAMBDA(DElem0D) {
+                count() = ddc::annotated_transform_reduce(
+                        chunk.domain(),
+                        0,
+                        ddc::reducer::sum(),
+                        chunk);
+            });
+    Kokkos::View const count_host
+            = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), count);
+    return count_host(); // read the result from the host-side copy
+}
+
+TEST(AnnotatedTransformReduce, TwoDimensions)
+{
+    DDomXY const dom(lbound_x_y, nelems_x_y);
+    Kokkos::View const
+            storage("", dom.size());
+    Kokkos::Experimental::fill(Kokkos::DefaultExecutionSpace(), storage, 1); // every entry is 1, so the expected sum is dom.size()
+    ddc::ChunkSpan<
+            int,
+            DDomXY,
+            Kokkos::layout_right,
+            typename Kokkos::DefaultExecutionSpace::memory_space> const chunk(storage.data(), dom);
+    EXPECT_EQ(TestAnnotatedTransformReduce(chunk), dom.size());
+}