From 022fb8d352af03fcd69aab5b8096c93344d036ec Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 30 Apr 2024 08:28:41 -0600 Subject: [PATCH 01/13] Fix layout_padded required_span_size, add submdspan friend declaration --- include/experimental/__p2642_bits/layout_padded.hpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/experimental/__p2642_bits/layout_padded.hpp b/include/experimental/__p2642_bits/layout_padded.hpp index 1b02233f..3da81d20 100644 --- a/include/experimental/__p2642_bits/layout_padded.hpp +++ b/include/experimental/__p2642_bits/layout_padded.hpp @@ -388,7 +388,7 @@ class layout_left_padded::mapping { for (rank_type r = 1; r < extents_type::rank(); ++r) { value *= exts.extent(r); } - return value; + return value - (padded_stride.value(0) - exts.extent(0)); } } @@ -494,6 +494,17 @@ class layout_left_padded::mapping { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; template From 3a28cd913743f4530a7f2c3628422a4bd6a5823e Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 30 Apr 2024 10:15:46 -0600 Subject: [PATCH 02/13] Store intermediate --- examples/CMakeLists.txt | 1 + .../__p2630_bits/submdspan_mapping.hpp | 72 ++++++++++++++++--- tests/test_layout_padded_left.cpp | 1 + tests/test_submdspan.cpp | 3 +- 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 32981b6c..e97e9dc5 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,3 +16,4 @@ add_subdirectory(dot_product) add_subdirectory(tiled_layout) add_subdirectory(restrict_accessor) add_subdirectory(aligned_accessor) +add_subdirectory(submdspan) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 543a0919..56779318 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -31,6 +31,9 @@ template struct submdspan_mapping_result { }; namespace detail { +using detail::first_of; +using detail::stride_of; +using detail::inv_map_rank; // We use const Slice& and not Slice&& because the various // submdspan_mapping_impl overloads use their slices arguments @@ -93,12 +96,33 @@ construct_sub_strides(const SrcMapping &src_mapping, namespace detail { // Figure out whether to preserve layout_left -template -struct preserve_layout_left_mapping; +template +struct deduce_layout_left_submapping; + +template +struct deduce_layout_left_submapping< + IndexType, std::index_sequence, SubRank, SliceSpecifiers...> { + + constexpr static int NumGaps = + (((Idx>0 && std::is_convertible_v)?1:0) + ... + 0); + constexpr static bool value = + // Preserve layout for rank 0 + (SubRank == 0) || + ( + // Slice specifiers up to subrank need to be full_extent_t - except + // for the last one which could also be tuple but not a strided index + // range slice specifiers after subrank are integrals + ((Idx > SubRank - 1) || // these are only integral slice specifiers + (std::is_same_v) || + ((Idx == SubRank - 1) && + std::is_convertible_v>)) && + ...); +}; template -struct preserve_layout_left_mapping, SubRank, +struct preserve_layout_left_padded_mapping, SubRank, SliceSpecifiers...> { + constexpr static bool value = // Preserve layout for rank 0 (SubRank == 0) || @@ -106,7 +130,7 @@ struct preserve_layout_left_mapping, SubRank, // Slice specifiers up to subrank need to be full_extent_t - except // for the last one which could also be tuple but not a strided index // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1) || // these are only integral slice specifiers + ((Idx > SubRank - 1 + NumGaps) || // these are only integral slice specifiers (std::is_same_v) || ((Idx == SubRank - 1) && std::is_convertible_v>)) && @@ -229,6 +253,31 @@ struct preserve_layout_right_mapping, SubRank, std::is_convertible_v>)) && ...); }; +/* +template +struct preserve_layout_right_padded_mapping; + +template +struct preserve_layout_right_padded_mapping, SubRank, + SliceSpecifiers...> { + constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); + constexpr static bool value = + // Preserve layout for rank 0 + (SubRank == 0) || + ( + // The last subrank slice specifiers need to be full_extent_t - except + // for the srcrank-subrank one which could also be tuple but not a + // strided index range slice specifiers before srcrank-subrank are + // integrals + ((Idx < SrcRank - SubRank) || // these are only integral slice specifiers + (std::is_same_v) || + ((Idx == SrcRank - 1) && std::is_convertible_v>) + ((Idx == SrcRank - SubRank) && + std::is_convertible_v>)) && + ...); +}; +*/ + } // namespace detail // Suppress spurious warning with NVCC about no return statement. @@ -250,11 +299,12 @@ struct preserve_layout_right_mapping, SubRank, #pragma diagnostic push #pragma diag_suppress = implicit_return_from_non_void_function #endif +template template template MDSPAN_INLINE_FUNCTION constexpr auto -layout_right::mapping::submdspan_mapping_impl( +layout_right_padded::mapping::submdspan_mapping_impl( SliceSpecifiers... slices) const { // get sub extents using src_ext_t = Extents; @@ -262,11 +312,11 @@ layout_right::mapping::submdspan_mapping_impl( using dst_ext_t = decltype(dst_ext); // determine new layout type - constexpr bool preserve_layout = detail::preserve_layout_right_mapping< + constexpr bool preserve_layout = detail::preserve_layout_right_padded_mapping< decltype(std::make_index_sequence()), dst_ext_t::rank(), SliceSpecifiers...>::value; using dst_layout_t = - std::conditional_t; + std::conditional_t; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. @@ -281,7 +331,13 @@ layout_right::mapping::submdspan_mapping_impl( if constexpr (std::is_same_v) { // layout_right case - return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext), + static_cast(this->operator()(detail::first_of(slices)...))}; + } else if constexpr (std::is_same_v) { + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, extent(src_ext_t::rank()-1)), + static_cast(this->operator()(detail::first_of(slices)...))}; } else { // layout_stride case auto inv_map = detail::inv_map_rank( diff --git a/tests/test_layout_padded_left.cpp b/tests/test_layout_padded_left.cpp index 2e7fe546..15f072ba 100644 --- a/tests/test_layout_padded_left.cpp +++ b/tests/test_layout_padded_left.cpp @@ -482,3 +482,4 @@ TEST(LayoutRightTests, access) { Kokkos::extents>({}, 4); ASSERT_EQ(mapping6(), 0); } + diff --git a/tests/test_submdspan.cpp b/tests/test_submdspan.cpp index c565056c..49d03501 100644 --- a/tests/test_submdspan.cpp +++ b/tests/test_submdspan.cpp @@ -145,10 +145,11 @@ using submdspan_test_types = , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, std::pair, Kokkos::full_extent_t, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, std::pair, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, int, Kokkos::full_extent_t> + // LayoutLeft to layout_left_padded + , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> // LayoutLeft to LayoutStride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, int> - , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, Kokkos::strided_slice> From 36c3a73908002f9c9d7f0c4c1a42f70cdf3abb54 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 2 May 2024 14:20:24 -0600 Subject: [PATCH 03/13] SOmething working again --- .../__p2630_bits/submdspan_mapping.hpp | 90 +++++++++++-------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 56779318..b3f3c248 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -96,46 +96,53 @@ construct_sub_strides(const SrcMapping &src_mapping, namespace detail { // Figure out whether to preserve layout_left -template +template struct deduce_layout_left_submapping; -template +template struct deduce_layout_left_submapping< - IndexType, std::index_sequence, SubRank, SliceSpecifiers...> { + IndexType, SubRank, std::index_sequence, + SliceSpecifiers...> { + + using CountRange = + index_sequence_scan_impl<0,(std::is_convertible_v?0:1)...>; + //__static_partial_sums...>; constexpr static int NumGaps = - (((Idx>0 && std::is_convertible_v)?1:0) + ... + 0); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // Slice specifiers up to subrank need to be full_extent_t - except - // for the last one which could also be tuple but not a strided index - // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SubRank - 1) && - std::is_convertible_v>)) && - ...); -}; + (((Idx>0 && CountRange::get(Idx)==1 && std::is_convertible_v)?1:0) + ... + 0); -template -struct preserve_layout_left_padded_mapping, SubRank, - SliceSpecifiers...> { + constexpr static bool layout_left_value = + // Use layout_left for rank 0 + (SubRank == 0) || + // Use layout_left for rank 1 if leftmost slice specifier is range like + ((SubRank == 1) && ((Idx==0 || CountRange::get(Idx)==1) && ...)) || + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the leftmost SubRank slices are ranges + (((Idx < SubRank - 1 && std::is_same_v) || + (Idx == SubRank -1) || // technically SliceSpecifier needs to be a range, but its implied by the last condition + (Idx >= SubRank && CountRange::get(Idx) == SubRank)) && ...); - constexpr static bool value = - // Preserve layout for rank 0 + constexpr static bool layout_left_padded_value = + // Use layout_left_padded for rank 0 (SubRank == 0) || + // Use layout_left_padded for rank 1 if leftmost slice specifier is range like + ((SubRank == 1) && ((Idx==0 || CountRange::get(Idx)==1) && ...)) || + // layout_left_padded case for SubRank > 1 ( - // Slice specifiers up to subrank need to be full_extent_t - except - // for the last one which could also be tuple but not a strided index - // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1 + NumGaps) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SubRank - 1) && - std::is_convertible_v>)) && - ...); + // leftmost must be range + ( + (Idx == 0) || + (Idx > 0 && Idx <= NumGaps && CountRange::get(Idx) == 1) || + (Idx > NumGaps && Idx < NumGaps+SubRank-1 && std::is_same_v) || + (Idx == NumGaps + SubRank -1 && + (std::is_same_v || + std::is_convertible_v>)) || + (Idx >= NumGaps + SubRank && CountRange::get(Idx) == SubRank) + ) && ... ); }; + } // namespace detail // Suppress spurious warning with NVCC about no return statement. @@ -170,11 +177,14 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) using dst_ext_t = decltype(dst_ext); // figure out sub layout type - constexpr bool preserve_layout = detail::preserve_layout_left_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), decltype(std::make_index_sequence()), + SliceSpecifiers...>; + using dst_layout_t = - std::conditional_t; + std::conditional_t, layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. @@ -189,7 +199,13 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) if constexpr (std::is_same_v) { // layout_left case - return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext), + static_cast(this->operator()(detail::first_of(slices)...))}; + } else if constexpr (std::is_same_v>) { + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1)), + static_cast(this->operator()(detail::first_of(slices)...))}; } else { // layout_stride case auto inv_map = detail::inv_map_rank( @@ -279,7 +295,7 @@ struct preserve_layout_right_padded_mapping, SubRank */ } // namespace detail - +/* // Suppress spurious warning with NVCC about no return statement. // This is a known issue in NVCC and NVC++ // Depending on the CUDA and GCC version we need both the builtin @@ -372,7 +388,7 @@ layout_right_padded::mapping::submdspan_mapping_impl( #elif defined __NVCOMPILER #pragma diagnostic pop #endif - +*/ //********************************** // layout_stride submdspan_mapping //********************************* From 516f4f047059f3fdc98fd6c4d4a29cccb06c5b6c Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 30 May 2024 10:35:08 -0600 Subject: [PATCH 04/13] Working on layout_right -> layout_right_padded --- .../__p2630_bits/submdspan_mapping.hpp | 160 +++++++++++++++++- 1 file changed, 156 insertions(+), 4 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index b3f3c248..de6f90c6 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -75,7 +75,7 @@ any_slice_out_of_bounds(const extents& exts, std::make_index_sequence(), exts, slices...); } - + // constructs sub strides template MDSPAN_INLINE_FUNCTION @@ -184,7 +184,7 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) using dst_layout_t = std::conditional_t, layout_stride>>; + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded, layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. @@ -202,9 +202,9 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) return submdspan_mapping_result{ dst_mapping_t(dst_ext), static_cast(this->operator()(detail::first_of(slices)...))}; - } else if constexpr (std::is_same_v>) { + } else if constexpr (std::is_same_v>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext, stride(1)), + dst_mapping_t(dst_ext, stride(1+deduce_layout::NumGaps)), static_cast(this->operator()(detail::first_of(slices)...))}; } else { // layout_stride case @@ -246,6 +246,156 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) //********************************* namespace detail { +// Figure out whether to preserve layout_right +template +struct deduce_layout_right_submapping; + +template +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence, + SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using CountRange = + index_sequence_scan_impl<0,(std::is_convertible_v?0:1)...>; + //__static_partial_sums...>; + constexpr static int NumGaps = + (((Idx)?1:0) + ... + 0); + + constexpr static bool layout_right_value = + // Use layout_right for rank 0 + (SubRank == 0) || + // Use layout_right for rank 1 if rightmost slice specifier is range like + ((SubRank == 1) && ((CountRange::get(Idx)==0) && ...)) || + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the rightmost SubRank slices are ranges + (((Idx >= Rank - SubRank && std::is_same_v) || + (Idx <= Rank - SubRank && CountRange::get(Idx) == 0)) && ...); + + constexpr static bool layout_right_padded_value = ( + // Use layout_right_padded for rank 0 + (SubRank == 0) || + // Use layout_right_padded for rank 1 if rightmost slice specifier is range like + ((SubRank == 1) && ((CountRange::get(Idx)==0) && ...)) || + // layout_right_padded case for SubRank > 1 + ( + // rightmost must be range + ( + (Idx == 0) || + (Idx < Rank - NumGaps - SubRank && CountRange::get(Idx) == 0) || + (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && std::is_same_v) || + (Idx == Rank - NumGaps - SubRank && + (std::is_same_v || + std::is_convertible_v>)) || + (Idx >= Rank - NumGaps - 1 && CountRange::get(Idx) == SubRank - 1) + ) && ... )); +}; + +} // namespace detail + +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic push + #pragma diag_suppress implicit_return_from_non_void_function + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +// Actual submdspan mapping call +template +template +MDSPAN_INLINE_FUNCTION +constexpr auto +layout_right::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + using dst_layout_t = + std::conditional_t, layout_stride>>; + using dst_mapping_t = typename dst_layout_t::template mapping; + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? + this->required_span_size() : + this->operator()(detail::first_of(slices)...) + ); + + if constexpr (std::is_same_v) { + // layout_right case + return submdspan_mapping_result{ + dst_mapping_t(dst_ext), + static_cast(this->operator()(detail::first_of(slices)...))}; + } else if constexpr (std::is_same_v>) { + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(src_ext_t::rank() - 2 - deduce_layout::NumGaps)), + static_cast(this->operator()(detail::first_of(slices)...))}; + } else { + // layout_stride case + auto inv_map = detail::inv_map_rank( + std::integral_constant(), + std::index_sequence<>(), + slices...); + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, detail::construct_sub_strides( + *this, inv_map, + // HIP needs deduction guides to have markups so we need to be explicit + // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue + // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether + #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{detail::stride_of(slices)...})), + #else + std::tuple{detail::stride_of(slices)...})), + #endif + offset}; + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #ifdef __CUDA_ARCH__ + #pragma diagnostic pop + #endif + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif +#if 0 +//********************************** +// layout_right submdspan_mapping +//********************************* +namespace detail { + // Figure out whether to preserve layout_right template struct preserve_layout_right_mapping; @@ -389,6 +539,8 @@ layout_right_padded::mapping::submdspan_mapping_impl( #pragma diagnostic pop #endif */ +#endif + //********************************** // layout_stride submdspan_mapping //********************************* From 8ee72308e51b862a7b7c2ca6ffe9916fc9c18f27 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 31 May 2024 14:01:13 -0600 Subject: [PATCH 05/13] Fix submdspan tests --- .../__p2630_bits/submdspan_mapping.hpp | 194 +++--------------- tests/foo_customizations.hpp | 37 ++-- tests/test_submdspan.cpp | 3 +- tests/test_submdspan_static_slice.cpp | 28 +-- 4 files changed, 63 insertions(+), 199 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index de6f90c6..489182b1 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -117,23 +117,23 @@ struct deduce_layout_left_submapping< // Use layout_left for rank 0 (SubRank == 0) || // Use layout_left for rank 1 if leftmost slice specifier is range like - ((SubRank == 1) && ((Idx==0 || CountRange::get(Idx)==1) && ...)) || + ((SubRank == 1) && ((Idx > 0 || std::is_same_v || std::is_convertible_v>) && ...)) || // Preserve if leftmost SubRank-1 slices are full_extent_t and // the leftmost SubRank slices are ranges (((Idx < SubRank - 1 && std::is_same_v) || - (Idx == SubRank -1) || // technically SliceSpecifier needs to be a range, but its implied by the last condition + (Idx == SubRank -1 && (std::is_same_v || std::is_convertible_v>)) || (Idx >= SubRank && CountRange::get(Idx) == SubRank)) && ...); constexpr static bool layout_left_padded_value = // Use layout_left_padded for rank 0 (SubRank == 0) || // Use layout_left_padded for rank 1 if leftmost slice specifier is range like - ((SubRank == 1) && ((Idx==0 || CountRange::get(Idx)==1) && ...)) || + ((SubRank == 1) && ((Idx > 0 || std::is_same_v || std::is_convertible_v>) && ...)) || // layout_left_padded case for SubRank > 1 ( // leftmost must be range ( - (Idx == 0) || + (Idx == 0 && (std::is_same_v || std::is_convertible_v>)) || (Idx > 0 && Idx <= NumGaps && CountRange::get(Idx) == 1) || (Idx > NumGaps && Idx < NumGaps+SubRank-1 && std::is_same_v) || (Idx == NumGaps + SubRank -1 && @@ -201,11 +201,11 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) // layout_left case return submdspan_mapping_result{ dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + offset}; } else if constexpr (std::is_same_v>) { return submdspan_mapping_result{ dst_mapping_t(dst_ext, stride(1+deduce_layout::NumGaps)), - static_cast(this->operator()(detail::first_of(slices)...))}; + offset}; } else { // layout_stride case auto inv_map = detail::inv_map_rank( @@ -268,28 +268,44 @@ struct deduce_layout_right_submapping< // Use layout_right for rank 0 (SubRank == 0) || // Use layout_right for rank 1 if rightmost slice specifier is range like - ((SubRank == 1) && ((CountRange::get(Idx)==0) && ...)) || + ((SubRank == 1) && + ( + ((Idx < Rank - 1) || + ((Idx == Rank - 1) && (std::is_same_v || + std::is_convertible_v>)) + ) && ... + ) + ) || // Preserve if rightmost SubRank-1 slices are full_extent_t and // the rightmost SubRank slices are ranges (((Idx >= Rank - SubRank && std::is_same_v) || - (Idx <= Rank - SubRank && CountRange::get(Idx) == 0)) && ...); + (Idx == Rank - SubRank && std::is_convertible_v>) || + (Idx < Rank - SubRank && CountRange::get(Idx) == 0)) && ...); constexpr static bool layout_right_padded_value = ( // Use layout_right_padded for rank 0 (SubRank == 0) || // Use layout_right_padded for rank 1 if rightmost slice specifier is range like - ((SubRank == 1) && ((CountRange::get(Idx)==0) && ...)) || + ((SubRank == 1) && + ( + ((Idx < Rank - 1) || + ((Idx == Rank - 1) && (std::is_same_v || + std::is_convertible_v>)) + ) && ... + ) + ) || // layout_right_padded case for SubRank > 1 ( // rightmost must be range ( - (Idx == 0) || (Idx < Rank - NumGaps - SubRank && CountRange::get(Idx) == 0) || - (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && std::is_same_v) || (Idx == Rank - NumGaps - SubRank && (std::is_same_v || std::is_convertible_v>)) || - (Idx >= Rank - NumGaps - 1 && CountRange::get(Idx) == SubRank - 1) + (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && std::is_same_v) || + (Idx >= Rank - NumGaps - 1 && Idx < Rank -1 && CountRange::get(Idx) == SubRank - 1) || + (Idx == Rank - 1 && (std::is_same_v || + std::is_convertible_v>)) ) && ... )); }; @@ -351,159 +367,11 @@ layout_right::mapping::submdspan_mapping_impl(SliceSpecifiers... slices // layout_right case return submdspan_mapping_result{ dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + offset}; } else if constexpr (std::is_same_v>) { return submdspan_mapping_result{ dst_mapping_t(dst_ext, stride(src_ext_t::rank() - 2 - deduce_layout::NumGaps)), - static_cast(this->operator()(detail::first_of(slices)...))}; - } else { - // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether - #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - offset}; - } -#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) - __builtin_unreachable(); -#endif -} -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif -#if 0 -//********************************** -// layout_right submdspan_mapping -//********************************* -namespace detail { - -// Figure out whether to preserve layout_right -template -struct preserve_layout_right_mapping; - -template -struct preserve_layout_right_mapping, SubRank, - SliceSpecifiers...> { - constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // The last subrank slice specifiers need to be full_extent_t - except - // for the srcrank-subrank one which could also be tuple but not a - // strided index range slice specifiers before srcrank-subrank are - // integrals - ((Idx < - SrcRank - SubRank) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SrcRank - SubRank) && - std::is_convertible_v>)) && - ...); -}; -/* -template -struct preserve_layout_right_padded_mapping; - -template -struct preserve_layout_right_padded_mapping, SubRank, - SliceSpecifiers...> { - constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // The last subrank slice specifiers need to be full_extent_t - except - // for the srcrank-subrank one which could also be tuple but not a - // strided index range slice specifiers before srcrank-subrank are - // integrals - ((Idx < SrcRank - SubRank) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SrcRank - 1) && std::is_convertible_v>) - ((Idx == SrcRank - SubRank) && - std::is_convertible_v>)) && - ...); -}; -*/ - -} // namespace detail -/* -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif -template -template -template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_right_padded::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { - // get sub extents - using src_ext_t = Extents; - auto dst_ext = submdspan_extents(extents(), slices...); - using dst_ext_t = decltype(dst_ext); - - // determine new layout type - constexpr bool preserve_layout = detail::preserve_layout_right_padded_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; - using dst_mapping_t = typename dst_layout_t::template mapping; - - // Figure out if any slice's lower bound equals the corresponding extent. - // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. - const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); - auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); - - if constexpr (std::is_same_v) { - // layout_right case - return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; - } else if constexpr (std::is_same_v) { - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, extent(src_ext_t::rank()-1)), - static_cast(this->operator()(detail::first_of(slices)...))}; + offset}; } else { // layout_stride case auto inv_map = detail::inv_map_rank( @@ -538,8 +406,6 @@ layout_right_padded::mapping::submdspan_mapping_impl( #elif defined __NVCOMPILER #pragma diagnostic pop #endif -*/ -#endif //********************************** // layout_stride submdspan_mapping diff --git a/tests/foo_customizations.hpp b/tests/foo_customizations.hpp index e04bb44e..cf37d5e5 100644 --- a/tests/foo_customizations.hpp +++ b/tests/foo_customizations.hpp @@ -225,31 +225,28 @@ class layout_foo::mapping { } #endif + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + // use the fact that layout_foo is layout_right with rank 1 or rank 2 + // i.e. we don't need to implement everything here, we just reuse submdspan_mapping for layout_right + Kokkos::layout_right::mapping compatible_mapping(src.extents()); + auto sub_right = submdspan_mapping(compatible_mapping, slices...); + if constexpr (std::is_same_v) { + // NVCC does not like deduction here, so get the extents type explicitly + using sub_ext_t = std::remove_const_t>; + auto sub_mapping = layout_foo::mapping(sub_right.mapping.extents()); + return Kokkos::submdspan_mapping_result{sub_mapping, sub_right.offset}; + } else { + return sub_right; + } + } + private: _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; }; -#if MDSPAN_HAS_CXX_17 -template -MDSPAN_INLINE_FUNCTION -constexpr auto -submdspan_mapping(const layout_foo::mapping &src_mapping, - SliceSpecifiers... slices) { - // use the fact that layout_foo is layout_right with rank 1 or rank 2 - // i.e. we don't need to implement everything here, we just reuse submdspan_mapping for layout_right - Kokkos::layout_right::mapping compatible_mapping(src_mapping.extents()); - auto sub_right = submdspan_mapping(compatible_mapping, slices...); - if constexpr (std::is_same_v) { - // NVCC does not like deduction here, so get the extents type explicitly - using sub_ext_t = std::remove_const_t>; - auto sub_mapping = layout_foo::mapping(sub_right.mapping.extents()); - return Kokkos::submdspan_mapping_result{sub_mapping, sub_right.offset}; - } else { - return sub_right; - } -} -#endif } #endif diff --git a/tests/test_submdspan.cpp b/tests/test_submdspan.cpp index 49d03501..8df5ea64 100644 --- a/tests/test_submdspan.cpp +++ b/tests/test_submdspan.cpp @@ -155,11 +155,12 @@ using submdspan_test_types = , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, Kokkos::strided_slice> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, Kokkos::full_extent_t, int, std::pair, int, int, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, Kokkos::full_extent_t, std::pair, int, Kokkos::full_extent_t, int> + // layout_right to layout_right_padded + , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, Kokkos::full_extent_t, std::pair> // layout_right to layout_stride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , std::tuple, args_t<10>, Kokkos::extents, Kokkos::strided_slice,std::integral_constant>> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, int> - , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::full_extent_t, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, Kokkos::strided_slice> diff --git a/tests/test_submdspan_static_slice.cpp b/tests/test_submdspan_static_slice.cpp index d0567a89..e7427e49 100644 --- a/tests/test_submdspan_static_slice.cpp +++ b/tests/test_submdspan_static_slice.cpp @@ -175,7 +175,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_FullIndexFull) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto integralConstant) { @@ -202,7 +202,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_FullIndexFull) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto integralConstant) { @@ -382,7 +382,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_TupleFullTuple) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -396,7 +396,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -424,7 +424,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_TupleFullTuple) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -438,7 +438,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -466,7 +466,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_iddd_TupleFullTuple) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -480,7 +480,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_iddd_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -508,7 +508,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_i345_TupleFullTuple) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -522,7 +522,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_i345_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -598,7 +598,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_idd_FullTuple) { input_extents_type input_extents{3, 4}; { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -611,7 +611,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_idd_FullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -633,7 +633,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_idd_TupleFull) { input_extents_type input_extents{3, 4}; { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -646,7 +646,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_idd_TupleFull) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { From c25618ddbd83404b61d87ac2a37e13c10518fba8 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 31 May 2024 14:21:21 -0600 Subject: [PATCH 06/13] Remove unnecessary diffs --- examples/CMakeLists.txt | 1 - include/experimental/__p2630_bits/submdspan_mapping.hpp | 4 ---- include/experimental/__p2642_bits/layout_padded.hpp | 2 +- tests/test_layout_padded_left.cpp | 1 - 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index e97e9dc5..32981b6c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,4 +16,3 @@ add_subdirectory(dot_product) add_subdirectory(tiled_layout) add_subdirectory(restrict_accessor) add_subdirectory(aligned_accessor) -add_subdirectory(submdspan) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 489182b1..2ab1f420 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -31,10 +31,6 @@ template struct submdspan_mapping_result { }; namespace detail { -using detail::first_of; -using detail::stride_of; -using detail::inv_map_rank; - // We use const Slice& and not Slice&& because the various // submdspan_mapping_impl overloads use their slices arguments // multiple times. This makes perfect forwarding not useful, but we diff --git a/include/experimental/__p2642_bits/layout_padded.hpp b/include/experimental/__p2642_bits/layout_padded.hpp index 3da81d20..99e24fa4 100644 --- a/include/experimental/__p2642_bits/layout_padded.hpp +++ b/include/experimental/__p2642_bits/layout_padded.hpp @@ -388,7 +388,7 @@ class layout_left_padded::mapping { for (rank_type r = 1; r < extents_type::rank(); ++r) { value *= exts.extent(r); } - return value - (padded_stride.value(0) - exts.extent(0)); + return value; } } diff --git a/tests/test_layout_padded_left.cpp b/tests/test_layout_padded_left.cpp index 15f072ba..2e7fe546 100644 --- a/tests/test_layout_padded_left.cpp +++ b/tests/test_layout_padded_left.cpp @@ -482,4 +482,3 @@ TEST(LayoutRightTests, access) { Kokkos::extents>({}, 4); ASSERT_EQ(mapping6(), 0); } - From 598bd7f6f074c453dc0893c771d89264d4a79611 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 31 May 2024 14:23:10 -0600 Subject: [PATCH 07/13] clang-format submdspan_mapping file --- .../__p2630_bits/submdspan_mapping.hpp | 477 ++++++++++-------- 1 file changed, 253 insertions(+), 224 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 2ab1f420..2456ee59 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -17,8 +17,8 @@ #pragma once #include -#include #include +#include #include // index_sequence namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -46,39 +46,29 @@ one_slice_out_of_bounds(const IndexType& ext, const Slice& slice) return static_cast(detail::first_of(slice)) == static_cast(ext); } -template -MDSPAN_INLINE_FUNCTION -constexpr bool +template +MDSPAN_INLINE_FUNCTION constexpr bool any_slice_out_of_bounds_helper(std::index_sequence, - const extents& exts, - const Slices& ... slices) -{ + const extents &exts, + const Slices &... slices) { return _MDSPAN_FOLD_OR( - (one_slice_out_of_bounds(exts.extent(RankIndices), slices)) - ); + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); } -template -MDSPAN_INLINE_FUNCTION -constexpr bool -any_slice_out_of_bounds(const extents& exts, - const Slices& ... slices) -{ +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents &exts, + const Slices &... slices) { return any_slice_out_of_bounds_helper( - std::make_index_sequence(), - exts, slices...); + std::make_index_sequence(), exts, slices...); } // constructs sub strides template -MDSPAN_INLINE_FUNCTION -constexpr auto -construct_sub_strides(const SrcMapping &src_mapping, - std::index_sequence, - const std::tuple &slices_stride_factor) { +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence, + const std::tuple &slices_stride_factor) { using index_type = typename SrcMapping::index_type; return std::array{ (static_cast(src_mapping.stride(InvMapIdxs)) * @@ -96,47 +86,65 @@ template struct deduce_layout_left_submapping; -template +template struct deduce_layout_left_submapping< - IndexType, SubRank, std::index_sequence, - SliceSpecifiers...> { - - - using CountRange = - index_sequence_scan_impl<0,(std::is_convertible_v?0:1)...>; - //__static_partial_sums...>; - constexpr static int NumGaps = - (((Idx>0 && CountRange::get(Idx)==1 && std::is_convertible_v)?1:0) + ... + 0); + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + using CountRange = index_sequence_scan_impl< + 0, (std::is_convertible_v ? 0 : 1)...>; + //__static_partial_sums...>; + constexpr static int NumGaps = + (((Idx > 0 && CountRange::get(Idx) == 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... + 0); constexpr static bool layout_left_value = // Use layout_left for rank 0 (SubRank == 0) || // Use layout_left for rank 1 if leftmost slice specifier is range like - ((SubRank == 1) && ((Idx > 0 || std::is_same_v || std::is_convertible_v>) && ...)) || + ((SubRank == 1) && + ((Idx > 0 || std::is_same_v || + std::is_convertible_v>)&&...)) || // Preserve if leftmost SubRank-1 slices are full_extent_t and // the leftmost SubRank slices are ranges (((Idx < SubRank - 1 && std::is_same_v) || - (Idx == SubRank -1 && (std::is_same_v || std::is_convertible_v>)) || - (Idx >= SubRank && CountRange::get(Idx) == SubRank)) && ...); + (Idx == SubRank - 1 && + (std::is_same_v || + std::is_convertible_v>)) || + (Idx >= SubRank && CountRange::get(Idx) == SubRank)) && + ...); constexpr static bool layout_left_padded_value = // Use layout_left_padded for rank 0 (SubRank == 0) || - // Use layout_left_padded for rank 1 if leftmost slice specifier is range like - ((SubRank == 1) && ((Idx > 0 || std::is_same_v || std::is_convertible_v>) && ...)) || + // Use layout_left_padded for rank 1 if leftmost slice specifier is range + // like + ((SubRank == 1) && + ((Idx > 0 || std::is_same_v || + std::is_convertible_v>)&&...)) || // layout_left_padded case for SubRank > 1 ( - // leftmost must be range - ( - (Idx == 0 && (std::is_same_v || std::is_convertible_v>)) || - (Idx > 0 && Idx <= NumGaps && CountRange::get(Idx) == 1) || - (Idx > NumGaps && Idx < NumGaps+SubRank-1 && std::is_same_v) || - (Idx == NumGaps + SubRank -1 && + // leftmost must be range + ((Idx == 0 && (std::is_same_v || - std::is_convertible_v>)) || - (Idx >= NumGaps + SubRank && CountRange::get(Idx) == SubRank) - ) && ... ); + std::is_convertible_v>)) || + (Idx > 0 && Idx <= NumGaps && CountRange::get(Idx) == 1) || + (Idx > NumGaps && Idx < NumGaps + SubRank - 1 && + std::is_same_v) || + (Idx == NumGaps + SubRank - 1 && + (std::is_same_v || + std::is_convertible_v>)) || + (Idx >= NumGaps + SubRank && CountRange::get(Idx) == SubRank)) && + ...); }; } // namespace detail @@ -147,25 +155,25 @@ struct deduce_layout_left_submapping< // and the diagnostic push. I tried really hard to find something shorter // but no luck ... #if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif #elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function #endif // Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { // compute sub extents using src_ext_t = Extents; @@ -174,67 +182,70 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) // figure out sub layout type using deduce_layout = detail::deduce_layout_left_submapping< - typename dst_ext_t::index_type, dst_ext_t::rank(), decltype(std::make_index_sequence()), - SliceSpecifiers...>; - - using dst_layout_t = - std::conditional_t, layout_stride>>; + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + using dst_layout_t = std::conditional_t< + deduce_layout::layout_left_value, layout_left, + std::conditional_t< + deduce_layout::layout_left_padded_value, + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded, + layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); if constexpr (std::is_same_v) { // layout_left case + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (std::is_same_v>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - offset}; - } else if constexpr (std::is_same_v>) { - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, stride(1+deduce_layout::NumGaps)), - offset}; + dst_mapping_t(dst_ext, stride(1 + deduce_layout::NumGaps)), offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether - #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - offset}; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } #if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif #elif defined __NVCOMPILER - #pragma diagnostic pop +#pragma diagnostic pop #endif //********************************** @@ -247,62 +258,73 @@ template struct deduce_layout_right_submapping; -template +template struct deduce_layout_right_submapping< - IndexType, SubRank, std::index_sequence, - SliceSpecifiers...> { - - static constexpr size_t Rank = sizeof...(Idx); - using CountRange = - index_sequence_scan_impl<0,(std::is_convertible_v?0:1)...>; - //__static_partial_sums...>; - constexpr static int NumGaps = - (((Idx)?1:0) + ... + 0); + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using CountRange = index_sequence_scan_impl< + 0, (std::is_convertible_v ? 0 : 1)...>; + //__static_partial_sums...>; + constexpr static int NumGaps = + (((Idx < Rank - 1 && CountRange::get(Idx) == SubRank - 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... + 0); constexpr static bool layout_right_value = // Use layout_right for rank 0 (SubRank == 0) || // Use layout_right for rank 1 if rightmost slice specifier is range like - ((SubRank == 1) && - ( - ((Idx < Rank - 1) || - ((Idx == Rank - 1) && (std::is_same_v || - std::is_convertible_v>)) - ) && ... - ) - ) || + ((SubRank == 1) && + (((Idx < Rank - 1) || + ((Idx == Rank - 1) && + (std::is_same_v || + std::is_convertible_v>))) && + ...)) || // Preserve if rightmost SubRank-1 slices are full_extent_t and // the rightmost SubRank slices are ranges - (((Idx >= Rank - SubRank && std::is_same_v) || - (Idx == Rank - SubRank && std::is_convertible_v>) || - (Idx < Rank - SubRank && CountRange::get(Idx) == 0)) && ...); + (((Idx >= Rank - SubRank && + std::is_same_v) || + (Idx == Rank - SubRank && + std::is_convertible_v>) || + (Idx < Rank - SubRank && CountRange::get(Idx) == 0)) && + ...); constexpr static bool layout_right_padded_value = ( // Use layout_right_padded for rank 0 (SubRank == 0) || - // Use layout_right_padded for rank 1 if rightmost slice specifier is range like - ((SubRank == 1) && - ( - ((Idx < Rank - 1) || - ((Idx == Rank - 1) && (std::is_same_v || - std::is_convertible_v>)) - ) && ... - ) - ) || + // Use layout_right_padded for rank 1 if rightmost slice specifier is + // range like + ((SubRank == 1) && + (((Idx < Rank - 1) || + ((Idx == Rank - 1) && + (std::is_same_v || + std::is_convertible_v>))) && + ...)) || // layout_right_padded case for SubRank > 1 ( - // rightmost must be range - ( - (Idx < Rank - NumGaps - SubRank && CountRange::get(Idx) == 0) || - (Idx == Rank - NumGaps - SubRank && + // rightmost must be range + ((Idx < Rank - NumGaps - SubRank && CountRange::get(Idx) == 0) || + (Idx == Rank - NumGaps - SubRank && (std::is_same_v || - std::is_convertible_v>)) || - (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && std::is_same_v) || - (Idx >= Rank - NumGaps - 1 && Idx < Rank -1 && CountRange::get(Idx) == SubRank - 1) || - (Idx == Rank - 1 && (std::is_same_v || - std::is_convertible_v>)) - ) && ... )); + std::is_convertible_v>)) || + (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && + std::is_same_v) || + (Idx >= Rank - NumGaps - 1 && Idx < Rank - 1 && + CountRange::get(Idx) == SubRank - 1) || + (Idx == Rank - 1 && + (std::is_same_v || + std::is_convertible_v>))) && + ...)); }; } // namespace detail @@ -313,25 +335,25 @@ struct deduce_layout_right_submapping< // and the diagnostic push. I tried really hard to find something shorter // but no luck ... #if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif #elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function #endif // Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_right::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { +MDSPAN_INLINE_FUNCTION constexpr auto +layout_right::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { // compute sub extents using src_ext_t = Extents; @@ -340,67 +362,73 @@ layout_right::mapping::submdspan_mapping_impl(SliceSpecifiers... slices // figure out sub layout type using deduce_layout = detail::deduce_layout_right_submapping< - typename dst_ext_t::index_type, dst_ext_t::rank(), decltype(std::make_index_sequence()), - SliceSpecifiers...>; - - using dst_layout_t = - std::conditional_t, layout_stride>>; + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + using dst_layout_t = std::conditional_t< + deduce_layout::layout_right_value, layout_right, + std::conditional_t< + deduce_layout::layout_right_padded_value, + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded, + layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); if constexpr (std::is_same_v) { // layout_right case + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (std::is_same_v< + dst_layout_t, + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded< + dynamic_extent>>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext), + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::NumGaps)), offset}; - } else if constexpr (std::is_same_v>) { - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, stride(src_ext_t::rank() - 2 - deduce_layout::NumGaps)), - offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether - #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - offset}; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } #if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif #elif defined __NVCOMPILER - #pragma diagnostic pop +#pragma diagnostic pop #endif //********************************** @@ -408,39 +436,40 @@ layout_right::mapping::submdspan_mapping_impl(SliceSpecifiers... slices //********************************* template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { + SliceSpecifiers... slices) const { auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); - - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple(detail::stride_of(slices)...))), + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue +#if defined(_MDSPAN_HAS_HIP) || \ + (defined(__NVCC__) && \ + (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) + std::tuple( + detail::stride_of(slices)...))), #else - std::tuple(detail::stride_of(slices)...))), + std::tuple(detail::stride_of(slices)...))), #endif - offset}; + offset + }; } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE From 9b219e93d4e292a8a9c4ce465d2243dce31a22aa Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 31 May 2024 15:53:38 -0600 Subject: [PATCH 08/13] Add protection for C++14 to not have submdspan_mapping --- tests/foo_customizations.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/foo_customizations.hpp b/tests/foo_customizations.hpp index cf37d5e5..381b1ad9 100644 --- a/tests/foo_customizations.hpp +++ b/tests/foo_customizations.hpp @@ -225,6 +225,7 @@ class layout_foo::mapping { } #endif +#if MDSPAN_HAS_CXX_17 template friend constexpr auto submdspan_mapping( const mapping& src, SliceSpecifiers... slices) { @@ -241,6 +242,7 @@ class layout_foo::mapping { return sub_right; } } +#endif private: _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; From f79db0b1d57df6a22bd458307d7c46797016323b Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Mon, 3 Jun 2024 21:24:58 -0600 Subject: [PATCH 09/13] Fix typos Fix typos Co-authored-by: Mark Hoemmen --- include/experimental/__p2630_bits/submdspan_mapping.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 2456ee59..18f2c3d4 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -222,7 +222,7 @@ layout_left::mapping::submdspan_mapping_impl( // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // the issue But Clang-CUDA also doesn't accept the use of deduction guide so -// disable it for CUDA alltogether +// disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) std::tuple{ detail::stride_of(slices)...})), @@ -405,7 +405,7 @@ layout_right::mapping::submdspan_mapping_impl( // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have // the issue But Clang-CUDA also doesn't accept the use of deduction guide so -// disable it for CUDA alltogether +// disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) std::tuple{ detail::stride_of(slices)...})), From 7bf2aff6b426db077511a02b6dda8fe424ad84e8 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 6 Jun 2024 14:06:58 -0600 Subject: [PATCH 10/13] Clang-format again --- .../__p2630_bits/submdspan_mapping.hpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 18f2c3d4..035f798b 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -36,14 +36,13 @@ namespace detail { // multiple times. This makes perfect forwarding not useful, but we // still don't want to pass those (possibly of size 64 x 3 bits) // objects by value. -template -MDSPAN_INLINE_FUNCTION -constexpr bool -one_slice_out_of_bounds(const IndexType& ext, const Slice& slice) -{ - using common_t = std::common_type_t; - return static_cast(detail::first_of(slice)) == static_cast(ext); +template +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t; + return static_cast(detail::first_of(slice)) == + static_cast(ext); } template ? 0 : 1)...>; //__static_partial_sums...>; + // IndexType>...>; constexpr static int NumGaps = (((Idx > 0 && CountRange::get(Idx) == 1 && std::is_convertible_v) @@ -267,7 +266,7 @@ struct deduce_layout_right_submapping< using CountRange = index_sequence_scan_impl< 0, (std::is_convertible_v ? 0 : 1)...>; //__static_partial_sums...>; + // IndexType>...>; constexpr static int NumGaps = (((Idx < Rank - 1 && CountRange::get(Idx) == SubRank - 1 && std::is_convertible_v) From 655ff3a5865282e4dd0bb3fac03bda11f02a62b8 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Sat, 8 Jun 2024 16:07:55 -0600 Subject: [PATCH 11/13] Rewrite layout preservation logic --- .../__p2630_bits/submdspan_mapping.hpp | 204 +++++++++--------- 1 file changed, 105 insertions(+), 99 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 035f798b..b61c2ae2 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -73,6 +73,26 @@ MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( (static_cast(src_mapping.stride(InvMapIdxs)) * static_cast(std::get(slices_stride_factor)))...}; } + +template +struct is_range_slice { + constexpr static bool value = + std::is_same_v || + std::is_convertible_v>; +}; + +template +constexpr bool is_range_slice_v = is_range_slice::value; + +template +struct is_index_slice { + constexpr static bool value = std::is_convertible_v; +}; + +template +constexpr bool is_index_slice_v = is_index_slice::value; + } // namespace detail //********************************** @@ -91,59 +111,55 @@ struct deduce_layout_left_submapping< IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { using CountRange = index_sequence_scan_impl< - 0, (std::is_convertible_v ? 0 : 1)...>; - //__static_partial_sums...>; + 0, (is_index_slice_v ? 0 : 1)...>; + constexpr static int NumGaps = (((Idx > 0 && CountRange::get(Idx) == 1 && - std::is_convertible_v) + is_index_slice_v) ? 1 : 0) + ... + 0); - constexpr static bool layout_left_value = - // Use layout_left for rank 0 - (SubRank == 0) || - // Use layout_left for rank 1 if leftmost slice specifier is range like - ((SubRank == 1) && - ((Idx > 0 || std::is_same_v || - std::is_convertible_v>)&&...)) || + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v)&&...); + } else { // Preserve if leftmost SubRank-1 slices are full_extent_t and - // the leftmost SubRank slices are ranges - (((Idx < SubRank - 1 && std::is_same_v) || - (Idx == SubRank - 1 && - (std::is_same_v || - std::is_convertible_v>)) || - (Idx >= SubRank && CountRange::get(Idx) == SubRank)) && - ...); - - constexpr static bool layout_left_padded_value = - // Use layout_left_padded for rank 0 - (SubRank == 0) || - // Use layout_left_padded for rank 1 if leftmost slice specifier is range - // like - ((SubRank == 1) && - ((Idx > 0 || std::is_same_v || - std::is_convertible_v>)&&...)) || - // layout_left_padded case for SubRank > 1 - ( - // leftmost must be range - ((Idx == 0 && - (std::is_same_v || - std::is_convertible_v>)) || - (Idx > 0 && Idx <= NumGaps && CountRange::get(Idx) == 1) || - (Idx > NumGaps && Idx < NumGaps + SubRank - 1 && - std::is_same_v) || - (Idx == NumGaps + SubRank - 1 && - (std::is_same_v || - std::is_convertible_v>)) || - (Idx >= NumGaps + SubRank && CountRange::get(Idx) == SubRank)) && - ...); + // the slice at idx Subrank - 1 is a range and + // for idx > SubRank the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v) || + ((Idx == SubRank - 1) && is_range_slice_v) || + ((Idx > SubRank - 1) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= NumGaps) && is_index_slice_v) || + ((Idx > NumGaps && Idx < NumGaps + SubRank - 1) && std::is_same_v) || + ((Idx == NumGaps + SubRank - 1) && is_range_slice_v) || + ((Idx > NumGaps + SubRank - 1) && is_index_slice_v)) && ... ); + } }; } // namespace detail @@ -186,9 +202,9 @@ layout_left::mapping::submdspan_mapping_impl( SliceSpecifiers...>; using dst_layout_t = std::conditional_t< - deduce_layout::layout_left_value, layout_left, + deduce_layout::layout_left_value(), layout_left, std::conditional_t< - deduce_layout::layout_left_padded_value, + deduce_layout::layout_left_padded_value(), MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded, layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; @@ -274,56 +290,46 @@ struct deduce_layout_right_submapping< : 0) + ... + 0); - constexpr static bool layout_right_value = - // Use layout_right for rank 0 - (SubRank == 0) || - // Use layout_right for rank 1 if rightmost slice specifier is range like - ((SubRank == 1) && - (((Idx < Rank - 1) || - ((Idx == Rank - 1) && - (std::is_same_v || - std::is_convertible_v>))) && - ...)) || + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v)&&...); + } else { // Preserve if rightmost SubRank-1 slices are full_extent_t and - // the rightmost SubRank slices are ranges - (((Idx >= Rank - SubRank && - std::is_same_v) || - (Idx == Rank - SubRank && - std::is_convertible_v>) || - (Idx < Rank - SubRank && CountRange::get(Idx) == 0)) && - ...); - - constexpr static bool layout_right_padded_value = ( - // Use layout_right_padded for rank 0 - (SubRank == 0) || - // Use layout_right_padded for rank 1 if rightmost slice specifier is - // range like - ((SubRank == 1) && - (((Idx < Rank - 1) || - ((Idx == Rank - 1) && - (std::is_same_v || - std::is_convertible_v>))) && - ...)) || - // layout_right_padded case for SubRank > 1 - ( - // rightmost must be range - ((Idx < Rank - NumGaps - SubRank && CountRange::get(Idx) == 0) || - (Idx == Rank - NumGaps - SubRank && - (std::is_same_v || - std::is_convertible_v>)) || - (Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1 && - std::is_same_v) || - (Idx >= Rank - NumGaps - 1 && Idx < Rank - 1 && - CountRange::get(Idx) == SubRank - 1) || - (Idx == Rank - 1 && - (std::is_same_v || - std::is_convertible_v>))) && - ...)); + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v) || + ((Idx == Rank - SubRank) && is_range_slice_v) || + ((Idx < Rank - SubRank) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - NumGaps - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1) && std::is_same_v) || + ((Idx == Rank - NumGaps - SubRank) && is_range_slice_v) || + ((Idx < Rank - NumGaps - SubRank) && is_index_slice_v)) && ... ); + } }; } // namespace detail @@ -366,9 +372,9 @@ layout_right::mapping::submdspan_mapping_impl( SliceSpecifiers...>; using dst_layout_t = std::conditional_t< - deduce_layout::layout_right_value, layout_right, + deduce_layout::layout_right_value(), layout_right, std::conditional_t< - deduce_layout::layout_right_padded_value, + deduce_layout::layout_right_padded_value(), MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded, layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; From 69586904d339acf2d42cf4ff76b7b2f6e531eb32 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Mon, 10 Jun 2024 09:50:46 -0600 Subject: [PATCH 12/13] Move NVCC warning suppressin in submdspan_mapping --- .../__p2630_bits/submdspan_mapping.hpp | 128 +++++++----------- 1 file changed, 50 insertions(+), 78 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index b61c2ae2..326de960 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -21,6 +21,26 @@ #include #include // index_sequence +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + namespace MDSPAN_IMPL_STANDARD_NAMESPACE { //****************************************** // Return type of submdspan_mapping overloads @@ -110,11 +130,11 @@ template , SliceSpecifiers...> { - using CountRange = index_sequence_scan_impl< + using count_range = index_sequence_scan_impl< 0, (is_index_slice_v ? 0 : 1)...>; - constexpr static int NumGaps = - (((Idx > 0 && CountRange::get(Idx) == 1 && + constexpr static int num_gaps = + (((Idx > 0 && count_range::get(Idx) == 1 && is_index_slice_v) ? 1 : 0) + @@ -154,35 +174,16 @@ struct deduce_layout_left_submapping< // then another range slice // then more index slices // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 - return ((((Idx == 0) && is_range_slice_v) || - ((Idx > 0 && Idx <= NumGaps) && is_index_slice_v) || - ((Idx > NumGaps && Idx < NumGaps + SubRank - 1) && std::is_same_v) || - ((Idx == NumGaps + SubRank - 1) && is_range_slice_v) || - ((Idx > NumGaps + SubRank - 1) && is_index_slice_v)) && ... ); + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= num_gaps) && is_index_slice_v) || + ((Idx > num_gaps && Idx < num_gaps + SubRank - 1) && std::is_same_v) || + ((Idx == num_gaps + SubRank - 1) && is_range_slice_v) || + ((Idx > num_gaps + SubRank - 1) && is_index_slice_v)) && ... ); } }; } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic push -#pragma nv_diag_suppress = implicit_return_from_non_void_function -#else -#ifdef __CUDA_ARCH__ -#pragma diagnostic push -#pragma diag_suppress implicit_return_from_non_void_function -#endif -#endif -#elif defined __NVCOMPILER -#pragma diagnostic push -#pragma diag_suppress = implicit_return_from_non_void_function -#endif // Actual submdspan mapping call template template @@ -225,7 +226,7 @@ layout_left::mapping::submdspan_mapping_impl( MDSPAN_IMPL_PROPOSED_NAMESPACE:: layout_left_padded>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext, stride(1 + deduce_layout::NumGaps)), offset}; + dst_mapping_t(dst_ext, stride(1 + deduce_layout::num_gaps)), offset}; } else { // layout_stride case auto inv_map = detail::inv_map_rank(std::integral_constant(), @@ -251,17 +252,6 @@ layout_left::mapping::submdspan_mapping_impl( __builtin_unreachable(); #endif } -#if defined __NVCC__ -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic pop -#else -#ifdef __CUDA_ARCH__ -#pragma diagnostic pop -#endif -#endif -#elif defined __NVCOMPILER -#pragma diagnostic pop -#endif //********************************** // layout_right submdspan_mapping @@ -279,12 +269,12 @@ struct deduce_layout_right_submapping< IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { static constexpr size_t Rank = sizeof...(Idx); - using CountRange = index_sequence_scan_impl< + using count_range = index_sequence_scan_impl< 0, (std::is_convertible_v ? 0 : 1)...>; //__static_partial_sums...>; - constexpr static int NumGaps = - (((Idx < Rank - 1 && CountRange::get(Idx) == SubRank - 1 && + constexpr static int num_gaps = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && std::is_convertible_v) ? 1 : 0) + @@ -324,35 +314,16 @@ struct deduce_layout_right_submapping< // then another range slice // then more index slices // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 - return ((((Idx == Rank - 1) && is_range_slice_v) || - ((Idx >= Rank - NumGaps - 1 && Idx < Rank - 1) && is_index_slice_v) || - ((Idx > Rank - NumGaps - SubRank && Idx < Rank - NumGaps - 1) && std::is_same_v) || - ((Idx == Rank - NumGaps - SubRank) && is_range_slice_v) || - ((Idx < Rank - NumGaps - SubRank) && is_index_slice_v)) && ... ); + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - num_gaps - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - num_gaps - SubRank && Idx < Rank - num_gaps - 1) && std::is_same_v) || + ((Idx == Rank - num_gaps - SubRank) && is_range_slice_v) || + ((Idx < Rank - num_gaps - SubRank) && is_index_slice_v)) && ... ); } }; } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic push -#pragma nv_diag_suppress = implicit_return_from_non_void_function -#else -#ifdef __CUDA_ARCH__ -#pragma diagnostic push -#pragma diag_suppress implicit_return_from_non_void_function -#endif -#endif -#elif defined __NVCOMPILER -#pragma diagnostic push -#pragma diag_suppress = implicit_return_from_non_void_function -#endif // Actual submdspan mapping call template template @@ -397,7 +368,7 @@ layout_right::mapping::submdspan_mapping_impl( dynamic_extent>>) { return submdspan_mapping_result{ dst_mapping_t(dst_ext, - stride(src_ext_t::rank() - 2 - deduce_layout::NumGaps)), + stride(src_ext_t::rank() - 2 - deduce_layout::num_gaps)), offset}; } else { // layout_stride case @@ -424,17 +395,6 @@ layout_right::mapping::submdspan_mapping_impl( __builtin_unreachable(); #endif } -#if defined __NVCC__ -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic pop -#else -#ifdef __CUDA_ARCH__ -#pragma diagnostic pop -#endif -#endif -#elif defined __NVCOMPILER -#pragma diagnostic pop -#endif //********************************** // layout_stride submdspan_mapping @@ -478,3 +438,15 @@ layout_stride::mapping::submdspan_mapping_impl( } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif From 32bfb8d3501141b70c766d495bead3fd24bdb7cd Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Mon, 10 Jun 2024 11:40:58 -0600 Subject: [PATCH 13/13] Address review comments --- .../__p2630_bits/submdspan_mapping.hpp | 28 +++++++++---------- tests/test_submdspan.cpp | 8 +++++- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 326de960..cf1bdd1e 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -133,7 +133,7 @@ struct deduce_layout_left_submapping< using count_range = index_sequence_scan_impl< 0, (is_index_slice_v ? 0 : 1)...>; - constexpr static int num_gaps = + constexpr static int gap_len = (((Idx > 0 && count_range::get(Idx) == 1 && is_index_slice_v) ? 1 @@ -175,10 +175,10 @@ struct deduce_layout_left_submapping< // then more index slices // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 return ((((Idx == 0) && is_range_slice_v) || - ((Idx > 0 && Idx <= num_gaps) && is_index_slice_v) || - ((Idx > num_gaps && Idx < num_gaps + SubRank - 1) && std::is_same_v) || - ((Idx == num_gaps + SubRank - 1) && is_range_slice_v) || - ((Idx > num_gaps + SubRank - 1) && is_index_slice_v)) && ... ); + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v)) && ... ); } }; @@ -199,7 +199,7 @@ layout_left::mapping::submdspan_mapping_impl( // figure out sub layout type using deduce_layout = detail::deduce_layout_left_submapping< typename dst_ext_t::index_type, dst_ext_t::rank(), - decltype(std::make_index_sequence()), + std::make_index_sequence, SliceSpecifiers...>; using dst_layout_t = std::conditional_t< @@ -226,7 +226,7 @@ layout_left::mapping::submdspan_mapping_impl( MDSPAN_IMPL_PROPOSED_NAMESPACE:: layout_left_padded>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext, stride(1 + deduce_layout::num_gaps)), offset}; + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride case auto inv_map = detail::inv_map_rank(std::integral_constant(), @@ -273,7 +273,7 @@ struct deduce_layout_right_submapping< 0, (std::is_convertible_v ? 0 : 1)...>; //__static_partial_sums...>; - constexpr static int num_gaps = + constexpr static int gap_len = (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && std::is_convertible_v) ? 1 @@ -315,10 +315,10 @@ struct deduce_layout_right_submapping< // then more index slices // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 return ((((Idx == Rank - 1) && is_range_slice_v) || - ((Idx >= Rank - num_gaps - 1 && Idx < Rank - 1) && is_index_slice_v) || - ((Idx > Rank - num_gaps - SubRank && Idx < Rank - num_gaps - 1) && std::is_same_v) || - ((Idx == Rank - num_gaps - SubRank) && is_range_slice_v) || - ((Idx < Rank - num_gaps - SubRank) && is_index_slice_v)) && ... ); + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v)) && ... ); } }; @@ -339,7 +339,7 @@ layout_right::mapping::submdspan_mapping_impl( // figure out sub layout type using deduce_layout = detail::deduce_layout_right_submapping< typename dst_ext_t::index_type, dst_ext_t::rank(), - decltype(std::make_index_sequence()), + std::make_index_sequence, SliceSpecifiers...>; using dst_layout_t = std::conditional_t< @@ -368,7 +368,7 @@ layout_right::mapping::submdspan_mapping_impl( dynamic_extent>>) { return submdspan_mapping_result{ dst_mapping_t(dst_ext, - stride(src_ext_t::rank() - 2 - deduce_layout::num_gaps)), + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), offset}; } else { // layout_stride case diff --git a/tests/test_submdspan.cpp b/tests/test_submdspan.cpp index 8df5ea64..7ab0ceba 100644 --- a/tests/test_submdspan.cpp +++ b/tests/test_submdspan.cpp @@ -146,7 +146,10 @@ using submdspan_test_types = , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, std::pair, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, int, Kokkos::full_extent_t> // LayoutLeft to layout_left_padded - , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> + , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> + , std::tuple, Kokkos::dextents, args_t<10,20,30>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40,50>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t, std::pair, int> // LayoutLeft to LayoutStride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, int> @@ -157,6 +160,9 @@ using submdspan_test_types = , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, Kokkos::full_extent_t, std::pair, int, Kokkos::full_extent_t, int> // layout_right to layout_right_padded , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, Kokkos::full_extent_t, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30>, Kokkos::dextents, Kokkos::full_extent_t, int, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40>, Kokkos::dextents, std::pair, Kokkos::full_extent_t, int, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40,50>, Kokkos::dextents, int, std::pair, Kokkos::full_extent_t, int, std::pair> // layout_right to layout_stride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , std::tuple, args_t<10>, Kokkos::extents, Kokkos::strided_slice,std::integral_constant>>