From a977d96b99e5cc6390c2751816e2d419a71d2b26 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 3 Jul 2024 15:09:48 +0200 Subject: [PATCH 01/10] [core] fix get_segment for const context Signed-off-by: Marcel Koch --- core/base/segmented_array.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/base/segmented_array.hpp b/core/base/segmented_array.hpp index ffa4d62e74a..1d694d15dee 100644 --- a/core/base/segmented_array.hpp +++ b/core/base/segmented_array.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -29,7 +29,7 @@ struct device_segmented_array { T* end; }; - constexpr segment get_segment(size_type segment_id) + constexpr segment get_segment(size_type segment_id) const { GKO_ASSERT(segment_id < (offsets_end - offsets_begin)); return {flat_begin + offsets_begin[segment_id], From 59f0ea5f751080ee2ab279882f409fbaf08f125e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 3 Jul 2024 15:13:59 +0200 Subject: [PATCH 02/10] [dist] provide range-ids segmented by part-id Signed-off-by: Marcel Koch --- .../distributed/partition_kernels.cpp | 35 +++++++++++++++++- core/device_hooks/common_kernels.inc.cpp | 1 + core/distributed/partition.cpp | 13 ++++++- core/distributed/partition_kernels.hpp | 9 ++++- dpcpp/distributed/partition_kernels.dp.cpp | 32 +++++++++++++++- include/ginkgo/core/distributed/partition.hpp | 14 ++++++- omp/distributed/partition_kernels.cpp | 30 ++++++++++++++- reference/distributed/partition_kernels.cpp | 28 +++++++++++++- .../test/distributed/partition_kernels.cpp | 37 ++++++++++++++++++- test/distributed/partition_kernels.cpp | 12 +++++- 10 files changed, 201 insertions(+), 10 deletions(-) diff --git a/common/cuda_hip/distributed/partition_kernels.cpp b/common/cuda_hip/distributed/partition_kernels.cpp index 7f623b423fb..f9dbde0c839 100644 --- a/common/cuda_hip/distributed/partition_kernels.cpp +++ b/common/cuda_hip/distributed/partition_kernels.cpp @@ -1,17 +1,20 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/distributed/partition_kernels.hpp" +#include #include #include #include #include #include +#include #include #include "common/cuda_hip/base/thrust.hpp" +#include "common/cuda_hip/components/atomic.hpp" #include "common/unified/base/kernel_launch.hpp" #include "core/components/fill_array_kernels.hpp" @@ -132,6 +135,36 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +void build_ranges_by_part(std::shared_ptr exec, + const int* range_parts, size_type num_ranges, + int num_parts, array& range_ids, + array& sizes) +{ + auto policy = thrust_policy(exec); + + range_ids.resize_and_reset(num_ranges); + auto range_ids_ptr = range_ids.get_data(); + thrust::sequence(policy, range_ids_ptr, range_ids_ptr + num_ranges); + + // mutable copy of range_parts such that it can be used as keys for sorting + array range_parts_copy{exec, num_ranges}; + thrust::copy_n(policy, range_parts, num_ranges, + range_parts_copy.get_data()); + auto range_parts_ptr = range_parts_copy.get_data(); + + thrust::stable_sort_by_key(policy, range_parts_ptr, + range_parts_ptr + num_ranges, range_ids_ptr); + + sizes.resize_and_reset(num_parts); + auto sizes_ptr = sizes.get_data(); + thrust::fill_n(policy, sizes_ptr, num_parts, 0); + thrust::for_each_n(policy, range_parts_ptr, num_ranges, + [sizes_ptr] __device__(const size_type pid) { + atomic_add(sizes_ptr + pid, int64(1)); + }); +} + + } // namespace partition } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 5ed2daa540c..eace5247553 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -322,6 +322,7 @@ GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); +GKO_STUB(GKO_DECLARE_PARTITION_BUILD_RANGES_BY_PART); } // namespace partition diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 763986f3a86..fb6e5e12a54 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -21,6 +21,7 @@ GKO_REGISTER_OPERATION(build_ranges_from_global_size, partition::build_ranges_from_global_size); GKO_REGISTER_OPERATION(build_starting_indices, partition::build_starting_indices); +GKO_REGISTER_OPERATION(build_ranges_by_part, partition::build_ranges_by_part); GKO_REGISTER_OPERATION(has_ordered_parts, partition::has_ordered_parts); @@ -38,7 +39,8 @@ Partition::Partition( offsets_{exec, num_ranges + 1}, starting_indices_{exec, num_ranges}, part_sizes_{exec, static_cast(num_parts)}, - part_ids_{exec, num_ranges} + part_ids_{exec, num_ranges}, + ranges_by_part_{exec} { offsets_.fill(0); starting_indices_.fill(0); @@ -126,6 +128,13 @@ void Partition::finalize_construction() get_num_parts(), num_empty_parts_, starting_indices_.get_data(), part_sizes_.get_data())); size_ = get_element(offsets_, get_num_ranges()); + array range_ids(exec); + array num_ranges_per_part(exec); + exec->run(partition::make_build_ranges_by_part( + part_ids_.get_const_data(), get_num_ranges(), get_num_parts(), + range_ids, num_ranges_per_part)); + ranges_by_part_ = segmented_array::create_from_sizes( + std::move(range_ids), num_ranges_per_part); } template diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index b1df933e5c8..6573bed6903 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -48,6 +48,12 @@ namespace kernels { comm_index_type& num_empty_parts, \ LocalIndexType* ranks, LocalIndexType* sizes) +#define GKO_DECLARE_PARTITION_BUILD_RANGES_BY_PART \ + void build_ranges_by_part(std::shared_ptr exec, \ + const int* range_parts, size_type num_ranges, \ + int num_parts, array& range_ids, \ + array& sizes) + #define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) \ void has_ordered_parts(std::shared_ptr exec, \ const experimental::distributed::Partition< \ @@ -67,6 +73,7 @@ namespace kernels { template \ GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType, \ GlobalIndexType); \ + GKO_DECLARE_PARTITION_BUILD_RANGES_BY_PART; \ template \ GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 175ea3ac050..86968b81dc4 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,6 +10,7 @@ #include "common/unified/base/kernel_launch.hpp" #include "core/components/fill_array_kernels.hpp" #include "dpcpp/base/onedpl.hpp" +#include "dpcpp/components/atomic.dp.hpp" namespace gko { @@ -130,6 +131,35 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +void build_ranges_by_part(std::shared_ptr exec, + const int* range_parts, size_type num_ranges, + int num_parts, array& range_ids, + array& sizes) +{ + auto policy = onedpl_policy(exec); + + range_ids.resize_and_reset(num_ranges); + auto range_ids_ptr = range_ids.get_data(); + // fill range_ids with 0,...,num_ranges - 1 + run_kernel( + exec, [] GKO_KERNEL(auto i, auto rid) { rid[i] = i; }, num_ranges, + range_ids_ptr); + + oneapi::dpl::stable_sort(policy, range_ids_ptr, range_ids_ptr + num_ranges, + [range_parts](const auto rid_a, const auto rid_b) { + return range_parts[rid_a] < range_parts[rid_b]; + }); + + sizes.resize_and_reset(num_parts); + auto sizes_ptr = sizes.get_data(); + oneapi::dpl::fill_n(policy, sizes_ptr, num_parts, 0); + oneapi::dpl::for_each_n(policy, range_parts, num_ranges, + [sizes_ptr](const size_type pid) { + atomic_add(sizes_ptr + pid, int64(1)); + }); +} + + } // namespace partition } // namespace dpcpp } // namespace kernels diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 89adb22f3e7..b101f41b914 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -8,6 +8,7 @@ #include #include +#include #include @@ -190,6 +191,16 @@ class Partition : public EnablePolymorphicObject< */ local_index_type get_part_size(comm_index_type part) const; + /** + * Returns the range IDs segmented by their part ID. + * + * @return range IDs segmented by part IDs + */ + const segmented_array& get_ranges_by_part() const + { + return ranges_by_part_; + } + /** * Checks if each part has no more than one contiguous range. * @@ -274,6 +285,7 @@ class Partition : public EnablePolymorphicObject< array starting_indices_; array part_sizes_; array part_ids_; + segmented_array ranges_by_part_; }; diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp index 25b7b0bfce8..3d69ba6a0bd 100644 --- a/omp/distributed/partition_kernels.cpp +++ b/omp/distributed/partition_kernels.cpp @@ -1,9 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/distributed/partition_kernels.hpp" +#include + #include #include @@ -72,6 +74,32 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +void build_ranges_by_part(std::shared_ptr exec, + const int* range_parts, size_type num_ranges, + int num_parts, array& range_ids, + array& sizes) +{ + range_ids.resize_and_reset(num_ranges); + std::iota(range_ids.get_data(), range_ids.get_data() + num_ranges, + size_type(0)); + // sort by (part_id, range_id) + std::sort(range_ids.get_data(), range_ids.get_data() + num_ranges, + [range_parts](auto rid_a, auto rid_b) { + return std::tie(range_parts[rid_a], rid_a) < + std::tie(range_parts[rid_b], rid_b); + }); + + sizes.resize_and_reset(num_parts); + std::fill_n(sizes.get_data(), num_parts, int64(0)); +#pragma omp parallel for + for (size_type i = 0; i < num_ranges; ++i) { + auto& size = sizes.get_data()[range_parts[range_ids.get_data()[i]]]; +#pragma omp atomic + size++; + } +} + + } // namespace partition } // namespace omp } // namespace kernels diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index e5a66c74720..6bf669737bb 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -1,9 +1,11 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/distributed/partition_kernels.hpp" +#include "core/base/segmented_array.hpp" + namespace gko { namespace kernels { @@ -109,6 +111,30 @@ void build_starting_indices(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); + +void build_ranges_by_part(std::shared_ptr exec, + const int* range_parts, size_type num_ranges, + int num_parts, array& range_ids, + array& sizes) +{ + range_ids.resize_and_reset(num_ranges); + std::iota(range_ids.get_data(), range_ids.get_data() + num_ranges, + size_type(0)); + // sort by (part_id, range_id) + std::sort(range_ids.get_data(), range_ids.get_data() + num_ranges, + [range_parts](auto rid_a, auto rid_b) { + return std::tie(range_parts[rid_a], rid_a) < + std::tie(range_parts[rid_b], rid_b); + }); + + sizes.resize_and_reset(num_parts); + std::fill_n(sizes.get_data(), num_parts, int64(0)); + for (size_type i = 0; i < num_ranges; ++i) { + sizes.get_data()[range_parts[i]]++; + } +} + + template void has_ordered_parts( std::shared_ptr exec, diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index e06f3cc4029..57e7ffd30c9 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -33,6 +33,22 @@ void assert_equal_data(const T* data, std::initializer_list reference_data) } +template +void assert_equal_segmented_array(const gko::segmented_array& data, + std::initializer_list buffer, + std::initializer_list offsets) +{ + gko::array buffer_arr(data.get_executor(), buffer); + gko::array offsets_arr(data.get_executor(), offsets); + auto view = gko::make_const_array_view(data.get_executor(), data.get_size(), + data.get_const_flat_data()) + .copy_to_array(); + + GKO_ASSERT_ARRAY_EQ(view, buffer_arr); + GKO_ASSERT_ARRAY_EQ(data.get_offsets(), offsets_arr); +} + + template class Partition : public ::testing::Test { protected: @@ -75,6 +91,8 @@ TYPED_TEST(Partition, BuildsFromMapping) assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); assert_equal_data(partition->get_part_sizes(), {5, 6, 5}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}, {0, 4, 7, 10}); } @@ -100,6 +118,9 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); assert_equal_data(partition->get_part_sizes(), {5, 6, 0, 5, 0}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}, + {0, 4, 7, 7, 10, 10}); } @@ -119,6 +140,8 @@ TYPED_TEST(Partition, BuildsFromRanges) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {5, 0, 2, 2, 1}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); } @@ -135,6 +158,8 @@ TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) EXPECT_EQ(partition->get_num_parts(), 0); EXPECT_EQ(partition->get_num_empty_parts(), 0); assert_equal_data(partition->get_range_bounds(), {0}); + assert_equal_segmented_array(partition->get_ranges_by_part(), I{}, + {0}); } @@ -156,6 +181,8 @@ TYPED_TEST(Partition, BuildsFromRangesWithPartIds) assert_equal_data(partition->get_part_ids(), {0, 4, 3, 1, 2}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {5, 2, 1, 2, 0}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {0, 3, 4, 2, 1}, {0, 1, 2, 3, 4, 5}); } @@ -174,6 +201,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {3, 3, 3, 2, 2}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); } @@ -191,6 +220,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {0, 0, 0, 0, 0}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); } @@ -208,6 +239,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {1, 1, 1, 0, 0}); + assert_equal_segmented_array(partition->get_ranges_by_part(), + {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); } @@ -225,6 +258,8 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithZeroParts) ASSERT_EQ(partition->get_part_ids(), nullptr); ASSERT_EQ(partition->get_range_starting_indices(), nullptr); ASSERT_EQ(partition->get_part_sizes(), nullptr); + assert_equal_segmented_array(partition->get_ranges_by_part(), I{}, + {0}); } diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 6634744211d..343b43525f3 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -69,6 +69,16 @@ class Partition : public CommonTestFixture { gko::make_array_view( this->exec, dpart->get_num_parts(), const_cast(dpart->get_part_sizes()))); + + GKO_ASSERT_ARRAY_EQ( + gko::make_const_array_view( + this->ref, part->get_num_ranges(), + part->get_ranges_by_part().get_const_flat_data()), + gko::make_const_array_view( + this->exec, dpart->get_num_ranges(), + dpart->get_ranges_by_part().get_const_flat_data())); + GKO_ASSERT_ARRAY_EQ(part->get_ranges_by_part().get_offsets(), + dpart->get_ranges_by_part().get_offsets()) } std::default_random_engine rand_engine; From 08e7bcf820e411d6ef8b964ce802e70d783b1d72 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 10 Jul 2024 11:20:11 +0200 Subject: [PATCH 03/10] [dist] add device partition Signed-off-by: Marcel Koch --- core/distributed/device_partition.hpp | 92 +++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 core/distributed/device_partition.hpp diff --git a/core/distributed/device_partition.hpp b/core/distributed/device_partition.hpp new file mode 100644 index 00000000000..86481eaad43 --- /dev/null +++ b/core/distributed/device_partition.hpp @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: 2024 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GINKGO_PARTITION_HPP +#define GINKGO_PARTITION_HPP + +#include + +#include "core/base/segmented_array.hpp" + +namespace gko { + + +template +struct device_partition { + using local_index_type = LocalIndexType; + using global_index_type = GlobalIndexType; + using comm_index_type = experimental::distributed::comm_index_type; + + comm_index_type num_parts; + comm_index_type num_empty_parts; + size_type size; + global_index_type* offsets_begin; + global_index_type* offsets_end; + local_index_type* starting_indices_begin; + local_index_type* starting_indices_end; + local_index_type* part_sizes_begin; + local_index_type* part_sizes_end; + const comm_index_type* part_ids_begin; + const comm_index_type* part_ids_end; + device_segmented_array ranges_by_part; +}; + + +/** + * Create device_segmented_array from a segmented_array. + */ +template +constexpr device_partition +to_device( + const experimental::distributed::Partition* + partition) +{ + auto num_ranges = partition->get_num_ranges(); + auto num_parts = partition->get_num_parts(); + return {num_parts, + partition->get_num_empty_parts(), + partition->get_size(), + partition->get_range_bounds(), + partition->get_range_bounds() + num_ranges + 1, + partition->get_range_starting_indices(), + partition->get_range_starting_indices() + num_ranges, + partition->get_part_sizes(), + partition->get_part_sizes() + num_parts, + partition->get_part_ids(), + partition->get_part_ids() + num_parts, + to_device(partition->get_ranges_by_part())}; +} + +/** + * Explicitly create a const version of device_segmented_array. + * + * This is mostly relevant for tests. + */ +template +constexpr device_partition +to_device_const( + const experimental::distributed::Partition* + partition) +{ + auto num_ranges = partition->get_num_ranges(); + auto num_parts = partition->get_num_parts(); + return {num_parts, + partition->get_num_empty_parts(), + partition->get_size(), + partition->get_range_bounds(), + partition->get_range_bounds() + num_ranges + 1, + partition->get_range_starting_indices(), + partition->get_range_starting_indices() + num_ranges, + partition->get_part_sizes(), + partition->get_part_sizes() + num_parts, + partition->get_part_ids(), + partition->get_part_ids() + num_parts, + to_device(partition->get_ranges_by_part())}; +} + + +} // namespace gko + + +#endif // GINKGO_PARTITION_HPP From 24b4eb490024cec3ed02119547de40e6bf0d9c3d Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 5 Jul 2024 11:19:02 +0200 Subject: [PATCH 04/10] [imap] add local to global mapping Signed-off-by: Marcel Koch --- .../distributed/index_map_kernels.cpp | 86 ++++++++++- core/device_hooks/common_kernels.inc.cpp | 1 + core/distributed/index_map.cpp | 18 ++- core/distributed/index_map_kernels.hpp | 35 ++++- dpcpp/distributed/index_map_kernels.dp.cpp | 15 +- include/ginkgo/core/distributed/index_map.hpp | 16 +- omp/distributed/index_map_kernels.cpp | 73 +++++++++- reference/distributed/index_map_kernels.cpp | 73 +++++++++- reference/distributed/partition_helpers.hpp | 48 +++++- .../test/distributed/index_map_kernels.cpp | 115 ++++++++++++++- test/distributed/index_map_kernels.cpp | 137 +++++++++++++++++- 11 files changed, 604 insertions(+), 13 deletions(-) diff --git a/common/cuda_hip/distributed/index_map_kernels.cpp b/common/cuda_hip/distributed/index_map_kernels.cpp index e27c5221013..fde1efbabb4 100644 --- a/common/cuda_hip/distributed/index_map_kernels.cpp +++ b/common/cuda_hip/distributed/index_map_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -296,6 +296,90 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL); +template +void map_to_global( + std::shared_ptr exec, + device_partition partition, + device_segmented_array remote_global_idxs, + experimental::distributed::comm_index_type rank, + const array& local_ids, + experimental::distributed::index_space is, + array& global_ids) +{ + auto range_bounds = partition.offsets_begin; + auto starting_indices = partition.starting_indices_begin; + const auto& ranges_by_part = partition.ranges_by_part; + auto local_ids_it = local_ids.get_const_data(); + auto input_size = local_ids.get_size(); + + auto policy = thrust_policy(exec); + + global_ids.resize_and_reset(local_ids.get_size()); + auto global_ids_it = global_ids.get_data(); + + auto map_local = [rank, ranges_by_part, range_bounds, starting_indices, + partition] __device__(auto lid) { + auto local_size = + static_cast(partition.part_sizes_begin[rank]); + + if (lid < 0 || lid >= local_size) { + return invalid_index(); + } + + auto local_ranges = ranges_by_part.get_segment(rank); + auto local_ranges_size = + static_cast(local_ranges.end - local_ranges.begin); + + auto it = binary_search(int64(0), local_ranges_size, [=](const auto i) { + return starting_indices[local_ranges.begin[i]] >= lid; + }); + auto local_range_id = + it != local_ranges_size ? it : max(int64(0), it - 1); + auto range_id = local_ranges.begin[local_range_id]; + + return static_cast(lid - starting_indices[range_id]) + + range_bounds[range_id]; + }; + auto map_non_local = [remote_global_idxs] __device__(auto lid) { + auto remote_size = static_cast( + remote_global_idxs.flat_end - remote_global_idxs.flat_begin); + + if (lid < 0 || lid >= remote_size) { + return invalid_index(); + } + + return remote_global_idxs.flat_begin[lid]; + }; + auto map_combined = [map_local, map_non_local, partition, + rank] __device__(auto lid) { + auto local_size = + static_cast(partition.part_sizes_begin[rank]); + + if (lid < local_size) { + return map_local(lid); + } else { + return map_non_local(lid - local_size); + } + }; + + if (is == experimental::distributed::index_space::local) { + thrust::transform(policy, local_ids_it, local_ids_it + input_size, + global_ids_it, map_local); + } + if (is == experimental::distributed::index_space::non_local) { + thrust::transform(policy, local_ids_it, local_ids_it + input_size, + global_ids_it, map_non_local); + } + if (is == experimental::distributed::index_space::combined) { + thrust::transform(policy, local_ids_it, local_ids_it + input_size, + global_ids_it, map_combined); + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL); + + } // namespace index_map } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index eace5247553..2842fc81a26 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -344,6 +344,7 @@ namespace index_map { GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_BUILD_MAPPING); GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL); +GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL); } // namespace index_map diff --git a/core/distributed/index_map.cpp b/core/distributed/index_map.cpp index 01717546bc0..b8750a88418 100644 --- a/core/distributed/index_map.cpp +++ b/core/distributed/index_map.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -13,6 +13,7 @@ namespace index_map_kernels { GKO_REGISTER_OPERATION(build_mapping, index_map::build_mapping); GKO_REGISTER_OPERATION(map_to_local, index_map::map_to_local); +GKO_REGISTER_OPERATION(map_to_global, index_map::map_to_global); } // namespace index_map_kernels @@ -89,6 +90,21 @@ array index_map::map_to_local( } +template +array +index_map::map_to_global( + const array& local_ids, index_space index_space_v) const +{ + array global_ids(exec_); + + exec_->run(index_map_kernels::make_map_to_global( + to_device(partition_.get()), to_device(remote_global_idxs_), rank_, + local_ids, index_space_v, global_ids)); + + return global_ids; +} + + template index_map::index_map( std::shared_ptr exec, diff --git a/core/distributed/index_map_kernels.hpp b/core/distributed/index_map_kernels.hpp index 4694ba6cc10..ccc2e5661d3 100644 --- a/core/distributed/index_map_kernels.hpp +++ b/core/distributed/index_map_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -13,6 +13,7 @@ #include "core/base/kernel_declaration.hpp" #include "core/base/segmented_array.hpp" +#include "core/distributed/device_partition.hpp" namespace gko { @@ -55,10 +56,13 @@ namespace kernels { * * - partition: the global partition * - remote_target_ids: the owning part ids of each segment of - * remote_global_idxs + * remote_global_idxs * - remote_global_idxs: the remote global indices, segmented by the owning part * ids * - rank: the part id of this process + * + * Any global index that is not in the specified local index space is mapped + * to invalid_index. */ #define GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(_ltype, _gtype) \ void map_to_local( \ @@ -72,11 +76,36 @@ namespace kernels { experimental::distributed::index_space is, array<_ltype>& local_ids) +/** + * This kernels maps local indices to global indices. + * + * The relevant input parameter from the index map are: + * + * - partition: the global partition + * - remote_global_idxs: the remote global indices, segmented by the owning part + * ids + * - rank: the part id of this process + * + * Any local index that is not part of the specified index space is mapped to + * invalid_index. + */ +#define GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL(_ltype, _gtype) \ + void map_to_global( \ + std::shared_ptr exec, \ + device_partition partition, \ + device_segmented_array remote_global_idxs, \ + experimental::distributed::comm_index_type rank, \ + const array<_ltype>& local_ids, \ + experimental::distributed::index_space is, array<_gtype>& global_ids) + + #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ GKO_DECLARE_INDEX_MAP_BUILD_MAPPING(LocalIndexType, GlobalIndexType); \ template \ - GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(LocalIndexType, GlobalIndexType) + GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(LocalIndexType, GlobalIndexType); \ + template \ + GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL(LocalIndexType, GlobalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(index_map, diff --git a/dpcpp/distributed/index_map_kernels.dp.cpp b/dpcpp/distributed/index_map_kernels.dp.cpp index cf1b28140e1..65fce495b6a 100644 --- a/dpcpp/distributed/index_map_kernels.dp.cpp +++ b/dpcpp/distributed/index_map_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -44,6 +44,19 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL); +template +void map_to_global( + std::shared_ptr exec, + device_partition partition, + device_segmented_array remote_global_idxs, + experimental::distributed::comm_index_type rank, + const array& local_ids, + experimental::distributed::index_space is, + array& global_ids) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL); + } // namespace index_map } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/include/ginkgo/core/distributed/index_map.hpp b/include/ginkgo/core/distributed/index_map.hpp index c2c2473d769..0c690eb462e 100644 --- a/include/ginkgo/core/distributed/index_map.hpp +++ b/include/ginkgo/core/distributed/index_map.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -81,6 +81,20 @@ struct index_map { array map_to_local(const array& global_ids, index_space index_space_v) const; + + /** + * Maps local indices to global indices + * + * @param local_ids the local indices to map + * @param index_space_v the index space in which the passed-in local + * indices are defined + * + * @return the mapped global indices. Any local index, that is not in the + * specified index space is mapped to invalid_index + */ + array map_to_global(const array& local_ids, + index_space index_space_v) const; + /** * \brief get size of index_space::local */ diff --git a/omp/distributed/index_map_kernels.cpp b/omp/distributed/index_map_kernels.cpp index 7374f7b978b..135023fd7c0 100644 --- a/omp/distributed/index_map_kernels.cpp +++ b/omp/distributed/index_map_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -239,6 +239,77 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL); +template +void map_to_global( + std::shared_ptr exec, + device_partition partition, + device_segmented_array remote_global_idxs, + experimental::distributed::comm_index_type rank, + const array& local_ids, + experimental::distributed::index_space is, + array& global_ids) +{ + const auto& ranges_by_part = partition.ranges_by_part; + auto local_ranges = ranges_by_part.get_segment(rank); + + global_ids.resize_and_reset(local_ids.get_size()); + + auto local_size = + static_cast(partition.part_sizes_begin[rank]); + auto remote_size = static_cast( + remote_global_idxs.flat_end - remote_global_idxs.flat_begin); + size_type local_range_id = 0; + if (is == experimental::distributed::index_space::local) { +#pragma omp parallel for firstprivate(local_range_id) + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + + if (0 <= lid && lid < local_size) { + local_range_id = + find_local_range(lid, rank, partition, local_range_id); + global_ids.get_data()[i] = map_to_global( + lid, partition, local_ranges.begin[local_range_id]); + } else { + global_ids.get_data()[i] = invalid_index(); + } + } + } + if (is == experimental::distributed::index_space::non_local) { +#pragma omp parallel for + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + + if (0 <= lid && lid < remote_size) { + global_ids.get_data()[i] = remote_global_idxs.flat_begin[lid]; + } else { + global_ids.get_data()[i] = invalid_index(); + } + } + } + if (is == experimental::distributed::index_space::combined) { +#pragma omp parallel for firstprivate(local_range_id) + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + + if (0 <= lid && lid < local_size) { + local_range_id = + find_local_range(lid, rank, partition, local_range_id); + global_ids.get_data()[i] = map_to_global( + lid, partition, local_ranges.begin[local_range_id]); + } else if (local_size <= lid && lid < local_size + remote_size) { + global_ids.get_data()[i] = + remote_global_idxs.flat_begin[lid - local_size]; + } else { + global_ids.get_data()[i] = invalid_index(); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL); + + } // namespace index_map } // namespace omp } // namespace kernels diff --git a/reference/distributed/index_map_kernels.cpp b/reference/distributed/index_map_kernels.cpp index 322a95c6cdb..ed7e9bdcc52 100644 --- a/reference/distributed/index_map_kernels.cpp +++ b/reference/distributed/index_map_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -199,6 +199,77 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL); +template +void map_to_global( + std::shared_ptr exec, + device_partition partition, + device_segmented_array remote_global_idxs, + experimental::distributed::comm_index_type rank, + const array& local_ids, + experimental::distributed::index_space is, + array& global_ids) +{ + const auto& ranges_by_part = partition.ranges_by_part; + auto local_ranges = ranges_by_part.get_segment(rank); + + global_ids.resize_and_reset(local_ids.get_size()); + + auto local_size = + static_cast(partition.part_sizes_begin[rank]); + size_type local_range_id = 0; + auto map_local = [&](auto lid) { + if (0 <= lid && lid < local_size) { + local_range_id = + find_local_range(lid, rank, partition, local_range_id); + return map_to_global(lid, partition, + local_ranges.begin[local_range_id]); + } else { + return invalid_index(); + } + }; + + auto remote_size = static_cast( + remote_global_idxs.flat_end - remote_global_idxs.flat_begin); + auto map_non_local = [&](auto lid) { + if (0 <= lid && lid < remote_size) { + return remote_global_idxs.flat_begin[lid]; + } else { + return invalid_index(); + } + }; + + auto map_combined = [&](auto lid) { + if (lid < local_size) { + return map_local(lid); + } else { + return map_non_local(lid - local_size); + } + }; + + if (is == experimental::distributed::index_space::local) { + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + global_ids.get_data()[i] = map_local(lid); + } + } + if (is == experimental::distributed::index_space::non_local) { + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + global_ids.get_data()[i] = map_non_local(lid); + } + } + if (is == experimental::distributed::index_space::combined) { + for (size_type i = 0; i < local_ids.get_size(); ++i) { + auto lid = local_ids.get_const_data()[i]; + global_ids.get_data()[i] = map_combined(lid); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL); + + } // namespace index_map } // namespace reference } // namespace kernels diff --git a/reference/distributed/partition_helpers.hpp b/reference/distributed/partition_helpers.hpp index 06bd1e11f32..222d02b6c29 100644 --- a/reference/distributed/partition_helpers.hpp +++ b/reference/distributed/partition_helpers.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -11,6 +11,9 @@ #include #include +#include "core/base/segmented_array.hpp" +#include "core/distributed/device_partition.hpp" + namespace gko { @@ -48,6 +51,49 @@ LocalIndexType map_to_local( } +template +size_type find_local_range( + LocalIndexType idx, size_type part_id, + device_partition partition, + const size_type local_range_id_hint = 0) +{ + const auto& ranges_by_part = partition.ranges_by_part; + auto local_ranges = ranges_by_part.get_segment(part_id); + auto local_range_size = + static_cast(local_ranges.end - local_ranges.begin); + + auto range_starting_indices = partition.starting_indices_begin; + if (range_starting_indices[local_ranges.begin[local_range_id_hint]] <= + idx && + (local_range_id_hint == local_range_size - 1 || + range_starting_indices[local_ranges.begin[local_range_id_hint + 1]] > + idx)) { + return local_range_id_hint; + } + + auto it = std::lower_bound( + local_ranges.begin, local_ranges.end, idx, + [range_starting_indices, local_ranges](const auto rid, const auto idx) { + return range_starting_indices[rid] < idx; + }); + auto local_range_id = std::distance(local_ranges.begin, it) - 1; + return local_range_id; +} + + +template +GlobalIndexType map_to_global( + LocalIndexType idx, + device_partition partition, + size_type range_id) +{ + auto range_bounds = partition.offsets_begin; + auto starting_indices = partition.starting_indices_begin; + return static_cast(idx - starting_indices[range_id]) + + range_bounds[range_id]; +} + + } // namespace gko diff --git a/reference/test/distributed/index_map_kernels.cpp b/reference/test/distributed/index_map_kernels.cpp index 72b0a0e523b..6beac5aaec3 100644 --- a/reference/test/distributed/index_map_kernels.cpp +++ b/reference/test/distributed/index_map_kernels.cpp @@ -1,10 +1,9 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause #include "core/distributed/index_map_kernels.hpp" -#include #include #include @@ -36,6 +35,8 @@ class IndexMap : public ::testing::Test { std::shared_ptr ref; std::shared_ptr part = part_type::build_from_mapping(ref, {ref, {0, 0, 1, 1, 2, 2}}, 3); + std::shared_ptr part_large = part_type::build_from_mapping( + ref, {ref, {0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0}}, 3); }; @@ -195,3 +196,113 @@ TEST_F(IndexMap, CanGetLocalWithCombinedISWithInvalid) gko::array expected(ref, {2, 3, 0, 1, 2, 4, -1, 1}); GKO_ASSERT_ARRAY_EQ(local_ids, expected); } + + +TEST_F(IndexMap, CanGetGlobalWithLocalIS) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {5, 4, 3, 2, 1, 0, 4}); + auto remote_global_idxs = gko::segmented_array{ref}; + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::local, global_ids); + + gko::array expected(ref, {14, 13, 12, 5, 4, 3, 13}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} + + +TEST_F(IndexMap, CanGetGlobalWithLocalISWithInvalid) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {5, 4, 10, 3, 2, 1, 0, 100, 4}); + auto remote_global_idxs = gko::segmented_array{ref}; + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::local, global_ids); + + auto invalid = gko::invalid_index(); + gko::array expected( + ref, I{14, 13, invalid, 12, 5, 4, 3, invalid, 13}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} + + +TEST_F(IndexMap, CanGetGlobalWithNonLocalIS) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {5, 4, 3, 2, 1, 0, 4}); + auto remote_global_idxs = + gko::segmented_array::create_from_sizes( + {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::non_local, global_ids); + + gko::array expected(ref, {15, 16, 17, 2, 1, 0, 16}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} + + +TEST_F(IndexMap, CanGetGlobalWithNonLocalISWithInvalid) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {5, 4, 10, 3, 2, 1, 0, 100, 4}); + auto remote_global_idxs = + gko::segmented_array::create_from_sizes( + {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::non_local, global_ids); + + auto invalid = gko::invalid_index(); + gko::array expected( + ref, I{15, 16, invalid, 17, 2, 1, 0, invalid, 16}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} + + +TEST_F(IndexMap, CanGetGlobalWithCombinedIS) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {2, 5, 6, 10}); + auto remote_global_idxs = + gko::segmented_array::create_from_sizes( + {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::combined, global_ids); + + gko::array expected(ref, {5, 14, 0, 16}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} + + +TEST_F(IndexMap, CanGetGlobalWithCombinedISWithInvalid) +{ + gko::array global_ids(ref); + gko::array local_ids(ref, {2, 5, 133, 6, 10}); + auto remote_global_idxs = + gko::segmented_array::create_from_sizes( + {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part_large.get()), + to_device_const(remote_global_idxs), 1, local_ids, + gko::experimental::distributed::index_space::combined, global_ids); + + auto invalid = gko::invalid_index(); + gko::array expected( + ref, I{5, 14, invalid, 0, 16}); + GKO_ASSERT_ARRAY_EQ(global_ids, expected); +} diff --git a/test/distributed/index_map_kernels.cpp b/test/distributed/index_map_kernels.cpp index 4fb6f111123..60421f113ee 100644 --- a/test/distributed/index_map_kernels.cpp +++ b/test/distributed/index_map_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -166,6 +166,17 @@ class IndexMap : public CommonTestFixture { return {std::move(exec), std::move(query)}; } + gko::array generate_to_global_query( + std::shared_ptr exec, gko::size_type size, + gko::size_type num_queries) + { + std::uniform_int_distribution dist(0, size - 1); + gko::array query{ref, num_queries}; + std::generate_n(query.get_data(), query.get_size(), + [&] { return dist(engine); }); + return {std::move(exec), std::move(query)}; + } + gko::array generate_complement_idxs( std::shared_ptr exec, const gko::array& idxs) @@ -388,3 +399,127 @@ TEST_F(IndexMap, GetLocalWithCombinedIndexSpaceWithInvalidIndexSameAsRef) GKO_ASSERT_ARRAY_EQ(result, dresult); } + + +TEST_F(IndexMap, GetGlobalWithLocalIndexSpaceSameAsRef) +{ + auto query = generate_to_global_query(ref, local_size, 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::local, + result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::local, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} + + +TEST_F(IndexMap, GetGlobalWithLocalIndexSpaceWithInvalidIndexSameAsRef) +{ + auto query = generate_to_global_query(ref, local_size * 2, 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::local, + result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::local, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} + + +TEST_F(IndexMap, GetGlobalWithNonLocalIndexSpaceSameAsRef) +{ + auto query = + generate_to_global_query(ref, remote_global_idxs.get_size(), 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, + gko::experimental::distributed::index_space::non_local, result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::non_local, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} + + +TEST_F(IndexMap, GetGlobalWithNonLocalIndexSpaceWithInvalidIndexSameAsRef) +{ + auto query = + generate_to_global_query(ref, remote_global_idxs.get_size() * 2, 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, + gko::experimental::distributed::index_space::non_local, result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::non_local, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} + + +TEST_F(IndexMap, GetGlobalWithCombinedIndexSpaceSameAsRef) +{ + auto query = generate_to_global_query( + ref, local_size + remote_global_idxs.get_size(), 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::combined, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} + + +TEST_F(IndexMap, GetGlobalWithCombinedIndexSpaceWithInvalidIndexSameAsRef) +{ + auto query = generate_to_global_query( + ref, (local_size + remote_global_idxs.get_size()) * 2, 33); + auto dquery = gko::array(exec, query); + auto result = gko::array(ref); + auto dresult = gko::array(exec); + + gko::kernels::reference::index_map::map_to_global( + ref, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + result); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, dquery, + gko::experimental::distributed::index_space::combined, dresult); + + GKO_ASSERT_ARRAY_EQ(result, dresult); +} From 3338ff6ae6e1172ca2dd6aabdbda57410f2aa7a5 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 16 Dec 2024 15:09:23 +0100 Subject: [PATCH 05/10] [dist] fix to global mapping --- test/distributed/index_map_kernels.cpp | 136 +++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/test/distributed/index_map_kernels.cpp b/test/distributed/index_map_kernels.cpp index 60421f113ee..1973e54651a 100644 --- a/test/distributed/index_map_kernels.cpp +++ b/test/distributed/index_map_kernels.cpp @@ -117,6 +117,13 @@ class IndexMap : public CommonTestFixture { IndexMap() { + std::random_device rd; + std::uniform_int_distribution<> engine_dist( + 0, std::numeric_limits::max()); + auto seed = engine_dist(rd); + std::cout << "seed = " << seed << std::endl; + engine.seed(490729788); + auto connections = generate_connection_idxs(ref, this_rank, part, engine, 11); auto dconnections = gko::array(exec, connections); @@ -523,3 +530,132 @@ TEST_F(IndexMap, GetGlobalWithCombinedIndexSpaceWithInvalidIndexSameAsRef) GKO_ASSERT_ARRAY_EQ(result, dresult); } + + +TEST_F(IndexMap, RoundTripGlobalWithLocalIndexSpace) +{ + auto local_space = gko::array(ref, local_size); + std::iota(local_space.get_data(), local_space.get_data() + local_size, + this_rank * local_size); + auto query = generate_query(ref, local_space, 33); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + local); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, local, + gko::experimental::distributed::index_space::combined, global); + + GKO_ASSERT_ARRAY_EQ(global, query); +} + + +TEST_F(IndexMap, RoundTripLocalWithLocalIndexSpace) +{ + auto query = generate_to_global_query(ref, local_size, 333); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + global); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, global, + gko::experimental::distributed::index_space::combined, local); + + GKO_ASSERT_ARRAY_EQ(local, query); +} + + +TEST_F(IndexMap, RoundTripGlobalWithNonLocalIndexSpace) +{ + auto query = generate_query(exec, get_flat_array(remote_global_idxs), 333); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + local); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, local, + gko::experimental::distributed::index_space::combined, global); + + GKO_ASSERT_ARRAY_EQ(global, query); +} + + +TEST_F(IndexMap, RoundTripLocalWithNonLocalIndexSpace) +{ + auto query = + generate_to_global_query(ref, remote_global_idxs.get_size(), 33); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + global); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, global, + gko::experimental::distributed::index_space::combined, local); + + GKO_ASSERT_ARRAY_EQ(local, query); +} + + +TEST_F(IndexMap, RoundTripGlobalWithCombinedIndexSpace) +{ + auto local_space = gko::array(ref, local_size); + std::iota(local_space.get_data(), local_space.get_data() + local_size, + this_rank * local_size); + auto combined_space = + combine_arrays(ref, local_space, get_flat_array(remote_global_idxs)); + auto query = generate_query(exec, combined_space, 333); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + local); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, local, + gko::experimental::distributed::index_space::combined, global); + + GKO_ASSERT_ARRAY_EQ(global, query); +} + + +TEST_F(IndexMap, RoundTripLocalWithCombinedIndexSpace) +{ + auto local_space = gko::array(ref, local_size); + std::iota(local_space.get_data(), local_space.get_data() + local_size, + this_rank * local_size); + auto combined_space = + combine_arrays(ref, local_space, get_flat_array(remote_global_idxs)); + auto query = generate_to_global_query( + exec, local_size + remote_global_idxs.get_size(), 333); + auto local = gko::array(exec); + auto global = gko::array(exec); + + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( + exec, to_device_const(part.get()), to_device_const(remote_global_idxs), + this_rank, query, gko::experimental::distributed::index_space::combined, + global); + gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( + exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), + this_rank, global, + gko::experimental::distributed::index_space::combined, local); + + GKO_ASSERT_ARRAY_EQ(local, query); +} From 6eb8e8dd054c91c15eb4ba17ed52bdec354db752 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 16 Dec 2024 15:09:41 +0100 Subject: [PATCH 06/10] [dist] add round trip index mapping test --- reference/distributed/partition_helpers.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/reference/distributed/partition_helpers.hpp b/reference/distributed/partition_helpers.hpp index 222d02b6c29..91c59e5188f 100644 --- a/reference/distributed/partition_helpers.hpp +++ b/reference/distributed/partition_helpers.hpp @@ -71,10 +71,10 @@ size_type find_local_range( return local_range_id_hint; } - auto it = std::lower_bound( + auto it = std::upper_bound( local_ranges.begin, local_ranges.end, idx, - [range_starting_indices, local_ranges](const auto rid, const auto idx) { - return range_starting_indices[rid] < idx; + [range_starting_indices](const auto value, const auto rid) { + return value < range_starting_indices[rid]; }); auto local_range_id = std::distance(local_ranges.begin, it) - 1; return local_range_id; From b274373156b3a5b1a85c2fdcac24f4f7c2a42aa4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 16 Dec 2024 16:09:33 +0100 Subject: [PATCH 07/10] fixup! [dist] add round trip index mapping test --- test/distributed/index_map_kernels.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/distributed/index_map_kernels.cpp b/test/distributed/index_map_kernels.cpp index 1973e54651a..58bebb72d7b 100644 --- a/test/distributed/index_map_kernels.cpp +++ b/test/distributed/index_map_kernels.cpp @@ -537,7 +537,7 @@ TEST_F(IndexMap, RoundTripGlobalWithLocalIndexSpace) auto local_space = gko::array(ref, local_size); std::iota(local_space.get_data(), local_space.get_data() + local_size, this_rank * local_size); - auto query = generate_query(ref, local_space, 33); + auto query = generate_query(exec, local_space, 33); auto local = gko::array(exec); auto global = gko::array(exec); @@ -556,7 +556,7 @@ TEST_F(IndexMap, RoundTripGlobalWithLocalIndexSpace) TEST_F(IndexMap, RoundTripLocalWithLocalIndexSpace) { - auto query = generate_to_global_query(ref, local_size, 333); + auto query = generate_to_global_query(exec, local_size, 333); auto local = gko::array(exec); auto global = gko::array(exec); @@ -595,7 +595,7 @@ TEST_F(IndexMap, RoundTripGlobalWithNonLocalIndexSpace) TEST_F(IndexMap, RoundTripLocalWithNonLocalIndexSpace) { auto query = - generate_to_global_query(ref, remote_global_idxs.get_size(), 33); + generate_to_global_query(exec, remote_global_idxs.get_size(), 33); auto local = gko::array(exec); auto global = gko::array(exec); From 51b48d5af0e99986b7f116af19d7ffecc2ca51da Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 17 Dec 2024 12:24:28 +0100 Subject: [PATCH 08/10] fixup! [dist] add round trip index mapping test --- test/distributed/index_map_kernels.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/distributed/index_map_kernels.cpp b/test/distributed/index_map_kernels.cpp index 58bebb72d7b..e057fe95abd 100644 --- a/test/distributed/index_map_kernels.cpp +++ b/test/distributed/index_map_kernels.cpp @@ -561,9 +561,9 @@ TEST_F(IndexMap, RoundTripLocalWithLocalIndexSpace) auto global = gko::array(exec); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( - exec, to_device_const(part.get()), to_device_const(remote_global_idxs), - this_rank, query, gko::experimental::distributed::index_space::combined, - global); + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, query, + gko::experimental::distributed::index_space::combined, global); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), this_rank, global, @@ -600,9 +600,9 @@ TEST_F(IndexMap, RoundTripLocalWithNonLocalIndexSpace) auto global = gko::array(exec); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( - exec, to_device_const(part.get()), to_device_const(remote_global_idxs), - this_rank, query, gko::experimental::distributed::index_space::combined, - global); + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, query, + gko::experimental::distributed::index_space::combined, global); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), this_rank, global, @@ -649,9 +649,9 @@ TEST_F(IndexMap, RoundTripLocalWithCombinedIndexSpace) auto global = gko::array(exec); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_global( - exec, to_device_const(part.get()), to_device_const(remote_global_idxs), - this_rank, query, gko::experimental::distributed::index_space::combined, - global); + exec, to_device_const(dpart.get()), + to_device_const(dremote_global_idxs), this_rank, query, + gko::experimental::distributed::index_space::combined, global); gko::kernels::GKO_DEVICE_NAMESPACE::index_map::map_to_local( exec, dpart.get(), dtarget_ids, to_device_const(dremote_global_idxs), this_rank, global, From 6a02bb8125faa4ff7628204e60077556cdd7719c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 24 Jan 2025 11:02:19 +0000 Subject: [PATCH 09/10] [test] add assertion macro for segmented arrays Co-authored-by: Pratik Nayak --- core/test/utils/assertions.hpp | 42 +++++++++++ .../test/distributed/partition_kernels.cpp | 73 ++++++++++--------- test/distributed/partition_kernels.cpp | 11 +-- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 6b1014cdba0..a74cae9daab 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -999,6 +999,38 @@ ::testing::AssertionResult array_equal(const std::string& first_expression, } +template +::testing::AssertionResult segmented_array_equal( + const std::string& first_expression, const std::string& second_expression, + const segmented_array& first, + const segmented_array& second) +{ + auto view_first = + gko::make_const_array_view(first.get_executor(), first.get_size(), + first.get_const_flat_data()) + .copy_to_array(); + auto view_second = + gko::make_const_array_view(second.get_executor(), second.get_size(), + second.get_const_flat_data()) + .copy_to_array(); + + auto buffer_result = array_equal(first_expression, second_expression, + view_first, view_second); + if (buffer_result == ::testing::AssertionFailure()) { + return buffer_result << "Buffers of the segmented arrays mismatch"; + } + + auto offsets_result = + array_equal(first_expression, second_expression, first.get_offsets(), + second.get_offsets()); + if (offsets_result == ::testing::AssertionFailure()) { + return offsets_result << "Offsets of the segmented arrays mismatch"; + } + + return ::testing::AssertionSuccess(); +} + + /** * This is a gtest predicate which checks if one string is contained in another. * @@ -1382,6 +1414,16 @@ T* plain_ptr(T* ptr) } +#define GKO_ASSERT_SEGMENTED_ARRAY_EQ(_array1, _array2) \ + { \ + ASSERT_PRED_FORMAT2(::gko::test::assertions::segmented_array_equal, \ + _array1, _array2); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + /** * Checks if one substring can be found inside a bigger string * diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 57e7ffd30c9..65475e5403c 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -33,22 +33,6 @@ void assert_equal_data(const T* data, std::initializer_list reference_data) } -template -void assert_equal_segmented_array(const gko::segmented_array& data, - std::initializer_list buffer, - std::initializer_list offsets) -{ - gko::array buffer_arr(data.get_executor(), buffer); - gko::array offsets_arr(data.get_executor(), offsets); - auto view = gko::make_const_array_view(data.get_executor(), data.get_size(), - data.get_const_flat_data()) - .copy_to_array(); - - GKO_ASSERT_ARRAY_EQ(view, buffer_arr); - GKO_ASSERT_ARRAY_EQ(data.get_offsets(), offsets_arr); -} - - template class Partition : public ::testing::Test { protected: @@ -91,8 +75,11 @@ TYPED_TEST(Partition, BuildsFromMapping) assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); assert_equal_data(partition->get_part_sizes(), {5, 6, 5}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}, {0, 4, 7, 10}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}}, + {this->ref, {0, 4, 7, 10}})); } @@ -118,9 +105,11 @@ TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); assert_equal_data(partition->get_part_sizes(), {5, 6, 0, 5, 0}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}, - {0, 4, 7, 7, 10, 10}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {1, 4, 6, 9, 2, 5, 7, 0, 3, 8}}, + {this->ref, {0, 4, 7, 7, 10, 10}})); } @@ -140,8 +129,10 @@ TYPED_TEST(Partition, BuildsFromRanges) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {5, 0, 2, 2, 1}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0, 1, 2, 3, 4}}, {this->ref, {0, 1, 2, 3, 4, 5}})); } @@ -158,8 +149,10 @@ TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) EXPECT_EQ(partition->get_num_parts(), 0); EXPECT_EQ(partition->get_num_empty_parts(), 0); assert_equal_data(partition->get_range_bounds(), {0}); - assert_equal_segmented_array(partition->get_ranges_by_part(), I{}, - {0}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0}})); } @@ -181,8 +174,10 @@ TYPED_TEST(Partition, BuildsFromRangesWithPartIds) assert_equal_data(partition->get_part_ids(), {0, 4, 3, 1, 2}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {5, 2, 1, 2, 0}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {0, 3, 4, 2, 1}, {0, 1, 2, 3, 4, 5}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0, 3, 4, 2, 1}}, {this->ref, {0, 1, 2, 3, 4, 5}})); } @@ -201,8 +196,10 @@ TYPED_TEST(Partition, BuildsFromGlobalSize) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {3, 3, 3, 2, 2}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0, 1, 2, 3, 4}}, {this->ref, {0, 1, 2, 3, 4, 5}})); } @@ -220,8 +217,10 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {0, 0, 0, 0, 0}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0, 1, 2, 3, 4}}, {this->ref, {0, 1, 2, 3, 4, 5}})); } @@ -239,8 +238,10 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); assert_equal_data(partition->get_part_sizes(), {1, 1, 1, 0, 0}); - assert_equal_segmented_array(partition->get_ranges_by_part(), - {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0, 1, 2, 3, 4}}, {this->ref, {0, 1, 2, 3, 4, 5}})); } @@ -258,8 +259,10 @@ TYPED_TEST(Partition, BuildsFromGlobalSizeWithZeroParts) ASSERT_EQ(partition->get_part_ids(), nullptr); ASSERT_EQ(partition->get_range_starting_indices(), nullptr); ASSERT_EQ(partition->get_part_sizes(), nullptr); - assert_equal_segmented_array(partition->get_ranges_by_part(), I{}, - {0}); + GKO_ASSERT_SEGMENTED_ARRAY_EQ( + partition->get_ranges_by_part(), + gko::segmented_array::create_from_offsets( + {this->ref, {0}})); } diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 343b43525f3..093cbb8f58c 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -70,15 +70,8 @@ class Partition : public CommonTestFixture { this->exec, dpart->get_num_parts(), const_cast(dpart->get_part_sizes()))); - GKO_ASSERT_ARRAY_EQ( - gko::make_const_array_view( - this->ref, part->get_num_ranges(), - part->get_ranges_by_part().get_const_flat_data()), - gko::make_const_array_view( - this->exec, dpart->get_num_ranges(), - dpart->get_ranges_by_part().get_const_flat_data())); - GKO_ASSERT_ARRAY_EQ(part->get_ranges_by_part().get_offsets(), - dpart->get_ranges_by_part().get_offsets()) + GKO_ASSERT_SEGMENTED_ARRAY_EQ(part->get_ranges_by_part(), + dpart->get_ranges_by_part()); } std::default_random_engine rand_engine; From 7863f01f5bd7a4c564ec5fec2b015646ead86e36 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 24 Jan 2025 11:03:04 +0000 Subject: [PATCH 10/10] [dist] review updates: - reduce tests - update docs - minor refactoring Co-authored-by: Yu-Hsiang M. Tsai Co-authored-by: Pratik Nayak --- core/distributed/device_partition.hpp | 35 +-------- core/distributed/index_map.cpp | 4 +- core/distributed/index_map_kernels.hpp | 6 +- reference/distributed/partition_helpers.hpp | 2 + reference/distributed/partition_kernels.cpp | 3 +- .../test/distributed/index_map_kernels.cpp | 77 +++++++++---------- test/distributed/index_map_kernels.cpp | 2 - 7 files changed, 47 insertions(+), 82 deletions(-) diff --git a/core/distributed/device_partition.hpp b/core/distributed/device_partition.hpp index 86481eaad43..e0cac01a055 100644 --- a/core/distributed/device_partition.hpp +++ b/core/distributed/device_partition.hpp @@ -2,8 +2,7 @@ // // SPDX-License-Identifier: BSD-3-Clause -#ifndef GINKGO_PARTITION_HPP -#define GINKGO_PARTITION_HPP +#pragma once #include @@ -34,34 +33,7 @@ struct device_partition { /** - * Create device_segmented_array from a segmented_array. - */ -template -constexpr device_partition -to_device( - const experimental::distributed::Partition* - partition) -{ - auto num_ranges = partition->get_num_ranges(); - auto num_parts = partition->get_num_parts(); - return {num_parts, - partition->get_num_empty_parts(), - partition->get_size(), - partition->get_range_bounds(), - partition->get_range_bounds() + num_ranges + 1, - partition->get_range_starting_indices(), - partition->get_range_starting_indices() + num_ranges, - partition->get_part_sizes(), - partition->get_part_sizes() + num_parts, - partition->get_part_ids(), - partition->get_part_ids() + num_parts, - to_device(partition->get_ranges_by_part())}; -} - -/** - * Explicitly create a const version of device_segmented_array. - * - * This is mostly relevant for tests. + * Explicitly create a const version of device_partition. */ template constexpr device_partition @@ -87,6 +59,3 @@ to_device_const( } // namespace gko - - -#endif // GINKGO_PARTITION_HPP diff --git a/core/distributed/index_map.cpp b/core/distributed/index_map.cpp index b8750a88418..e06c57c40a0 100644 --- a/core/distributed/index_map.cpp +++ b/core/distributed/index_map.cpp @@ -98,8 +98,8 @@ index_map::map_to_global( array global_ids(exec_); exec_->run(index_map_kernels::make_map_to_global( - to_device(partition_.get()), to_device(remote_global_idxs_), rank_, - local_ids, index_space_v, global_ids)); + to_device_const(partition_.get()), to_device(remote_global_idxs_), + rank_, local_ids, index_space_v, global_ids)); return global_ids; } diff --git a/core/distributed/index_map_kernels.hpp b/core/distributed/index_map_kernels.hpp index ccc2e5661d3..c1d3be561f8 100644 --- a/core/distributed/index_map_kernels.hpp +++ b/core/distributed/index_map_kernels.hpp @@ -54,11 +54,11 @@ namespace kernels { * space defined by is. The resulting indices are stored in local_ids. * The index map is defined by the input parameters: * - * - partition: the global partition + * - partition: the global partition * - remote_target_ids: the owning part ids of each segment of * remote_global_idxs * - remote_global_idxs: the remote global indices, segmented by the owning part - * ids + * ids, and each segment sorted * - rank: the part id of this process * * Any global index that is not in the specified local index space is mapped @@ -81,7 +81,7 @@ namespace kernels { * * The relevant input parameter from the index map are: * - * - partition: the global partition + * - partition: the global partition * - remote_global_idxs: the remote global indices, segmented by the owning part * ids * - rank: the part id of this process diff --git a/reference/distributed/partition_helpers.hpp b/reference/distributed/partition_helpers.hpp index 91c59e5188f..8dd21d1cc68 100644 --- a/reference/distributed/partition_helpers.hpp +++ b/reference/distributed/partition_helpers.hpp @@ -87,6 +87,8 @@ GlobalIndexType map_to_global( device_partition partition, size_type range_id) { + assert(range_id < + std::distance(partition.offsets_begin, partition.offsets_end) - 1); auto range_bounds = partition.offsets_begin; auto starting_indices = partition.starting_indices_begin; return static_cast(idx - starting_indices[range_id]) + diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index 6bf669737bb..0ba8c325797 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -5,6 +5,7 @@ #include "core/distributed/partition_kernels.hpp" #include "core/base/segmented_array.hpp" +#include "ginkgo/core/base/math.hpp" namespace gko { @@ -128,7 +129,7 @@ void build_ranges_by_part(std::shared_ptr exec, }); sizes.resize_and_reset(num_parts); - std::fill_n(sizes.get_data(), num_parts, int64(0)); + std::fill_n(sizes.get_data(), num_parts, zero()); for (size_type i = 0; i < num_ranges; ++i) { sizes.get_data()[range_parts[i]]++; } diff --git a/reference/test/distributed/index_map_kernels.cpp b/reference/test/distributed/index_map_kernels.cpp index 6beac5aaec3..525bf90b5de 100644 --- a/reference/test/distributed/index_map_kernels.cpp +++ b/reference/test/distributed/index_map_kernels.cpp @@ -198,22 +198,6 @@ TEST_F(IndexMap, CanGetLocalWithCombinedISWithInvalid) } -TEST_F(IndexMap, CanGetGlobalWithLocalIS) -{ - gko::array global_ids(ref); - gko::array local_ids(ref, {5, 4, 3, 2, 1, 0, 4}); - auto remote_global_idxs = gko::segmented_array{ref}; - - gko::kernels::reference::index_map::map_to_global( - ref, to_device_const(part_large.get()), - to_device_const(remote_global_idxs), 1, local_ids, - gko::experimental::distributed::index_space::local, global_ids); - - gko::array expected(ref, {14, 13, 12, 5, 4, 3, 13}); - GKO_ASSERT_ARRAY_EQ(global_ids, expected); -} - - TEST_F(IndexMap, CanGetGlobalWithLocalISWithInvalid) { gko::array global_ids(ref); @@ -222,87 +206,98 @@ TEST_F(IndexMap, CanGetGlobalWithLocalISWithInvalid) gko::kernels::reference::index_map::map_to_global( ref, to_device_const(part_large.get()), - to_device_const(remote_global_idxs), 1, local_ids, + to_device_const(remote_global_idxs), 0, local_ids, gko::experimental::distributed::index_space::local, global_ids); auto invalid = gko::invalid_index(); gko::array expected( - ref, I{14, 13, invalid, 12, 5, 4, 3, invalid, 13}); + ref, I{17, 16, invalid, 15, 2, 1, 0, invalid, 16}); GKO_ASSERT_ARRAY_EQ(global_ids, expected); } -TEST_F(IndexMap, CanGetGlobalWithNonLocalIS) +TEST_F(IndexMap, CanGetGlobalWithNonLocalISWithInvalid) { gko::array global_ids(ref); - gko::array local_ids(ref, {5, 4, 3, 2, 1, 0, 4}); + gko::array local_ids(ref, {5, 4, 10, 3, 2, 1, 0, 100, 4}); auto remote_global_idxs = gko::segmented_array::create_from_sizes( - {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + {ref, {0, 1, 9, 8, 7, 6}}, {ref, {2, 4}}); gko::kernels::reference::index_map::map_to_global( ref, to_device_const(part_large.get()), to_device_const(remote_global_idxs), 1, local_ids, gko::experimental::distributed::index_space::non_local, global_ids); - gko::array expected(ref, {15, 16, 17, 2, 1, 0, 16}); + auto invalid = gko::invalid_index(); + gko::array expected( + ref, I{6, 7, invalid, 8, 9, 1, 0, invalid, 7}); GKO_ASSERT_ARRAY_EQ(global_ids, expected); } -TEST_F(IndexMap, CanGetGlobalWithNonLocalISWithInvalid) +TEST_F(IndexMap, CanGetGlobalWithCombinedISWithInvalid) { gko::array global_ids(ref); - gko::array local_ids(ref, {5, 4, 10, 3, 2, 1, 0, 100, 4}); + gko::array local_ids(ref, {2, 5, 133, 6, 10}); auto remote_global_idxs = gko::segmented_array::create_from_sizes( - {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + {ref, {0, 1, 9, 8, 7, 6}}, {ref, {2, 4}}); gko::kernels::reference::index_map::map_to_global( ref, to_device_const(part_large.get()), to_device_const(remote_global_idxs), 1, local_ids, - gko::experimental::distributed::index_space::non_local, global_ids); + gko::experimental::distributed::index_space::combined, global_ids); auto invalid = gko::invalid_index(); gko::array expected( - ref, I{15, 16, invalid, 17, 2, 1, 0, invalid, 16}); + ref, I{5, 14, invalid, 0, 7}); GKO_ASSERT_ARRAY_EQ(global_ids, expected); } -TEST_F(IndexMap, CanGetGlobalWithCombinedIS) +TEST_F(IndexMap, RoundTripGlobalWithCombinedIS) { - gko::array global_ids(ref); - gko::array local_ids(ref, {2, 5, 6, 10}); + gko::array result(ref); + gko::array global_ids(ref, + {5, 14, 14, 0, 3, 4, 8, 7, 12}); + gko::array local_ids(ref); auto remote_global_idxs = gko::segmented_array::create_from_sizes( - {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + {ref, {0, 1, 6, 7, 8, 9}}, {ref, {2, 4}}); + gko::array remote_target_ids(ref, {0, 2}); + gko::kernels::reference::index_map::map_to_local( + ref, part_large.get(), remote_target_ids, + to_device_const(remote_global_idxs), 1, global_ids, + gko::experimental::distributed::index_space::combined, local_ids); gko::kernels::reference::index_map::map_to_global( ref, to_device_const(part_large.get()), to_device_const(remote_global_idxs), 1, local_ids, - gko::experimental::distributed::index_space::combined, global_ids); + gko::experimental::distributed::index_space::combined, result); - gko::array expected(ref, {5, 14, 0, 16}); - GKO_ASSERT_ARRAY_EQ(global_ids, expected); + GKO_ASSERT_ARRAY_EQ(result, global_ids); } -TEST_F(IndexMap, CanGetGlobalWithCombinedISWithInvalid) +TEST_F(IndexMap, RoundTripLocalWithCombinedIS) { + gko::array result(ref); gko::array global_ids(ref); - gko::array local_ids(ref, {2, 5, 133, 6, 10}); + gko::array local_ids(ref, {2, 5, 4, 6, 3, 3, 10}); auto remote_global_idxs = gko::segmented_array::create_from_sizes( - {ref, {0, 1, 2, 17, 16, 15}}, {ref, {2, 4}}); + {ref, {0, 1, 6, 7, 8, 9}}, {ref, {2, 4}}); + gko::array remote_target_ids(ref, {0, 2}); gko::kernels::reference::index_map::map_to_global( ref, to_device_const(part_large.get()), to_device_const(remote_global_idxs), 1, local_ids, gko::experimental::distributed::index_space::combined, global_ids); + gko::kernels::reference::index_map::map_to_local( + ref, part_large.get(), remote_target_ids, + to_device_const(remote_global_idxs), 1, global_ids, + gko::experimental::distributed::index_space::combined, result); - auto invalid = gko::invalid_index(); - gko::array expected( - ref, I{5, 14, invalid, 0, 16}); - GKO_ASSERT_ARRAY_EQ(global_ids, expected); + GKO_ASSERT_ARRAY_EQ(result, local_ids); } diff --git a/test/distributed/index_map_kernels.cpp b/test/distributed/index_map_kernels.cpp index e057fe95abd..1b266fbafd0 100644 --- a/test/distributed/index_map_kernels.cpp +++ b/test/distributed/index_map_kernels.cpp @@ -120,8 +120,6 @@ class IndexMap : public CommonTestFixture { std::random_device rd; std::uniform_int_distribution<> engine_dist( 0, std::numeric_limits::max()); - auto seed = engine_dist(rd); - std::cout << "seed = " << seed << std::endl; engine.seed(490729788); auto connections =