Skip to content

Commit 11c4600

Browse files
oleksandr-pavlyk authored and bernhardmgruber committed
Use cuda/iterator in cub/test
This PR replaces thrust::constant_iterator with cuda::constant_iterator, and does the same for counting_iterator and transform_iterator. Replacing thrust::zip_iterator with cuda::zip_iterator was not always successful; see #6400. Replacing thrust::make_permutation_iterator with cuda::make_permutation_iterator also ran into compilation errors.
1 parent 82d70a7 commit 11c4600

File tree

55 files changed

+361
-384
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+361
-384
lines changed

cub/test/catch2_large_array_sort_helper.cuh

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77

88
#include <thrust/fill.h>
99
#include <thrust/functional.h>
10-
#include <thrust/iterator/counting_iterator.h>
11-
#include <thrust/iterator/transform_iterator.h>
1210
#include <thrust/memory.h>
1311
#include <thrust/random.h>
1412
#include <thrust/shuffle.h>
1513
#include <thrust/tabulate.h>
1614

15+
#include <cuda/iterator>
1716
#include <cuda/std/iterator>
1817
#include <cuda/std/limits>
1918
#include <cuda/std/type_traits>
@@ -184,9 +183,8 @@ struct large_array_sort_helper
184183
TIME(timer.print_elapsed_seconds_and_reset("Device Alloc"));
185184

186185
{ // Place the sorted keys into keys_out
187-
auto key_iter = thrust::make_transform_iterator(
188-
thrust::make_counting_iterator(std::size_t{0}),
189-
detail::key_sort_ref_key_transform<KeyType>(num_items, is_descending));
186+
auto key_iter = cuda::make_transform_iterator(
187+
cuda::counting_iterator(std::size_t{0}), detail::key_sort_ref_key_transform<KeyType>(num_items, is_descending));
190188
thrust::copy(c2h::device_policy, key_iter, key_iter + num_items, keys_out.begin());
191189
}
192190

@@ -212,9 +210,8 @@ struct large_array_sort_helper
212210
void verify_unstable_key_sort(std::size_t num_items, bool is_descending, const c2h::device_vector<KeyType>& keys)
213211
{
214212
TIME(c2h::cpu_timer timer);
215-
auto key_iter = thrust::make_transform_iterator(
216-
thrust::make_counting_iterator(std::size_t{0}),
217-
detail::key_sort_ref_key_transform<KeyType>{num_items, is_descending});
213+
auto key_iter = cuda::make_transform_iterator(
214+
cuda::counting_iterator(std::size_t{0}), detail::key_sort_ref_key_transform<KeyType>{num_items, is_descending});
218215
REQUIRE(thrust::equal(c2h::device_policy, keys.cbegin(), keys.cend(), key_iter));
219216
TIME(timer.print_elapsed_seconds_and_reset("Validate keys"));
220217
}
@@ -352,8 +349,8 @@ struct large_array_sort_helper
352349

353350
TIME(c2h::cpu_timer timer);
354351

355-
auto ref_key_begin = thrust::make_transform_iterator(
356-
thrust::make_counting_iterator(std::size_t{0}),
352+
auto ref_key_begin = cuda::make_transform_iterator(
353+
cuda::counting_iterator(std::size_t{0}),
357354
detail::pair_sort_ref_key_transform<KeyType>(num_items, num_summaries, is_descending));
358355

359356
REQUIRE(thrust::equal(c2h::device_policy, keys.cbegin(), keys.cend(), ref_key_begin));

cub/test/catch2_large_problem_helper.cuh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
#include <cub/util_type.cuh>
77

88
#include <thrust/equal.h>
9-
#include <thrust/iterator/constant_iterator.h>
10-
#include <thrust/iterator/counting_iterator.h>
11-
#include <thrust/iterator/transform_iterator.h>
129

1310
#include <cuda/iterator>
1411
#include <cuda/std/__algorithm/clamp.h>
@@ -43,8 +40,8 @@ struct concat_iterators_op
4340
template <typename FirstSegmentItT, typename SecondSegmentItT>
4441
auto make_concat_iterators_op(FirstSegmentItT first_it, SecondSegmentItT second_it, ::cuda::std::int64_t num_first_items)
4542
{
46-
return thrust::make_transform_iterator(
47-
thrust::make_counting_iterator(::cuda::std::int64_t{0}),
43+
return cuda::make_transform_iterator(
44+
cuda::counting_iterator(::cuda::std::int64_t{0}),
4845
concat_iterators_op<FirstSegmentItT, SecondSegmentItT>{first_it, second_it, num_first_items});
4946
}
5047

@@ -108,7 +105,7 @@ struct large_problem_test_helper
108105
{
109106
auto correctness_flags_end = correctness_flags.cbegin() + (num_elements / bits_per_element);
110107
const bool all_correct =
111-
thrust::equal(correctness_flags.cbegin(), correctness_flags_end, thrust::make_constant_iterator(0xFFFFFFFFU));
108+
thrust::equal(correctness_flags.cbegin(), correctness_flags_end, cuda::constant_iterator(0xFFFFFFFFU));
112109

113110
if (!all_correct)
114111
{

cub/test/catch2_segmented_sort_helper.cuh

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66

77
#include <thrust/device_ptr.h>
88
#include <thrust/for_each.h>
9-
#include <thrust/iterator/constant_iterator.h>
10-
#include <thrust/iterator/counting_iterator.h>
119
#include <thrust/logical.h>
1210
#include <thrust/random.h>
1311
#include <thrust/scan.h>
1412
#include <thrust/sequence.h>
1513
#include <thrust/sort.h>
1614
#include <thrust/unique.h>
1715

16+
#include <cuda/iterator>
1817
#include <cuda/std/limits>
1918
#include <cuda/std/tuple>
2019
#include <cuda/std/type_traits>
@@ -115,7 +114,7 @@ public:
115114
REQUIRE(count <= max_histo_size);
116115

117116
// Verify keys are sorted using prior histogram computation
118-
auto index_it = thrust::make_counting_iterator(std::size_t{0});
117+
auto index_it = cuda::counting_iterator(std::size_t{0});
119118
c2h::device_vector<key_t> unique_keys_out(count);
120119
c2h::device_vector<std::size_t> unique_indexes_out(count);
121120
thrust::unique_by_key_copy(
@@ -180,8 +179,7 @@ public:
180179

181180
void prepare_input_data(c2h::device_vector<key_t>& in_keys) const
182181
{
183-
auto data_gen_it =
184-
thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), mod_n<key_t>{sequence_length});
182+
auto data_gen_it = cuda::transform_iterator(cuda::counting_iterator(std::size_t{0}), mod_n<key_t>{sequence_length});
185183
thrust::copy_n(data_gen_it, in_keys.size(), in_keys.begin());
186184
}
187185

@@ -197,7 +195,7 @@ public:
197195
REQUIRE(count <= sequence_length * num_segments);
198196

199197
// // Verify keys are sorted using prior histogram computation
200-
auto index_it = thrust::make_counting_iterator(std::size_t{0});
198+
auto index_it = cuda::counting_iterator(std::size_t{0});
201199
c2h::device_vector<key_t> unique_keys_out(count);
202200
c2h::device_vector<std::size_t> unique_indexes_out(count);
203201
thrust::unique_by_key_copy(
@@ -510,15 +508,15 @@ void generate_unsorted_derived_inputs(
510508

511509
// Build keys in reversed order from how they'll eventually be sorted:
512510
thrust::for_each(c2h::nosync_device_policy,
513-
thrust::make_counting_iterator(0),
514-
thrust::make_counting_iterator(num_segments),
511+
cuda::counting_iterator(0),
512+
cuda::counting_iterator(num_segments),
515513
segment_filler<KeyT>{keys, offsets, !descending_sort});
516514
if constexpr (sort_pairs)
517515
{
518516
// Values are generated in reversed order from keys:
519517
thrust::for_each(c2h::nosync_device_policy,
520-
thrust::make_counting_iterator(0),
521-
thrust::make_counting_iterator(num_segments),
518+
cuda::counting_iterator(0),
519+
cuda::counting_iterator(num_segments),
522520
segment_filler<ValueT>{values, offsets, descending_sort});
523521
}
524522

@@ -544,8 +542,8 @@ void validate_sorted_derived_outputs(
544542
const int* offsets = thrust::raw_pointer_cast(d_offsets.data());
545543

546544
REQUIRE(thrust::all_of(c2h::device_policy,
547-
thrust::make_counting_iterator(0),
548-
thrust::make_counting_iterator(num_segments),
545+
cuda::counting_iterator(0),
546+
cuda::counting_iterator(num_segments),
549547
segment_checker<KeyT, ValueT, STABLE>{keys, values, offsets, descending_sort}));
550548
}
551549

@@ -734,8 +732,8 @@ void validate_sorted_random_outputs(
734732

735733
REQUIRE(thrust::all_of(
736734
c2h::device_policy,
737-
thrust::make_counting_iterator(0),
738-
thrust::make_counting_iterator(num_segments),
735+
cuda::counting_iterator(0),
736+
cuda::counting_iterator(num_segments),
739737
unstable_segmented_value_checker<KeyT, ValueT>{
740738
ref_keys, ref_values, test_values, d_segment_begin, d_segment_end}));
741739
}
@@ -1642,7 +1640,7 @@ inline int generate_unspecified_segments_offsets(
16421640
// calculation below.
16431641
c2h::gen(make_offset_eraser_seed(seed), erase_indices, 1, num_segments - 2);
16441642

1645-
auto const_zero_begin = thrust::make_constant_iterator<int>(0);
1643+
auto const_zero_begin = cuda::constant_iterator<int>(0);
16461644
auto const_zero_end = const_zero_begin + erase_indices.size();
16471645

16481646
thrust::scatter(

cub/test/catch2_test_block_run_length_decode.cu

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77
#include <cub/device/device_scan.cuh>
88
#include <cub/util_allocator.cuh>
99

10-
#include <thrust/iterator/counting_iterator.h>
11-
#include <thrust/iterator/transform_iterator.h>
12-
10+
#include <cuda/iterator>
1311
#include <cuda/std/type_traits>
1412

1513
#include <c2h/catch2_test_helper.h>
@@ -294,6 +292,16 @@ struct ModOp
294292
}
295293
};
296294

295+
template <typename TargetT>
296+
struct CastOp
297+
{
298+
template <typename U>
299+
__host__ __device__ __forceinline__ TargetT operator()(const U& x) const
300+
{
301+
return static_cast<TargetT>(x);
302+
}
303+
};
304+
297305
template <uint32_t RUNS_PER_THREAD,
298306
uint32_t DECODED_ITEMS_PER_THREAD,
299307
uint32_t BLOCK_DIM_X,
@@ -308,11 +316,12 @@ void TestAlgorithmSpecialisation()
308316

309317
using RunItemT = float;
310318
using RunLengthT = uint32_t;
311-
using ItemItT = thrust::counting_iterator<RunItemT>;
312-
using RunLengthsItT = thrust::transform_iterator<ModOp, thrust::counting_iterator<RunLengthT>>;
319+
using CasterOp = CastOp<RunItemT>;
320+
using ItemItT = cuda::transform_iterator<CasterOp, cuda::counting_iterator<uint64_t>>;
321+
using RunLengthsItT = cuda::transform_iterator<ModOp, cuda::counting_iterator<RunLengthT>>;
313322

314-
ItemItT d_unique_items(1000U);
315-
RunLengthsItT d_run_lengths(thrust::counting_iterator<RunLengthT>(0), ModOp{});
323+
ItemItT d_unique_items(cuda::counting_iterator<uint64_t>(1000U), CasterOp{});
324+
RunLengthsItT d_run_lengths(cuda::counting_iterator<RunLengthT>(0), ModOp{});
316325

317326
constexpr uint32_t num_runs = 10000;
318327
constexpr uint32_t num_blocks = (num_runs + (RUNS_PER_BLOCK - 1U)) / RUNS_PER_BLOCK;

cub/test/catch2_test_device_adjacent_difference_substract_left.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#include <cub/device/device_adjacent_difference.cuh>
77

8-
#include <thrust/iterator/discard_iterator.h>
8+
#include <cuda/iterator>
99

1010
#include <algorithm>
1111
#include <numeric>
@@ -59,7 +59,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractLeftCopy does not change the input",
5959
c2h::gen(C2H_SEED(2), in);
6060

6161
c2h::device_vector<type> reference = in;
62-
adjacent_difference_subtract_left_copy(in.begin(), thrust::discard_iterator<>(), num_items, cuda::std::minus<>{});
62+
adjacent_difference_subtract_left_copy(in.begin(), cuda::discard_iterator(), num_items, cuda::std::minus<>{});
6363

6464
REQUIRE(reference == in);
6565
}
@@ -252,7 +252,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractLeftCopy works with large indexes",
252252
c2h::device_vector<int> error(1);
253253
int* d_error = thrust::raw_pointer_cast(error.data());
254254
adjacent_difference_subtract_left_copy(
255-
thrust::counting_iterator<cuda::std::size_t>{0}, thrust::discard_iterator<>{}, num_items, check_difference{d_error});
255+
cuda::counting_iterator<cuda::std::size_t>{0}, cuda::discard_iterator{}, num_items, check_difference{d_error});
256256
const int h_error = error[0];
257257
REQUIRE(h_error == 0);
258258
}
@@ -280,8 +280,8 @@ C2H_TEST("DeviceAdjacentDifference::SubtractLeftCopy uses right number of invoca
280280
const int num_items = GENERATE_COPY(take(2, random(1, 1000000)));
281281
c2h::device_vector<unsigned long long> counts(1, 0);
282282
adjacent_difference_subtract_left_copy(
283-
thrust::counting_iterator<cuda::std::size_t>{0},
284-
thrust::discard_iterator<>(),
283+
cuda::counting_iterator<cuda::std::size_t>{0},
284+
cuda::discard_iterator(),
285285
num_items,
286286
invocation_counter{thrust::raw_pointer_cast(counts.data())});
287287

cub/test/catch2_test_device_adjacent_difference_substract_right.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#include <cub/device/device_adjacent_difference.cuh>
77

8-
#include <thrust/iterator/discard_iterator.h>
8+
#include <cuda/iterator>
99

1010
#include <algorithm>
1111
#include <numeric>
@@ -59,7 +59,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractRightCopy does not change the input"
5959
c2h::gen(C2H_SEED(2), in);
6060

6161
c2h::device_vector<type> reference = in;
62-
adjacent_difference_subtract_right_copy(in.begin(), thrust::discard_iterator<>(), num_items, cuda::std::minus<>{});
62+
adjacent_difference_subtract_right_copy(in.begin(), cuda::discard_iterator(), num_items, cuda::std::minus<>{});
6363

6464
REQUIRE(reference == in);
6565
}
@@ -304,7 +304,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractRightCopy works with large indexes",
304304
c2h::device_vector<int> error(1);
305305
int* d_error = thrust::raw_pointer_cast(error.data());
306306
adjacent_difference_subtract_right_copy(
307-
thrust::counting_iterator<cuda::std::size_t>{0}, thrust::discard_iterator<>{}, num_items, check_difference{d_error});
307+
cuda::counting_iterator<cuda::std::size_t>{0}, cuda::discard_iterator{}, num_items, check_difference{d_error});
308308
const int h_error = error[0];
309309
REQUIRE(h_error == 0);
310310
}
@@ -333,8 +333,8 @@ C2H_TEST("DeviceAdjacentDifference::SubtractRightCopy uses right number of invoc
333333
const int num_items = GENERATE_COPY(take(2, random(1, 1000000)));
334334
c2h::device_vector<unsigned long long> counts(1, 0);
335335
adjacent_difference_subtract_right_copy(
336-
thrust::counting_iterator<cuda::std::size_t>{0},
337-
thrust::discard_iterator<>(),
336+
cuda::counting_iterator<cuda::std::size_t>{0},
337+
cuda::discard_iterator(),
338338
num_items,
339339
invocation_counter{thrust::raw_pointer_cast(counts.data())});
340340

0 commit comments

Comments
 (0)