From 8cf54b8e5c62797f3c595ad65358683a671374e5 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 8 Sep 2023 15:07:20 -0700 Subject: [PATCH] Cleanups + deallocate before return --- .../trie/dynamic_bitset/dynamic_bitset.inl | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl index d3b7bbb5f..d56ef9d7c 100644 --- a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl +++ b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl @@ -182,27 +182,21 @@ constexpr void dynamic_bitset::build_ranks_and_selects( // Step 3. Compute selects thrust::device_vector select_markers(num_blocks, this->allocator_); + auto const select_markers_begin = thrust::raw_pointer_cast(select_markers.data()); + mark_blocks_with_select_entries<<>>( - bit_counts_begin, - thrust::raw_pointer_cast(select_markers.data()), - num_blocks, - words_per_block, - bits_per_block); + bit_counts_begin, select_markers_begin, num_blocks, words_per_block, bits_per_block); auto d_sum = reinterpret_cast(thrust::raw_pointer_cast( std::allocator_traits::allocate(temp_allocator, sizeof(size_type)))); - CUCO_CUDA_TRY(cub::DeviceReduce::Sum(nullptr, - temp_storage_bytes, - thrust::raw_pointer_cast(select_markers.data()), - d_sum, - num_blocks, - stream)); + CUCO_CUDA_TRY(cub::DeviceReduce::Sum( + nullptr, temp_storage_bytes, select_markers_begin, d_sum, num_blocks, stream)); d_temp_storage = temp_allocator.allocate(temp_storage_bytes); CUCO_CUDA_TRY(cub::DeviceReduce::Sum(thrust::raw_pointer_cast(d_temp_storage), temp_storage_bytes, - thrust::raw_pointer_cast(select_markers.data()), + select_markers_begin, d_sum, num_blocks, stream)); @@ -213,14 +207,17 @@ constexpr void dynamic_bitset::build_ranks_and_selects( stream.synchronize(); std::allocator_traits::deallocate( temp_allocator, thrust::device_ptr{reinterpret_cast(d_sum)}, sizeof(size_type)); + temp_allocator.deallocate(d_temp_storage, temp_storage_bytes); selects.resize(num_selects); + auto const select_begin = thrust::raw_pointer_cast(selects.data()); + CUCO_CUDA_TRY(cub::DeviceSelect::Flagged(nullptr, temp_storage_bytes, thrust::make_counting_iterator(0UL), - thrust::raw_pointer_cast(select_markers.data()), - thrust::raw_pointer_cast(selects.data()), + select_markers_begin, + select_begin, thrust::make_discard_iterator(), num_blocks, stream)); @@ -230,9 +227,9 @@ constexpr void dynamic_bitset::build_ranks_and_selects( CUCO_CUDA_TRY(cub::DeviceSelect::Flagged(thrust::raw_pointer_cast(d_temp_storage), temp_storage_bytes, thrust::make_counting_iterator(0UL), - thrust::raw_pointer_cast(select_markers.data()), - thrust::raw_pointer_cast(selects.data()), - thrust::discard_iterator(), + select_markers_begin, + select_begin, + thrust::make_discard_iterator(), num_blocks, stream));