diff --git a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh index 039714bbf..e9cdb9b87 100644 --- a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh +++ b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh @@ -44,7 +44,7 @@ struct rank { * * @return The base rank */ - __host__ __device__ uint64_t constexpr abs() const noexcept + __host__ __device__ constexpr uint64_t abs() const noexcept { return (static_cast(abs_hi_) << 8) | abs_lo_; } @@ -54,7 +54,7 @@ struct rank { * * @param abs Base rank */ - __host__ __device__ void set_abs(uint64_t abs) noexcept + __host__ __device__ constexpr void set_abs(uint64_t abs) noexcept { abs_hi_ = static_cast(abs >> 8); abs_lo_ = static_cast(abs); @@ -72,6 +72,7 @@ struct rank { * * @tparam Allocator Type of allocator used for device storage */ +// TODO: have to use device_malloc_allocator for now otherwise the container cannot grow template > class dynamic_bitset { public: @@ -89,9 +90,7 @@ class dynamic_bitset { * * @param allocator Allocator used for allocating device storage */ - inline dynamic_bitset(Allocator const& allocator = Allocator{}); - dynamic_bitset(dynamic_bitset&&) = default; ///< Move constructor - inline ~dynamic_bitset(); + constexpr dynamic_bitset(Allocator const& allocator = Allocator{}); /** * @brief adds a new bit at the end @@ -100,7 +99,7 @@ class dynamic_bitset { * * @param bit Boolean value of new bit to be added */ - inline void append(bool bit) noexcept; + constexpr void append(bool bit) noexcept; /** * @brief Modifies a single bit @@ -108,19 +107,19 @@ class dynamic_bitset { * @param index position of bit to be modified * @param bit new value of bit */ - inline void set(size_type index, bool bit) noexcept; + constexpr void set(size_type index, bool bit) noexcept; /** * @brief Sets last bit to specified value * * @param bit new value of last bit */ - inline void set_last(bool bit) noexcept; + constexpr void set_last(bool bit) noexcept; /** * @brief Builds indexes for rank and select */ - inline void build() noexcept; + constexpr void build() noexcept; /** * @brief Bulk get operation @@ -134,10 +133,10 @@ class dynamic_bitset { * @param stream Stream to execute get kernel */ template - void get(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream = {}) const noexcept; + constexpr void get(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream = {}) const noexcept; /** * @brief Bulk rank operation @@ -151,10 +150,10 @@ class dynamic_bitset { * @param stream Stream to execute ranks kernel */ template - void ranks(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream = {}) const noexcept; + constexpr void ranks(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream = {}) const noexcept; /** * @brief Bulk select operation @@ -168,14 +167,15 @@ class dynamic_bitset { * @param stream Stream to execute selects kernel */ template - void selects(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream = {}) const noexcept; + constexpr void selects(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream = {}) const noexcept; /** *@brief Struct to hold all storage refs needed by bitvector_ref */ + // TODO: this is not a real ref type, to be changed struct storage_ref_type { const slot_type* words_ref_; ///< Words refs @@ -205,7 +205,7 @@ class dynamic_bitset { * * @return Value of bit at position specified by key */ - [[nodiscard]] __device__ bool get(size_type key) const noexcept; + [[nodiscard]] __device__ constexpr bool get(size_type key) const noexcept; /** * @brief Access a single word of internal storage @@ -214,7 +214,7 @@ class dynamic_bitset { * * @return Word at position specified by index */ - [[nodiscard]] __device__ slot_type get_word(size_type word_id) const noexcept; + [[nodiscard]] __device__ constexpr slot_type get_word(size_type word_id) const noexcept; /** * @brief Find position of first set bit starting from a given position (inclusive) @@ -232,7 +232,7 @@ class dynamic_bitset { * * @return Rank of input position */ - [[nodiscard]] __device__ size_type rank(size_type key) const noexcept; + [[nodiscard]] __device__ constexpr size_type rank(size_type key) const noexcept; /** * @brief Find position of Nth set (1) bit counting from start of bitvector @@ -241,7 +241,7 @@ class dynamic_bitset { * * @return Position of Nth set bit */ - [[nodiscard]] __device__ size_type select(size_type count) const noexcept; + [[nodiscard]] __device__ constexpr size_type select(size_type count) const noexcept; /** * @brief Find position of Nth not-set (0) bit counting from start of bitvector @@ -250,7 +250,7 @@ class dynamic_bitset { * * @return Position of Nth not-set bit */ - [[nodiscard]] __device__ size_type select0(size_type count) const noexcept; + [[nodiscard]] __device__ constexpr size_type select0(size_type count) const noexcept; private: /** @@ -263,7 +263,7 @@ class dynamic_bitset { * @return index in ranks which corresponds to highest rank less than count (least upper bound) */ template - [[nodiscard]] __device__ size_type get_initial_rank_estimate( + [[nodiscard]] __device__ constexpr size_type get_initial_rank_estimate( size_type count, const SelectsRef& selects, const RanksRef& ranks) const noexcept; /** @@ -277,8 +277,8 @@ class dynamic_bitset { * @return Increment to word_id based on rank values */ template - [[nodiscard]] __device__ size_type subtract_rank_from_count(size_type& count, - Rank rank) const noexcept; + [[nodiscard]] __device__ constexpr size_type subtract_rank_from_count(size_type& count, + Rank rank) const noexcept; /** * @brief Find position of Nth set bit in a 64-bit word @@ -300,7 +300,7 @@ class dynamic_bitset { * * @return Device ref of the current `dynamic_bitset` object */ - [[nodiscard]] ref_type ref() const noexcept; + [[nodiscard]] constexpr ref_type ref() const noexcept; /** * @brief Gets the number of bits dynamic_bitset holds @@ -336,9 +336,10 @@ class dynamic_bitset { * @param selects Output array of selects * @param flip_bits If true, negate bits to construct indexes for `0` bits */ - void build_ranks_and_selects(thrust::device_vector& ranks, - thrust::device_vector& selects, - bool flip_bits) noexcept; + constexpr void build_ranks_and_selects( + thrust::device_vector& ranks, + thrust::device_vector& selects, + bool flip_bits) noexcept; /** * @brief Helper function to calculate grid size for simple kernels @@ -348,7 +349,7 @@ class dynamic_bitset { * @return grid size */ // TODO: to be moved to the CUDA utility header - size_type constexpr default_grid_size(size_type num_elements) const noexcept; + constexpr size_type default_grid_size(size_type num_elements) const noexcept; }; } // namespace detail diff --git a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl index b437f37e4..ae748ea70 100644 --- a/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl +++ b/include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl @@ -29,7 +29,7 @@ namespace experimental { namespace detail { template -dynamic_bitset::dynamic_bitset(Allocator const& allocator) +constexpr dynamic_bitset::dynamic_bitset(Allocator const& allocator) : allocator_{allocator}, n_bits_{0}, words_{allocator}, @@ -41,12 +41,7 @@ dynamic_bitset::dynamic_bitset(Allocator const& allocator) } template -dynamic_bitset::~dynamic_bitset() -{ -} - -template -void dynamic_bitset::append(bool bit) noexcept +constexpr void dynamic_bitset::append(bool bit) noexcept { if (n_bits_ % bits_per_block == 0) { words_.resize(words_.size() + words_per_block); // Extend storage by one block @@ -56,7 +51,7 @@ void dynamic_bitset::append(bool bit) noexcept } template -void dynamic_bitset::set(size_type index, bool bit) noexcept +constexpr void dynamic_bitset::set(size_type index, bool bit) noexcept { size_type word_id = index / bits_per_word; size_type bit_id = index % bits_per_word; @@ -68,17 +63,17 @@ void dynamic_bitset::set(size_type index, bool bit) noexcept } template -void dynamic_bitset::set_last(bool bit) noexcept +constexpr void dynamic_bitset::set_last(bool bit) noexcept { set(n_bits_ - 1, bit); } template template -void dynamic_bitset::get(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream) const noexcept +constexpr void dynamic_bitset::get(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream) const noexcept { auto const num_keys = cuco::detail::distance(keys_begin, keys_end); @@ -92,10 +87,10 @@ void dynamic_bitset::get(KeyIt keys_begin, template template -void dynamic_bitset::ranks(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream) const noexcept +constexpr void dynamic_bitset::ranks(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream) const noexcept { auto const num_keys = cuco::detail::distance(keys_begin, keys_end); @@ -109,10 +104,10 @@ void dynamic_bitset::ranks(KeyIt keys_begin, template template -void dynamic_bitset::selects(KeyIt keys_begin, - KeyIt keys_end, - OutputIt outputs_begin, - cuda_stream_ref stream) const noexcept +constexpr void dynamic_bitset::selects(KeyIt keys_begin, + KeyIt keys_end, + OutputIt outputs_begin, + cuda_stream_ref stream) const noexcept { auto const num_keys = cuco::detail::distance(keys_begin, keys_end); @@ -125,7 +120,7 @@ void dynamic_bitset::selects(KeyIt keys_begin, } template -void dynamic_bitset::build_ranks_and_selects( +constexpr void dynamic_bitset::build_ranks_and_selects( thrust::device_vector& ranks, thrust::device_vector& selects, bool flip_bits) noexcept @@ -181,14 +176,14 @@ void dynamic_bitset::build_ranks_and_selects( } template -void dynamic_bitset::build() noexcept +constexpr void dynamic_bitset::build() noexcept { build_ranks_and_selects(ranks_, selects_, false); // 1-bits build_ranks_and_selects(ranks0_, selects0_, true); // 0-bits } template -dynamic_bitset::ref_type dynamic_bitset::ref() const noexcept +constexpr dynamic_bitset::ref_type dynamic_bitset::ref() const noexcept { return ref_type{storage_ref_type{thrust::raw_pointer_cast(words_.data()), thrust::raw_pointer_cast(ranks_.data()), @@ -211,6 +206,7 @@ constexpr dynamic_bitset::size_type dynamic_bitset::defaul } // Device reference implementations + template __host__ __device__ constexpr dynamic_bitset::reference::reference( storage_ref_type storage) noexcept @@ -219,13 +215,13 @@ __host__ __device__ constexpr dynamic_bitset::reference::reference( } template -__device__ bool dynamic_bitset::reference::get(size_type key) const noexcept +__device__ constexpr bool dynamic_bitset::reference::get(size_type key) const noexcept { return (storage_.words_ref_[key / bits_per_word] >> (key % bits_per_word)) & 1UL; } template -__device__ typename dynamic_bitset::slot_type +__device__ constexpr typename dynamic_bitset::slot_type dynamic_bitset::reference::get_word(size_type word_id) const noexcept { return storage_.words_ref_[word_id]; @@ -246,8 +242,8 @@ dynamic_bitset::reference::find_next_set(size_type key) const noexcep } template -__device__ typename dynamic_bitset::size_type dynamic_bitset::reference::rank( - size_type key) const noexcept +__device__ constexpr typename dynamic_bitset::size_type +dynamic_bitset::reference::rank(size_type key) const noexcept { size_type word_id = key / bits_per_word; size_type bit_id = key % bits_per_word; @@ -265,7 +261,7 @@ __device__ typename dynamic_bitset::size_type dynamic_bitset -__device__ typename dynamic_bitset::size_type +__device__ constexpr typename dynamic_bitset::size_type dynamic_bitset::reference::select(size_type count) const noexcept { auto rank_id = get_initial_rank_estimate(count, storage_.selects_ref_, storage_.ranks_ref_); @@ -278,7 +274,7 @@ dynamic_bitset::reference::select(size_type count) const noexcept } template -__device__ typename dynamic_bitset::size_type +__device__ constexpr typename dynamic_bitset::size_type dynamic_bitset::reference::select0(size_type count) const noexcept { auto rank_id = get_initial_rank_estimate(count, storage_.selects0_ref_, storage_.ranks0_ref_); @@ -292,7 +288,7 @@ dynamic_bitset::reference::select0(size_type count) const noexcept template template -__device__ typename dynamic_bitset::size_type +__device__ constexpr typename dynamic_bitset::size_type dynamic_bitset::reference::get_initial_rank_estimate( size_type count, SelectsRef const& selects, RanksRef const& ranks) const noexcept { @@ -319,7 +315,7 @@ dynamic_bitset::reference::get_initial_rank_estimate( template template -__device__ typename dynamic_bitset::size_type +__device__ constexpr typename dynamic_bitset::size_type dynamic_bitset::reference::subtract_rank_from_count(size_type& count, Rank rank) const noexcept {