Skip to content

Commit

Permalink
Cleanups: constexpr instead of inline, TODO, etc
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel committed Aug 30, 2023
1 parent b65d5b1 commit 0cf4bac
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 66 deletions.
69 changes: 35 additions & 34 deletions include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct rank {
*
* @return The base rank
*/
__host__ __device__ uint64_t constexpr abs() const noexcept
__host__ __device__ constexpr uint64_t abs() const noexcept
{
return (static_cast<uint64_t>(abs_hi_) << 8) | abs_lo_;
}
Expand All @@ -54,7 +54,7 @@ struct rank {
*
* @param abs Base rank
*/
__host__ __device__ void set_abs(uint64_t abs) noexcept
__host__ __device__ constexpr void set_abs(uint64_t abs) noexcept
{
abs_hi_ = static_cast<uint32_t>(abs >> 8);
abs_lo_ = static_cast<uint8_t>(abs);
Expand All @@ -72,6 +72,7 @@ struct rank {
*
* @tparam Allocator Type of allocator used for device storage
*/
// TODO: have to use device_malloc_allocator for now otherwise the container cannot grow
template <class Allocator = thrust::device_malloc_allocator<std::byte>>
class dynamic_bitset {
public:
Expand All @@ -89,9 +90,7 @@ class dynamic_bitset {
*
* @param allocator Allocator used for allocating device storage
*/
inline dynamic_bitset(Allocator const& allocator = Allocator{});
dynamic_bitset(dynamic_bitset&&) = default; ///< Move constructor
inline ~dynamic_bitset();
constexpr dynamic_bitset(Allocator const& allocator = Allocator{});

/**
* @brief adds a new bit at the end
Expand All @@ -100,27 +99,27 @@ class dynamic_bitset {
*
* @param bit Boolean value of new bit to be added
*/
inline void append(bool bit) noexcept;
constexpr void append(bool bit) noexcept;

/**
* @brief Modifies a single bit
*
* @param index position of bit to be modified
* @param bit new value of bit
*/
inline void set(size_type index, bool bit) noexcept;
constexpr void set(size_type index, bool bit) noexcept;

/**
* @brief Sets last bit to specified value
*
* @param bit new value of last bit
*/
inline void set_last(bool bit) noexcept;
constexpr void set_last(bool bit) noexcept;

/**
* @brief Builds indexes for rank and select
*/
inline void build() noexcept;
constexpr void build() noexcept;

/**
* @brief Bulk get operation
Expand All @@ -134,10 +133,10 @@ class dynamic_bitset {
* @param stream Stream to execute get kernel
*/
template <typename KeyIt, typename OutputIt>
void get(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;
constexpr void get(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;

/**
* @brief Bulk rank operation
Expand All @@ -151,10 +150,10 @@ class dynamic_bitset {
* @param stream Stream to execute ranks kernel
*/
template <typename KeyIt, typename OutputIt>
void ranks(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;
constexpr void ranks(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;

/**
* @brief Bulk select operation
Expand All @@ -168,14 +167,15 @@ class dynamic_bitset {
* @param stream Stream to execute selects kernel
*/
template <typename KeyIt, typename OutputIt>
void selects(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;
constexpr void selects(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream = {}) const noexcept;

/**
*@brief Struct to hold all storage refs needed by bitvector_ref
*/
// TODO: this is not a real ref type, to be changed
struct storage_ref_type {
const slot_type* words_ref_; ///< Words refs

Expand Down Expand Up @@ -205,7 +205,7 @@ class dynamic_bitset {
*
* @return Value of bit at position specified by key
*/
[[nodiscard]] __device__ bool get(size_type key) const noexcept;
[[nodiscard]] __device__ constexpr bool get(size_type key) const noexcept;

/**
* @brief Access a single word of internal storage
Expand All @@ -214,7 +214,7 @@ class dynamic_bitset {
*
* @return Word at position specified by index
*/
[[nodiscard]] __device__ slot_type get_word(size_type word_id) const noexcept;
[[nodiscard]] __device__ constexpr slot_type get_word(size_type word_id) const noexcept;

/**
* @brief Find position of first set bit starting from a given position (inclusive)
Expand All @@ -232,7 +232,7 @@ class dynamic_bitset {
*
* @return Rank of input position
*/
[[nodiscard]] __device__ size_type rank(size_type key) const noexcept;
[[nodiscard]] __device__ constexpr size_type rank(size_type key) const noexcept;

/**
* @brief Find position of Nth set (1) bit counting from start of bitvector
Expand All @@ -241,7 +241,7 @@ class dynamic_bitset {
*
* @return Position of Nth set bit
*/
[[nodiscard]] __device__ size_type select(size_type count) const noexcept;
[[nodiscard]] __device__ constexpr size_type select(size_type count) const noexcept;

/**
* @brief Find position of Nth not-set (0) bit counting from start of bitvector
Expand All @@ -250,7 +250,7 @@ class dynamic_bitset {
*
* @return Position of Nth not-set bit
*/
[[nodiscard]] __device__ size_type select0(size_type count) const noexcept;
[[nodiscard]] __device__ constexpr size_type select0(size_type count) const noexcept;

private:
/**
Expand All @@ -263,7 +263,7 @@ class dynamic_bitset {
* @return index in ranks which corresponds to highest rank less than count (least upper bound)
*/
template <typename SelectsRef, typename RanksRef>
[[nodiscard]] __device__ size_type get_initial_rank_estimate(
[[nodiscard]] __device__ constexpr size_type get_initial_rank_estimate(
size_type count, const SelectsRef& selects, const RanksRef& ranks) const noexcept;

/**
Expand All @@ -277,8 +277,8 @@ class dynamic_bitset {
* @return Increment to word_id based on rank values
*/
template <typename Rank>
[[nodiscard]] __device__ size_type subtract_rank_from_count(size_type& count,
Rank rank) const noexcept;
[[nodiscard]] __device__ constexpr size_type subtract_rank_from_count(size_type& count,
Rank rank) const noexcept;

/**
* @brief Find position of Nth set bit in a 64-bit word
Expand All @@ -300,7 +300,7 @@ class dynamic_bitset {
*
* @return Device ref of the current `dynamic_bitset` object
*/
[[nodiscard]] ref_type ref() const noexcept;
[[nodiscard]] constexpr ref_type ref() const noexcept;

/**
* @brief Gets the number of bits dynamic_bitset holds
Expand Down Expand Up @@ -336,9 +336,10 @@ class dynamic_bitset {
* @param selects Output array of selects
* @param flip_bits If true, negate bits to construct indexes for `0` bits
*/
void build_ranks_and_selects(thrust::device_vector<rank, rank_allocator_type>& ranks,
thrust::device_vector<size_type, size_allocator_type>& selects,
bool flip_bits) noexcept;
constexpr void build_ranks_and_selects(
thrust::device_vector<rank, rank_allocator_type>& ranks,
thrust::device_vector<size_type, size_allocator_type>& selects,
bool flip_bits) noexcept;

/**
* @brief Helper function to calculate grid size for simple kernels
Expand All @@ -348,7 +349,7 @@ class dynamic_bitset {
* @return grid size
*/
// TODO: to be moved to the CUDA utility header
size_type constexpr default_grid_size(size_type num_elements) const noexcept;
constexpr size_type default_grid_size(size_type num_elements) const noexcept;
};

} // namespace detail
Expand Down
60 changes: 28 additions & 32 deletions include/cuco/detail/trie/dynamic_bitset/dynamic_bitset.inl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace experimental {
namespace detail {

template <class Allocator>
dynamic_bitset<Allocator>::dynamic_bitset(Allocator const& allocator)
constexpr dynamic_bitset<Allocator>::dynamic_bitset(Allocator const& allocator)
: allocator_{allocator},
n_bits_{0},
words_{allocator},
Expand All @@ -41,12 +41,7 @@ dynamic_bitset<Allocator>::dynamic_bitset(Allocator const& allocator)
}

template <class Allocator>
dynamic_bitset<Allocator>::~dynamic_bitset()
{
}

template <class Allocator>
void dynamic_bitset<Allocator>::append(bool bit) noexcept
constexpr void dynamic_bitset<Allocator>::append(bool bit) noexcept
{
if (n_bits_ % bits_per_block == 0) {
words_.resize(words_.size() + words_per_block); // Extend storage by one block
Expand All @@ -56,7 +51,7 @@ void dynamic_bitset<Allocator>::append(bool bit) noexcept
}

template <class Allocator>
void dynamic_bitset<Allocator>::set(size_type index, bool bit) noexcept
constexpr void dynamic_bitset<Allocator>::set(size_type index, bool bit) noexcept
{
size_type word_id = index / bits_per_word;
size_type bit_id = index % bits_per_word;
Expand All @@ -68,17 +63,17 @@ void dynamic_bitset<Allocator>::set(size_type index, bool bit) noexcept
}

template <class Allocator>
void dynamic_bitset<Allocator>::set_last(bool bit) noexcept
constexpr void dynamic_bitset<Allocator>::set_last(bool bit) noexcept
{
set(n_bits_ - 1, bit);
}

template <class Allocator>
template <typename KeyIt, typename OutputIt>
void dynamic_bitset<Allocator>::get(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept
constexpr void dynamic_bitset<Allocator>::get(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept

{
auto const num_keys = cuco::detail::distance(keys_begin, keys_end);
Expand All @@ -92,10 +87,10 @@ void dynamic_bitset<Allocator>::get(KeyIt keys_begin,

template <class Allocator>
template <typename KeyIt, typename OutputIt>
void dynamic_bitset<Allocator>::ranks(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept
constexpr void dynamic_bitset<Allocator>::ranks(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept

{
auto const num_keys = cuco::detail::distance(keys_begin, keys_end);
Expand All @@ -109,10 +104,10 @@ void dynamic_bitset<Allocator>::ranks(KeyIt keys_begin,

template <class Allocator>
template <typename KeyIt, typename OutputIt>
void dynamic_bitset<Allocator>::selects(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept
constexpr void dynamic_bitset<Allocator>::selects(KeyIt keys_begin,
KeyIt keys_end,
OutputIt outputs_begin,
cuda_stream_ref stream) const noexcept

{
auto const num_keys = cuco::detail::distance(keys_begin, keys_end);
Expand All @@ -125,7 +120,7 @@ void dynamic_bitset<Allocator>::selects(KeyIt keys_begin,
}

template <class Allocator>
void dynamic_bitset<Allocator>::build_ranks_and_selects(
constexpr void dynamic_bitset<Allocator>::build_ranks_and_selects(
thrust::device_vector<rank, rank_allocator_type>& ranks,
thrust::device_vector<size_type, size_allocator_type>& selects,
bool flip_bits) noexcept
Expand Down Expand Up @@ -181,14 +176,14 @@ void dynamic_bitset<Allocator>::build_ranks_and_selects(
}

template <class Allocator>
void dynamic_bitset<Allocator>::build() noexcept
constexpr void dynamic_bitset<Allocator>::build() noexcept
{
build_ranks_and_selects(ranks_, selects_, false); // 1-bits
build_ranks_and_selects(ranks0_, selects0_, true); // 0-bits
}

template <class Allocator>
dynamic_bitset<Allocator>::ref_type dynamic_bitset<Allocator>::ref() const noexcept
constexpr dynamic_bitset<Allocator>::ref_type dynamic_bitset<Allocator>::ref() const noexcept
{
return ref_type{storage_ref_type{thrust::raw_pointer_cast(words_.data()),
thrust::raw_pointer_cast(ranks_.data()),
Expand All @@ -211,6 +206,7 @@ constexpr dynamic_bitset<Allocator>::size_type dynamic_bitset<Allocator>::defaul
}

// Device reference implementations

template <class Allocator>
__host__ __device__ constexpr dynamic_bitset<Allocator>::reference::reference(
storage_ref_type storage) noexcept
Expand All @@ -219,13 +215,13 @@ __host__ __device__ constexpr dynamic_bitset<Allocator>::reference::reference(
}

template <class Allocator>
__device__ bool dynamic_bitset<Allocator>::reference::get(size_type key) const noexcept
__device__ constexpr bool dynamic_bitset<Allocator>::reference::get(size_type key) const noexcept
{
return (storage_.words_ref_[key / bits_per_word] >> (key % bits_per_word)) & 1UL;
}

template <class Allocator>
__device__ typename dynamic_bitset<Allocator>::slot_type
__device__ constexpr typename dynamic_bitset<Allocator>::slot_type
dynamic_bitset<Allocator>::reference::get_word(size_type word_id) const noexcept
{
return storage_.words_ref_[word_id];
Expand All @@ -246,8 +242,8 @@ dynamic_bitset<Allocator>::reference::find_next_set(size_type key) const noexcep
}

template <class Allocator>
__device__ typename dynamic_bitset<Allocator>::size_type dynamic_bitset<Allocator>::reference::rank(
size_type key) const noexcept
__device__ constexpr typename dynamic_bitset<Allocator>::size_type
dynamic_bitset<Allocator>::reference::rank(size_type key) const noexcept
{
size_type word_id = key / bits_per_word;
size_type bit_id = key % bits_per_word;
Expand All @@ -265,7 +261,7 @@ __device__ typename dynamic_bitset<Allocator>::size_type dynamic_bitset<Allocato
}

template <class Allocator>
__device__ typename dynamic_bitset<Allocator>::size_type
__device__ constexpr typename dynamic_bitset<Allocator>::size_type
dynamic_bitset<Allocator>::reference::select(size_type count) const noexcept
{
auto rank_id = get_initial_rank_estimate(count, storage_.selects_ref_, storage_.ranks_ref_);
Expand All @@ -278,7 +274,7 @@ dynamic_bitset<Allocator>::reference::select(size_type count) const noexcept
}

template <class Allocator>
__device__ typename dynamic_bitset<Allocator>::size_type
__device__ constexpr typename dynamic_bitset<Allocator>::size_type
dynamic_bitset<Allocator>::reference::select0(size_type count) const noexcept
{
auto rank_id = get_initial_rank_estimate(count, storage_.selects0_ref_, storage_.ranks0_ref_);
Expand All @@ -292,7 +288,7 @@ dynamic_bitset<Allocator>::reference::select0(size_type count) const noexcept

template <class Allocator>
template <typename SelectsRef, typename RanksRef>
__device__ typename dynamic_bitset<Allocator>::size_type
__device__ constexpr typename dynamic_bitset<Allocator>::size_type
dynamic_bitset<Allocator>::reference::get_initial_rank_estimate(
size_type count, SelectsRef const& selects, RanksRef const& ranks) const noexcept
{
Expand All @@ -319,7 +315,7 @@ dynamic_bitset<Allocator>::reference::get_initial_rank_estimate(

template <class Allocator>
template <typename Rank>
__device__ typename dynamic_bitset<Allocator>::size_type
__device__ constexpr typename dynamic_bitset<Allocator>::size_type
dynamic_bitset<Allocator>::reference::subtract_rank_from_count(size_type& count,
Rank rank) const noexcept
{
Expand Down

0 comments on commit 0cf4bac

Please sign in to comment.