Skip to content

Commit

Permalink
[FEATURE] SIMD add
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Dec 1, 2023
1 parent 7bc2613 commit 2826ef5
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 61 deletions.
17 changes: 17 additions & 0 deletions include/hibf/interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,25 @@ class interleaved_bloom_filter::counting_agent_type
explicit counting_agent_type(interleaved_bloom_filter const & ibf) :
ibf_ptr(std::addressof(ibf)),
membership_agent(ibf),
#if !HIBF_HAS_AVX512
result_buffer(ibf.bin_count())
{}
#else
// AVX512 will access result_buffer's memory in chunks, so we need to make sure that we allocate enough memory
// such that the last chunk is not out of bounds.
result_buffer(next_multiple_of_64(ibf.bin_count())) // Ensure large enough capacity.
{
result_buffer.resize(ibf.bin_count()); // Resize to actual requested size.
// Silences llvm's ASAN container-overflow warning.
# if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_ASAN)
__sanitizer_annotate_contiguous_container(result_buffer.data(),
result_buffer.data() + result_buffer.capacity(),
result_buffer.data() + result_buffer.size(),
result_buffer.data() + result_buffer.capacity());
# endif
}
#endif

//!\}

/*!\name Counting
Expand Down
217 changes: 166 additions & 51 deletions include/hibf/misc/counting_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,103 @@
#include <hibf/config.hpp> // for config
#include <hibf/contrib/aligned_allocator.hpp> // for aligned_allocator
#include <hibf/misc/bit_vector.hpp> // for bit_vector
#include <hibf/misc/divide_and_ceil.hpp>
#include <hibf/misc/next_multiple_of_64.hpp>

#include <cereal/macros.hpp> // for CEREAL_SERIALIZE_FUNCTION_NAME
#include <cereal/types/base_class.hpp> // for base_class

#if HIBF_HAS_AVX512
# include <simde/x86/avx512/add.h>
# include <simde/x86/avx512/load.h>
# include <simde/x86/avx512/mov.h>
# include <simde/x86/avx512/set1.h>
# include <simde/x86/avx512/store.h>
# include <simde/x86/avx512/sub.h>
#endif

namespace seqan::hibf
{

#if HIBF_HAS_AVX512
//!\cond
// Since the counting_vector can have different value types, we need specific SIMD instructions for each value type.
template <std::integral integral_t>
struct simd_mapping
{};

// CRTP base class for the simd_mapping, containg common functionality.
template <typename derived_t, std::integral integral_t>
requires std::same_as<derived_t, simd_mapping<integral_t>>
struct simd_mapping_crtp
{
// Let `B = sizeof(integral_t) * CHAR_BIT`, e.g. 8 for (u)int8_t, and 16 for (u)int16_t.
// We can process `512 / B` bits of the bit_vector at once.
static inline constexpr size_t bits_per_iterations = 512u / (sizeof(integral_t) * CHAR_BIT);
// clang-format off
// The type that is need to represent `bits_per_iterations` bits.
using bits_type = std::conditional_t<bits_per_iterations == 64, uint64_t,
std::conditional_t<bits_per_iterations == 32, uint32_t,
std::conditional_t<bits_per_iterations == 16, uint16_t,
std::conditional_t<bits_per_iterations == 8, uint8_t, void>>>>;
// clang-format on
static_assert(!std::same_as<bits_type, void>, "Unsupported bits_type.");

// Takes B bits from the bit_vector and expands them to a bits_type.
// E.g., B = 8 : [1,0,1,1,0,0,1,0] -> [0...01, 0...00, 0...01, 0...01, 0...00, 0...00, 0...01, 0...00], where
// each element is 64 bits wide.
static inline constexpr auto expand_bits(bits_type const * const bits)
{
return derived_t::mm512_maskz_mov_epi(*bits, derived_t::mm512_set1_epi(1));
}
};

// SIMD instructions for int8_t and uint8_t.
template <std::integral integral_t>
requires (sizeof(integral_t) == 1)
struct simd_mapping<integral_t> : simd_mapping_crtp<simd_mapping<integral_t>, integral_t>
{
static inline constexpr auto mm512_maskz_mov_epi = simde_mm512_maskz_mov_epi8;
static inline constexpr auto mm512_set1_epi = simde_mm512_set1_epi8;
static inline constexpr auto mm512_add_epi = simde_mm512_add_epi8;
static inline constexpr auto mm512_sub_epi = simde_mm512_sub_epi8;
};

// SIMD instructions for int16_t and uint16_t.
template <std::integral integral_t>
requires (sizeof(integral_t) == 2)
struct simd_mapping<integral_t> : simd_mapping_crtp<simd_mapping<integral_t>, integral_t>
{
static inline constexpr auto mm512_maskz_mov_epi = simde_mm512_maskz_mov_epi16;
static inline constexpr auto mm512_set1_epi = simde_mm512_set1_epi16;
static inline constexpr auto mm512_add_epi = simde_mm512_add_epi16;
static inline constexpr auto mm512_sub_epi = simde_mm512_sub_epi16;
};

// SIMD instructions for int32_t and uint32_t.
template <std::integral integral_t>
requires (sizeof(integral_t) == 4)
struct simd_mapping<integral_t> : simd_mapping_crtp<simd_mapping<integral_t>, integral_t>
{
static inline constexpr auto mm512_maskz_mov_epi = simde_mm512_maskz_mov_epi32;
static inline constexpr auto mm512_set1_epi = simde_mm512_set1_epi32;
static inline constexpr auto mm512_add_epi = simde_mm512_add_epi32;
static inline constexpr auto mm512_sub_epi = simde_mm512_sub_epi32;
};

// SIMD instructions for int64_t and uint64_t.
template <std::integral integral_t>
requires (sizeof(integral_t) == 8)
struct simd_mapping<integral_t> : simd_mapping_crtp<simd_mapping<integral_t>, integral_t>
{
static inline constexpr auto mm512_maskz_mov_epi = simde_mm512_maskz_mov_epi64;
static inline constexpr auto mm512_set1_epi = simde_mm512_set1_epi64;
static inline constexpr auto mm512_add_epi = simde_mm512_add_epi64;
static inline constexpr auto mm512_sub_epi = simde_mm512_sub_epi64;
};
//!\endcond
#endif

/*!\brief A data structure that behaves like a std::vector and can be used to consolidate the results of multiple calls
* to seqan::hibf::interleaved_bloom_filter::membership_agent_type::bulk_contains.
* \ingroup ibf
Expand All @@ -57,11 +147,11 @@ namespace seqan::hibf
* \include test/snippet/ibf/counting_vector.cpp
*/
template <std::integral value_t>
class counting_vector : public std::vector<value_t>
class counting_vector : public std::vector<value_t, seqan::hibf::contrib::aligned_allocator<value_t, 64u>>
{
private:
//!\brief The base type.
using base_t = std::vector<value_t>;
using base_t = std::vector<value_t, seqan::hibf::contrib::aligned_allocator<value_t, 64u>>;

public:
/*!\name Constructors, destructor and assignment
Expand All @@ -78,46 +168,37 @@ class counting_vector : public std::vector<value_t>
//!\}

/*!\brief Bin-wise adds the bits of a seqan::hibf::bit_vector.
* \param bit_vector The seqan::hibf::bit_vector.
* \attention The counting_vector must be at least as big as `bit_vector`.
*
* \copydetails operator-=(bit_vector const &)
* \details
*
* ### Example
*
* \include test/snippet/ibf/counting_vector.cpp
*/
counting_vector & operator+=(bit_vector const & bit_vector)
{
for_each_set_bin(bit_vector,
[this](size_t const bin)
{
++(*this)[bin];
});
impl<operation::add>(bit_vector);
return *this;
}

/*!\brief Bin-wise subtracts the bits of a seqan::hibf::bit_vector.
* \param bit_vector The seqan::hibf::bit_vector.
* \attention The counting_vector must be at least as big as `bit_vector`.
* \pre `counting_vector.size() >= bit_vector.size()` and either:
* * `bit_vector.size() % 64 == 0`
* \pre or
* * `counting_vector.capacity() >= seqan::hibf::next_multiple_of_64(bit_vector.size())` and
* * &forall; bit &isin; [`bit_vector.size()`, `bit_vector.capacity()`) : `bit_vector[bit] == 0`<br>
* In practive, this condition might be violated by setting bits in a bit_vector and then shrinking it via
* \ref bit_vector::resize() "resize()".
*/
counting_vector & operator-=(bit_vector const & bit_vector)
{
for_each_set_bin(bit_vector,
[this](size_t const bin)
{
assert((*this)[bin] > 0);
--(*this)[bin];
});
impl<operation::sub>(bit_vector);
return *this;
}

/*!\brief Bin-wise addition of two `seqan::hibf::counting_vector`s.
* \param rhs The other seqan::hibf::counting_vector.
* \attention The seqan::hibf::counting_vector must be at least as big as `rhs`.
*
* \copydetails operator-=(counting_vector const &)
* \details
*
* ### Example
*
* \include test/snippet/ibf/counting_vector.cpp
Expand All @@ -131,59 +212,93 @@ class counting_vector : public std::vector<value_t>
return *this;
}

/*!\brief Bin-wise substraction of two `seqan::hibf::counting_vector`s.
/*!\brief Bin-wise subtraction of two `seqan::hibf::counting_vector`s.
* \param rhs The other seqan::hibf::counting_vector.
* \attention The seqan::hibf::counting_vector must be at least as big as `rhs`.
* \pre `counting_vector.size() >= rhs.size()`
*/
counting_vector & operator-=(counting_vector const & rhs)
{
assert(this->size() >= rhs.size()); // The counting vector may be bigger than what we need.

std::transform(this->begin(),
this->end(),
rhs.begin(),
this->begin(),
[](auto a, auto b)
{
assert(a >= b);
return a - b;
});
std::transform(this->begin(), this->end(), rhs.begin(), this->begin(), std::minus<value_t>());

return *this;
}

private:
//!\brief Enumerates all bins of a seqan::hibf::bit_vector.
template <typename on_bin_fn_t>
void for_each_set_bin(bit_vector const & bit_vector, on_bin_fn_t && on_bin_fn)
//!\brief The operation to perform.
enum class operation
{
add,
sub
};

//!\brief Bin-wise adds or subtracts the bits of a seqan::hibf::bit_vector.
template <operation op>
inline void impl(bit_vector const & bit_vector)
{
assert(this->size() >= bit_vector.size()); // The counting vector may be bigger than what we need.
size_t const words = (bit_vector.size() + 63u) >> 6;
uint64_t const * const bitvector_raw = bit_vector.data();
#if HIBF_HAS_AVX512
// AVX512BW: mm512_maskz_mov_epi, mm512_add_epi
// AVX512F: mm512_set1_epi, _mm512_load_si512, _mm512_store_si512
using simd = simd_mapping<value_t>;
using bits_type = typename simd::bits_type;

bits_type const * bit_vector_ptr = reinterpret_cast<bits_type const *>(bit_vector.data());
value_t * counting_vector_ptr = base_t::data();

size_t const bits = next_multiple_of_64(bit_vector.size());
assert(bits <= this->capacity()); // Not enough memory reserved for AVX512 chunk access.
size_t const iterations = bits / simd::bits_per_iterations;

for (size_t iteration = 0; iteration < iterations;
++iteration, ++bit_vector_ptr, counting_vector_ptr += simd::bits_per_iterations)
{
simde__m512i load = simde_mm512_load_si512(counting_vector_ptr);
if constexpr (op == operation::add)
{
load = simd::mm512_add_epi(load, simd::expand_bits(bit_vector_ptr));
}
else
{
load = simd::mm512_sub_epi(load, simd::expand_bits(bit_vector_ptr));
}
simde_mm512_store_si512(counting_vector_ptr, load);
}
#else
size_t const words = divide_and_ceil(bit_vector.size(), 64u);
uint64_t const * const bit_vector_ptr = bit_vector.data();

// Jump to the next 1 and return the number of places jumped in the bit_sequence
auto jump_to_next_1bit = [](uint64_t & x)
// Jump to the next 1 and return the number of jumped bits in value.
auto jump_to_next_1bit = [](uint64_t & value)
{
auto const zeros = std::countr_zero(x);
x >>= zeros; // skip number of zeros
auto const zeros = std::countr_zero(value);
value >>= zeros; // skip number of zeros
return zeros;
};

// Each iteration can handle 64 bits
for (size_t batch = 0; batch < words; ++batch)
// Each iteration can handle 64 bits, i.e., one word.
for (size_t iteration = 0; iteration < words; ++iteration)
{
// get 64 bits starting at position `bit_pos`
uint64_t bit_sequence = bitvector_raw[batch];
uint64_t current_word = bit_vector_ptr[iteration];

// process each relative bin inside the bit_sequence
for (size_t bin = batch << 6; bit_sequence != 0u; ++bin, bit_sequence >>= 1)
// For each set bit in the current word, add/subtract 1 to the corresponding bin.
for (size_t bin = iteration * 64u; current_word != 0u; ++bin, current_word >>= 1)
{
// Jump to the next 1 and
bin += jump_to_next_1bit(bit_sequence);
// Jump to the next 1
bin += jump_to_next_1bit(current_word);

on_bin_fn(bin);
if constexpr (op == operation::add)
{
++(*this)[bin];
}
else
{
--(*this)[bin];
}
}
}
#endif
}
};

Expand Down
12 changes: 12 additions & 0 deletions include/hibf/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@
# define HIBF_DISABLE_COMPILER_CHECK
#endif // HIBF_DOXYGEN_ONLY(1)0

/*!\def HIBF_HAS_AVX512
* \brief Whether AVX512F and AVX512BW are available.
* \ingroup core
*/
#ifndef HIBF_HAS_AVX512
# if __AVX512F__ && __AVX512BW__
# define HIBF_HAS_AVX512 1
# else
# define HIBF_HAS_AVX512 0
# endif
#endif

// ============================================================================
// Compiler support GCC
// ============================================================================
Expand Down
Loading

0 comments on commit 2826ef5

Please sign in to comment.