Skip to content

Commit

Permalink
[FEATURE] emplace_exists
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Jul 11, 2024
1 parent fead38a commit 7814eb3
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 12 deletions.
20 changes: 12 additions & 8 deletions include/hibf/interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
return h;
}

/*!\brief Helper function to reduce code-duplication between emplace and emplace_exists.
 * \tparam check_exists Whether to track if the value already existed (`true` for emplace_exists, `false` for emplace).
 * \param[in] value The raw numeric value to process.
 * \param[in] bin The bin index to insert into.
 * \returns For `check_exists == true`, a `bool` telling whether the value already existed; nothing otherwise
 *          (the return type is deduced by the definition).
 */
template <bool check_exists>
inline auto emplace_impl(size_t const value, bin_index const bin) noexcept;

public:
class membership_agent_type; // documented upon definition below
template <std::integral value_t>
Expand Down Expand Up @@ -231,8 +235,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* \param[in] value The raw numeric value to process.
* \param[in] bin The bin index to insert into.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -241,11 +243,17 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
*/
void emplace(size_t const value, bin_index const bin) noexcept;

/*!\brief Inserts a value into a specific bin and returns whether the value already existed.
 * \param[in] value The raw numeric value to process.
 * \param[in] bin The bin index to insert into.
 * \returns `true` if the value already existed, `false` otherwise.
 * \sa seqan::hibf::interleaved_bloom_filter::emplace
 *
 * \details
 *
 * "Already existed" means that every bit addressed by the hash functions for the given bin was set before this call.
 * Since only these bits are inspected, `true` may also be returned if other values have set the same bits.
 * The value is inserted in either case. The result is marked `[[nodiscard]]`; use
 * seqan::hibf::interleaved_bloom_filter::emplace if it is not needed.
 */
[[nodiscard]] bool emplace_exists(size_t const value, bin_index const bin) noexcept;

/*!\brief Clears a specific bin.
* \param[in] bin The bin index to clear.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -259,8 +267,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* seqan::hibf::bin_index.
* \param[in] bin_range The range of bins to clear.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -287,8 +293,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* \param[in] new_bins_ The new number of bins.
* \throws std::invalid_argument If passed number of bins is smaller than current number of bins.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
* \attention The new number of bins must be greater or equal to the current number of bins.
* \attention This function invalidates all seqan::hibf::interleaved_bloom_filter::membership_agent_type constructed for
* this Interleaved Bloom Filter.
*
Expand Down
27 changes: 25 additions & 2 deletions src/interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,39 @@ interleaved_bloom_filter::interleaved_bloom_filter(config & configuration) :
}
}

void interleaved_bloom_filter::emplace(size_t const value, bin_index const bin) noexcept
/*!\brief Shared implementation of emplace and emplace_exists.
 * \tparam check_exists If `true`, track and return whether every addressed bit was already set.
 * \param[in] value The raw numeric value to process.
 * \param[in] bin The bin index to insert into.
 * \returns For `check_exists == true`: `true` if all bits addressed by the hash functions were set before this
 *          call, `false` otherwise. For `check_exists == false`: nothing (the deduced return type is `void`).
 */
template <bool check_exists>
inline auto interleaved_bloom_filter::emplace_impl(size_t const value, bin_index const bin) noexcept
{
    assert(bin.value < bins);

    // Only read when check_exists is true; starts as true and is cleared by the first unset bit.
    [[maybe_unused]] bool exists{true};

    for (size_t i = 0; i < hash_funs; ++i)
    {
        size_t idx = hash_and_fit(value, hash_seeds[i]);
        idx += bin.value;
        assert(idx < size());

        // Constructing the reference twice for emplace_exists would impact performance.
        // No difference for emplace.
        seqan::hibf::bit_vector::reference bit_reference{(*this)[idx]};

        // The previous state must be read before the bit is set; writing first would make `exists` always true.
        if constexpr (check_exists)
            exists &= bit_reference;
        bit_reference = 1;
    }

    if constexpr (check_exists)
        return exists;
}

// Inserts `value` into `bin` without reporting whether it was already present.
void interleaved_bloom_filter::emplace(size_t const value, bin_index const bin) noexcept
{
    // The <false> instantiation performs no existence tracking and produces no result.
    emplace_impl<false>(value, bin);
}

// Inserts `value` into `bin` and forwards the result of the existence-tracking instantiation of emplace_impl.
bool interleaved_bloom_filter::emplace_exists(size_t const value, bin_index const bin) noexcept
{
    bool const existed_before = emplace_impl<true>(value, bin);
    return existed_before;
}

void interleaved_bloom_filter::clear(bin_index const bin) noexcept
Expand Down
22 changes: 20 additions & 2 deletions test/performance/ibf/interleaved_bloom_filter_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ inline benchmark::Counter elements_per_second(size_t const count)
return benchmark::Counter(count, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000);
}

void emplace_benchmark(::benchmark::State & state)
template <bool check_exists>
inline void emplace_benchmark_impl(::benchmark::State & state)
{
auto const & [values, original_ibf] = set_up(state);

Expand All @@ -107,17 +108,33 @@ void emplace_benchmark(::benchmark::State & state)
for (auto _ : state)
{
size_t bin_index = 0u;
[[maybe_unused]] size_t result{};
for (auto && chunk : seqan::stl::views::chunk(values, chunk_size))
{
for (auto value : chunk)
ibf.emplace(value, seqan::hibf::bin_index{bin_index});
if constexpr (check_exists)
result += ibf.emplace_exists(value, seqan::hibf::bin_index{bin_index});
else
ibf.emplace(value, seqan::hibf::bin_index{bin_index});
++bin_index;
}
if constexpr (check_exists)
benchmark::DoNotOptimize(result);
}

state.counters["elements"] = elements_per_second(number_of_elements);
}

// Benchmark entry point for emplace: runs the shared benchmark body without the existence check.
void emplace_benchmark(::benchmark::State & state)
{
    constexpr bool check_exists{false};
    emplace_benchmark_impl<check_exists>(state);
}

// Benchmark entry point for emplace_exists: runs the shared benchmark body with the existence check enabled.
void emplace_exists_benchmark(::benchmark::State & state)
{
    constexpr bool check_exists{true};
    emplace_benchmark_impl<check_exists>(state);
}

void clear_benchmark(::benchmark::State & state)
{
auto const & [values, original_ibf] = set_up(state);
Expand Down Expand Up @@ -193,6 +210,7 @@ void bulk_count_benchmark(::benchmark::State & state)
}

// Register every benchmark with the arguments 64, 128, 256, 512 and 1024 (Range(64, 1024) with a multiplier of 2).
// NOTE(review): how the argument is interpreted is decided in set_up(), which is not visible here - confirm there.
BENCHMARK(emplace_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(emplace_exists_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(clear_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(clear_range_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(bulk_contains_benchmark)->RangeMultiplier(2)->Range(64, 1024);
Expand Down
20 changes: 20 additions & 0 deletions test/unit/hibf/interleaved_bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,26 @@ TEST(ibf_test, emplace)
}
}

TEST(ibf_test, emplace_exists)
{
    // Build an IBF with 128 bins and populate only bins 0..63 with the values 0..63.
    seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u},
                                              seqan::hibf::bin_size{512},
                                              seqan::hibf::hash_function_count{2u}};

    for (size_t bin_number = 0; bin_number < 64; ++bin_number)
    {
        for (size_t inserted_value = 0; inserted_value < 64; ++inserted_value)
        {
            ibf.emplace(inserted_value, seqan::hibf::bin_index{bin_number});
        }
    }

    // Re-inserting into the populated bins must report that each value is already present.
    for (size_t bin_number = 0; bin_number < 64; ++bin_number)
    {
        for (size_t inserted_value = 0; inserted_value < 64; ++inserted_value)
        {
            ASSERT_TRUE(ibf.emplace_exists(inserted_value, seqan::hibf::bin_index{bin_number}));
        }
    }

    // Bins 64..127 were never touched, so the first insertion must report "not present".
    for (size_t bin_number = 64; bin_number < 128; ++bin_number)
    {
        ASSERT_FALSE(ibf.emplace_exists(0u, seqan::hibf::bin_index{bin_number}));
    }
}

TEST(ibf_test, clear)
{
// 1. Construct and emplace
Expand Down

0 comments on commit 7814eb3

Please sign in to comment.