Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] emplace_exists #212

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions include/hibf/interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
return h;
}

//!\brief Helper function to reduce code-duplication between emplace and emplace_exists.
template <bool check_exists>
inline auto emplace_impl(size_t const value, bin_index const bin) noexcept;

public:
class membership_agent_type; // documented upon definition below
template <std::integral value_t>
Expand Down Expand Up @@ -231,8 +235,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* \param[in] value The raw numeric value to process.
* \param[in] bin The bin index to insert into.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -241,11 +243,17 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
*/
void emplace(size_t const value, bin_index const bin) noexcept;

/*!\brief Inserts a value into a specific bin and returns whether the value already existed.
* \param[in] value The raw numeric value to process.
* \param[in] bin The bin index to insert into.
* \returns `true` if the value already existed, `false` otherwise.
* \sa seqan::hibf::interleaved_bloom_filter::emplace
*
* \details
*
* The value is reported as already existing only if every bit that the hash functions select for `value` in `bin`
* was set before this call. After the call, all of these bits are set, regardless of the return value.
*
* The return value is marked `[[nodiscard]]`; use seqan::hibf::interleaved_bloom_filter::emplace if the result is
* not needed.
*
* NOTE(review): because only bits are inspected, `true` can presumably also be returned for a value that was never
* inserted, if other insertions have set the same bits - confirm before relying on exact membership.
*/
[[nodiscard]] bool emplace_exists(size_t const value, bin_index const bin) noexcept;

/*!\brief Clears a specific bin.
* \param[in] bin The bin index to clear.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -259,8 +267,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* seqan::hibf::bin_index.
* \param[in] bin_range The range of bins to clear.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
*
* \details
*
* ### Example
Expand All @@ -287,7 +293,6 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
* \param[in] new_bins_ The new number of bins.
* \throws std::invalid_argument If passed number of bins is smaller than current number of bins.
*
* \attention This function is only available for **uncompressed** Interleaved Bloom Filters.
* \attention The new number of bins must be greater or equal to the current number of bins.
eseiler marked this conversation as resolved.
Show resolved Hide resolved
* \attention This function invalidates all seqan::hibf::interleaved_bloom_filter::membership_agent_type constructed for
* this Interleaved Bloom Filter.
Expand Down
27 changes: 25 additions & 2 deletions src/interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,39 @@ interleaved_bloom_filter::interleaved_bloom_filter(config & configuration) :
}
}

void interleaved_bloom_filter::emplace(size_t const value, bin_index const bin) noexcept
template <bool check_exists>
inline auto interleaved_bloom_filter::emplace_impl(size_t const value, bin_index const bin) noexcept
{
assert(bin.value < bins);

[[maybe_unused]] bool exists{true};

for (size_t i = 0; i < hash_funs; ++i)
{
size_t idx = hash_and_fit(value, hash_seeds[i]);
idx += bin.value;
assert(idx < size());
(*this)[idx] = 1;

// Constructing the reference twice for emplace_exists would impact performance.
// No difference for emplace.
seqan::hibf::bit_vector::reference bit_reference{(*this)[idx]};
if constexpr (check_exists)
exists &= bit_reference;
bit_reference = 1;
Comment on lines +101 to +106
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean that

        if constexpr (check_exists)
            exists &= (*this)[idx];
        (*this)[idx] = 1;

would construct the reference twice and would therefore be slower?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it would need to compute the bit position twice. Since there isn't much happening in emplace, doing it twice really affects performance.

emplace         50M/s
emplace_exists  47M/s
emplace_exists* 28M/s

* is with two references

};

if constexpr (check_exists)
return exists;
};

//!\brief Inserts `value` into `bin`; does not report whether the value was already present.
void interleaved_bloom_filter::emplace(size_t const value, bin_index const bin) noexcept
{
    // Forward to the shared implementation, instantiated without the existence check.
    emplace_impl<false>(value, bin);
}

//!\brief Inserts `value` into `bin` and reports whether the value was already present.
bool interleaved_bloom_filter::emplace_exists(size_t const value, bin_index const bin) noexcept
{
    // Forward to the shared implementation, instantiated with the existence check.
    bool const already_present = emplace_impl<true>(value, bin);
    return already_present;
}

void interleaved_bloom_filter::clear(bin_index const bin) noexcept
Expand Down
22 changes: 20 additions & 2 deletions test/performance/ibf/interleaved_bloom_filter_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ inline benchmark::Counter elements_per_second(size_t const count)
return benchmark::Counter(count, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000);
}

void emplace_benchmark(::benchmark::State & state)
template <bool check_exists>
inline void emplace_benchmark_impl(::benchmark::State & state)
{
auto const & [values, original_ibf] = set_up(state);

Expand All @@ -107,17 +108,33 @@ void emplace_benchmark(::benchmark::State & state)
for (auto _ : state)
{
size_t bin_index = 0u;
[[maybe_unused]] size_t result{};
for (auto && chunk : seqan::stl::views::chunk(values, chunk_size))
{
for (auto value : chunk)
ibf.emplace(value, seqan::hibf::bin_index{bin_index});
if constexpr (check_exists)
result += ibf.emplace_exists(value, seqan::hibf::bin_index{bin_index});
else
ibf.emplace(value, seqan::hibf::bin_index{bin_index});
++bin_index;
}
if constexpr (check_exists)
benchmark::DoNotOptimize(result);
}

state.counters["elements"] = elements_per_second(number_of_elements);
}

//!\brief Benchmark entry point for `emplace` (shared benchmark body without the existence check).
void emplace_benchmark(::benchmark::State & state)
{
    constexpr bool check_exists = false;
    emplace_benchmark_impl<check_exists>(state);
}

//!\brief Benchmark entry point for `emplace_exists` (shared benchmark body with the existence check).
void emplace_exists_benchmark(::benchmark::State & state)
{
    constexpr bool check_exists = true;
    emplace_benchmark_impl<check_exists>(state);
}

void clear_benchmark(::benchmark::State & state)
{
auto const & [values, original_ibf] = set_up(state);
Expand Down Expand Up @@ -193,6 +210,7 @@ void bulk_count_benchmark(::benchmark::State & state)
}

BENCHMARK(emplace_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(emplace_exists_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(clear_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(clear_range_benchmark)->RangeMultiplier(2)->Range(64, 1024);
BENCHMARK(bulk_contains_benchmark)->RangeMultiplier(2)->Range(64, 1024);
Expand Down
20 changes: 20 additions & 0 deletions test/unit/hibf/interleaved_bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,26 @@ TEST(ibf_test, emplace)
}
}

// Checks that emplace_exists returns true for values that were emplaced beforehand, and false for a value that is
// emplaced into a bin into which nothing has been inserted yet.
TEST(ibf_test, emplace_exists)
{
    // Setup: 128 bins, of which only bins [0, 64) receive the values [0, 64).
    seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u},
                                              seqan::hibf::bin_size{512},
                                              seqan::hibf::hash_function_count{2u}};

    for (size_t bin_id = 0; bin_id < 64; ++bin_id)
    {
        for (size_t value = 0; value < 64; ++value)
            ibf.emplace(value, seqan::hibf::bin_index{bin_id});
    }

    // Inserting the same values into the same bins again must report each of them as already present.
    for (size_t bin_id = 0; bin_id < 64; ++bin_id)
    {
        for (size_t value = 0; value < 64; ++value)
            ASSERT_TRUE(ibf.emplace_exists(value, seqan::hibf::bin_index{bin_id}));
    }

    // The first insertion into each of the untouched bins [64, 128) must report the value as not present.
    for (size_t bin_id = 64; bin_id < 128; ++bin_id)
        ASSERT_FALSE(ibf.emplace_exists(0u, seqan::hibf::bin_index{bin_id}));
}

TEST(ibf_test, clear)
{
// 1. Construct and emplace
Expand Down
Loading