From 9b1acaaf349135c8dc0b54855d1925bce2e3392c Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 10 Dec 2024 08:16:01 +0100 Subject: [PATCH] [FEATURE] Add try_increase_bin_number_to --- include/hibf/interleaved_bloom_filter.hpp | 31 ++++++++++++-- src/interleaved_bloom_filter.cpp | 24 +++++++---- ...loom_filter_try_increase_bin_number_to.cpp | 42 +++++++++++++++++++ ...loom_filter_try_increase_bin_number_to.out | 11 +++++ ...ter_try_increase_bin_number_to.out.license | 3 ++ .../hibf/interleaved_bloom_filter_test.cpp | 22 ++++++++++ 6 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.cpp create mode 100644 test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out create mode 100644 test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out.license diff --git a/include/hibf/interleaved_bloom_filter.hpp b/include/hibf/interleaved_bloom_filter.hpp index 0508f694..5c5bb087 100644 --- a/include/hibf/interleaved_bloom_filter.hpp +++ b/include/hibf/interleaved_bloom_filter.hpp @@ -300,13 +300,36 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector (*this)[bin.value + offset] = 0; } + /*!\brief Tries increasing the number of bins stored in the Interleaved Bloom Filter without reallocating memory. + * \param[in] new_bin_count The new number of bins. + * \returns `true` if the number of bins was set, `false` if the number of bins was not set. + * \sa seqan::hibf::interleaved_bloom_filter::increase_bin_number_to + * + * \attention If the new bin count is greater than the old bin count and this function returns `true`, all + * seqan::hibf::interleaved_bloom_filter::membership_agent_type and + * seqan::hibf::interleaved_bloom_filter::counting_agent_type constructed for this Interleaved Bloom Filter are + * invalidated. 
+ * + * | Condition | Internal Condition | Effect | Return value | + * |-----------------------------------|------------------------|-------------------------------|--------------| + * | New bin count > current bin count | resize is not required | bin count is set to new value | `true` | + * | New bin count > current bin count | resize is required | none | `false` | + * | New bin count = current bin count | none | none | `true` | + * | New bin count < current bin count | none | none | `false` | + * + * ### Example + * + * \include test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.cpp + */ + bool try_increase_bin_number_to(bin_count const new_bin_count) noexcept; + /*!\brief Increases the number of bins stored in the Interleaved Bloom Filter. - * \param[in] new_bins_ The new number of bins. + * \param[in] new_bin_count The new number of bins. * \throws std::invalid_argument If passed number of bins is smaller than current number of bins. * * \attention The new number of bins must be greater or equal to the current number of bins. - * \attention This function invalidates all seqan::hibf::interleaved_bloom_filter::membership_agent_type constructed for - * this Interleaved Bloom Filter. + * \attention This function invalidates all seqan::hibf::interleaved_bloom_filter::membership_agent_type and + * seqan::hibf::interleaved_bloom_filter::counting_agent_type constructed for this Interleaved Bloom Filter. 
* * \details * @@ -322,7 +345,7 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector * * \include test/snippet/ibf/interleaved_bloom_filter_increase_bin_number_to.cpp */ - void increase_bin_number_to(bin_count const new_bins_); + void increase_bin_number_to(bin_count const new_bin_count); //!\} /*!\name Lookup diff --git a/src/interleaved_bloom_filter.cpp b/src/interleaved_bloom_filter.cpp index af1f0968..60e7570e 100644 --- a/src/interleaved_bloom_filter.cpp +++ b/src/interleaved_bloom_filter.cpp @@ -154,20 +154,29 @@ void interleaved_bloom_filter::clear(bin_index const bin) noexcept (*this)[idx] = 0; } -void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count const new_bins_) +bool interleaved_bloom_filter::try_increase_bin_number_to(seqan::hibf::bin_count const new_bin_count) noexcept { - size_t const new_bins = new_bins_.value; - - if (new_bins < bins) - throw std::invalid_argument{"The number of new bins must be >= the current number of bins."}; - + size_t const new_bins = new_bin_count.value; size_t const new_bin_words = divide_and_ceil(new_bins, 64u); + if (new_bins < bins || new_bin_words > bin_words) + return false; + bins = new_bins; + return true; +} - if (new_bin_words == bin_words) // No need for internal resize if bin_words does not change. 
+void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count const new_bin_count) +{ + if (new_bin_count.value < bins) + throw std::invalid_argument{"The number of new bins must be >= the current number of bins."}; + + if (try_increase_bin_number_to(new_bin_count)) return; + size_t const new_bins = new_bin_count.value; + size_t const new_bin_words = divide_and_ceil(new_bins, 64u); + size_t const new_technical_bins = new_bin_words * 64u; size_t const new_bit_size = bin_size_ * new_technical_bins; size_t const old_bit_size = size(); @@ -193,6 +202,7 @@ void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count con } } + bins = new_bins; bin_words = new_bin_words; technical_bins = new_technical_bins; } diff --git a/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.cpp b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.cpp new file mode 100644 index 00000000..4f294855 --- /dev/null +++ b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.cpp @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: CC0-1.0 + +#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_index, bin_count, bin_size +#include <hibf/misc/print.hpp> // for print, print_t + +int main() +{ + seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u}}; + ibf.emplace(126, seqan::hibf::bin_index{0u}); + ibf.emplace(712, seqan::hibf::bin_index{3u}); + ibf.emplace(237, seqan::hibf::bin_index{9u}); + + // Same bin count has no effect and returns `true`. + bool result = ibf.try_increase_bin_number_to(seqan::hibf::bin_count{73u}); + std::cout << std::boolalpha << result << '\n'; // true + std::cout << ibf.bin_count() << '\n'; // 73 + + // Smaller bin count has no effect and returns `false`. 
+ result = ibf.try_increase_bin_number_to(seqan::hibf::bin_count{50u}); + std::cout << std::boolalpha << result << '\n'; // false + std::cout << ibf.bin_count() << '\n'; // 73 + + // Larger bin count and resize not required increases the bin count and returns `true`. + result = ibf.try_increase_bin_number_to(seqan::hibf::bin_count{128u}); + std::cout << std::boolalpha << result << '\n'; // true + std::cout << ibf.bin_count() << '\n'; // 128 + + // Resize would be required, hence returns `false`. + result = ibf.try_increase_bin_number_to(seqan::hibf::bin_count{129u}); + std::cout << std::boolalpha << result << '\n'; // false + std::cout << ibf.bin_count() << '\n'; // 128 + + // Be sure to get the agent after `try_increase_bin_number_to` as it may invalidate all agents! + auto agent = ibf.membership_agent(); + + // The content of the bins which were already present before the resize does not change + seqan::hibf::print(agent.bulk_contains(126)); // [1,0,0,0,0,0,0,0,0,0,0,...,0] + seqan::hibf::print(agent.bulk_contains(712)); // [0,0,0,1,0,0,0,0,0,0,0,...,0] + seqan::hibf::print(agent.bulk_contains(237)); // [0,0,0,0,0,0,0,0,0,1,0,...,0] +} diff --git a/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out new file mode 100644 index 00000000..5c7d016d --- /dev/null +++ b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out @@ -0,0 +1,11 @@ +true +73 +false +73 +true +128 +false +128 +[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 
+[0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] diff --git a/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out.license b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out.license new file mode 100644 index 00000000..e345ee05 --- /dev/null +++ b/test/snippet/ibf/interleaved_bloom_filter_try_increase_bin_number_to.out.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +SPDX-License-Identifier: CC0-1.0 diff --git a/test/unit/hibf/interleaved_bloom_filter_test.cpp b/test/unit/hibf/interleaved_bloom_filter_test.cpp index 9a3d35a8..8aff0be2 100644 --- a/test/unit/hibf/interleaved_bloom_filter_test.cpp +++ b/test/unit/hibf/interleaved_bloom_filter_test.cpp @@ -387,6 +387,28 @@ TEST(ibf_test, counting_agent_no_ub) EXPECT_RANGE_EQ(agent2.bulk_count(std::views::iota(0u, 128u)), expected); } +TEST(ibf_test, try_increase_bin_number_to) +{ + seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u}}; + size_t const original_bitsize{ibf.bit_size()}; + + EXPECT_TRUE(ibf.try_increase_bin_number_to({73u})); + EXPECT_EQ(ibf.bin_count(), 73u); + EXPECT_EQ(ibf.bit_size(), original_bitsize); + + EXPECT_FALSE(ibf.try_increase_bin_number_to({50u})); + EXPECT_EQ(ibf.bin_count(), 73u); + EXPECT_EQ(ibf.bit_size(), original_bitsize); + + EXPECT_TRUE(ibf.try_increase_bin_number_to({128u})); + EXPECT_EQ(ibf.bin_count(), 
128u); + EXPECT_EQ(ibf.bit_size(), original_bitsize); + + EXPECT_FALSE(ibf.try_increase_bin_number_to({129u})); + EXPECT_EQ(ibf.bin_count(), 128u); + EXPECT_EQ(ibf.bit_size(), original_bitsize); +} + TEST(ibf_test, increase_bin_number_to) { seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u}};