From 59e653e65e14077ccf709ae71140e308c69a7007 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Tue, 17 Oct 2023 13:34:35 +0200 Subject: [PATCH 1/3] [FEATURE] Allow different merged bin FPR. --- include/hibf/config.hpp | 59 ++++++++++---- include/hibf/layout/hierarchical_binning.hpp | 78 ++++++++++++++++--- src/build/construct_ibf.cpp | 19 +++-- src/config.cpp | 8 ++ src/layout/hierarchical_binning.cpp | 51 +++++------- test/unit/hibf/config_test.cpp | 29 +++++++ ...archical_interleaved_bloom_filter_test.cpp | 1 + .../hibf/layout/hierarchical_binning_test.cpp | 2 +- 8 files changed, 188 insertions(+), 59 deletions(-) mode change 100755 => 100644 src/layout/hierarchical_binning.cpp diff --git a/include/hibf/config.hpp b/include/hibf/config.hpp index 82b6ba29..8536c33a 100644 --- a/include/hibf/config.hpp +++ b/include/hibf/config.hpp @@ -30,19 +30,20 @@ namespace seqan::hibf * * Here is the list of all configs options: * - * | Type | Option Name | Default | Note | - * |:--------|:-------------------------------------------------|:-------:|:-----------------------| - * | General | seqan::hibf::config::input_fn | - | [REQUIRED] | - * | General | seqan::hibf::config::number_of_user_bins | - | [REQUIRED] | - * | General | seqan::hibf::config::number_of_hash_functions | 2 | | - * | General | seqan::hibf::config::maximum_false_positive_rate | 0.05 | [RECOMMENDED_TO_ADAPT] | - * | General | seqan::hibf::config::threads | 1 | [RECOMMENDED_TO_ADAPT] | - * | Layout | seqan::hibf::config::sketch_bits | 12 | | - * | Layout | seqan::hibf::config::tmax | 0 | 0 indicates unset | - * | Layout | seqan::hibf::config::max_rearrangement_ratio | 0.5 | | - * | Layout | seqan::hibf::config::alpha | 1.2 | | - * | Layout | seqan::hibf::config::disable_estimate_union | false | | - * | Layout | seqan::hibf::config::disable_rearrangement | false | | + * | Type | Option Name | Default | Note | + * 
|:--------|:------------------------------------------------------------|:-------:|:-----------------------| + * | General | seqan::hibf::config::input_fn | - | [REQUIRED] | + * | General | seqan::hibf::config::number_of_user_bins | - | [REQUIRED] | + * | General | seqan::hibf::config::number_of_hash_functions | 2 | | + * | General | seqan::hibf::config::maximum_false_positive_rate | 0.05 | [RECOMMENDED_TO_ADAPT] | + * | General | seqan::hibf::config::relaxed_fpr | 0.3 | | + * | General | seqan::hibf::config::threads | 1 | [RECOMMENDED_TO_ADAPT] | + * | Layout | seqan::hibf::config::sketch_bits | 12 | | + * | Layout | seqan::hibf::config::tmax | 0 | 0 indicates unset | + * | Layout | seqan::hibf::config::max_rearrangement_ratio | 0.5 | | + * | Layout | seqan::hibf::config::alpha | 1.2 | | + * | Layout | seqan::hibf::config::disable_estimate_union | false | | + * | Layout | seqan::hibf::config::disable_rearrangement | false | | * * As a copy and paste source, here are all config options with their defaults: * @@ -148,6 +149,34 @@ struct config */ double maximum_false_positive_rate{0.05}; + /*!\brief Allow a higher FPR in non-accuracy-critical parts of the HIBF structure. + * + * Some parts in the hierarchical structure are not critical to ensure the seqan::hibf::config::maximum_false_positive_rate. + * These can be allowed to have a higher FPR to reduce the overall space consumption taking into account a small + * decrease in runtime performance. + * + * Value must be in range [0,1]. + * Value must be equal to or larger than seqan::hibf::config::maximum_false_positive_rate. + * Recommendation: default value (0.3) + * + * ### Technical details + * + * Merged bins in an HIBF layout will always be followed by one or more lower-level IBFs that will have split bins + * or single bins (split = 1) to recover the original user bins. Thus, the FPR of merged bins does not determine the + * seqan::hibf::config::maximum_false_positive_rate, but is independent. 
Choosing a higher FPR for merged bins can + * lower the memory requirement but increases the runtime. Experiments show that the decrease in memory is + * significant, while the runtime suffers only slightly. The accuracy of the results is not affected by this + * parameter. + * + * Note: For each IBF there is a limit to how high the FPR of merged bins can be. Specifically, the FPR for merged + * bins can never decrease the IBF size more than what is needed to ensure the + * seqan::hibf::config::maximum_false_positive_rate for split bins. This means that, at some point, choosing even + * higher values for this parameter will have no effect anymore. + * + * \sa [Bloom Filter Calculator](https://hur.st/bloomfilter/). + */ + double relaxed_fpr{0.3}; + /*!\brief The number of threads to use during construction. [RECOMMENDED_TO_ADAPT] * * Using more threads increases the memory consumption during construction because the threads hold local @@ -265,6 +294,9 @@ struct config * Constrains: * * seqan::hibf::config::number_of_hash_functions must be in `[1,5]`. * * seqan::hibf::config::maximum_false_positive_rate must be in `(0.0,1.0)`. + * * seqan::hibf::config::relaxed_fpr must be in `[0.0,1.0]`. + * * seqan::hibf::config::relaxed_fpr must be equal to or larger than + * seqan::hibf::config::maximum_false_positive_rate. * * seqan::hibf::config::threads must be greater than `0`. * * seqan::hibf::config::sketch_bits must be in `[5,32]`. * * seqan::hibf::config::tmax must be at most `18446744073709551552`. 
@@ -292,6 +324,7 @@ struct config archive(CEREAL_NVP(number_of_user_bins)); archive(CEREAL_NVP(number_of_hash_functions)); archive(CEREAL_NVP(maximum_false_positive_rate)); + archive(CEREAL_NVP(relaxed_fpr)); archive(CEREAL_NVP(threads)); archive(CEREAL_NVP(sketch_bits)); diff --git a/include/hibf/layout/hierarchical_binning.hpp b/include/hibf/layout/hierarchical_binning.hpp index 3ca42b8e..abce1310 100644 --- a/include/hibf/layout/hierarchical_binning.hpp +++ b/include/hibf/layout/hierarchical_binning.hpp @@ -9,9 +9,10 @@ #include // for addressof, pair #include // for vector -#include // for config -#include // for data_store -#include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY +#include // for bin_size_in_bits +#include // for config +#include // for data_store +#include // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY namespace seqan::hibf::layout { @@ -32,6 +33,69 @@ class hierarchical_binning //!\brief The number of technical bins requested by the user. size_t num_technical_bins{}; + //!\brief Simplifies passing the parameters needed for tracking the maximum technical bin. + struct maximum_bin_tracker + { + size_t max_id{}; //!< The ID of the technical bin with maximal size. + size_t max_size{}; //!< The maximum technical bin size seen so far. + size_t max_split_id{}; //!< The ID of the split bin with maximal size (if any). + size_t max_split_size{}; //!< The maximum split bin size seen so far. + + void update_max(size_t const new_id, size_t const new_size) + { + if (new_size > max_size) + { + max_id = new_id; + max_size = new_size; + } + } + + //!\brief Split cardinality `new_size` must already account for fpr-correction. + void update_split_max(size_t const new_id, size_t const new_size) + { + if (new_size > max_split_size) + { + max_split_id = new_id; + max_split_size = new_size; + } + } + + /*!\brief Decides which bin is reported as the maximum bin. + *\param config The HIBF configuration. + *\return The chosen max bin id. 
+ * + * As a HIBF feature, the merged bin FPR can differ from the overall maximum FPR. Merged bins in an HIBF layout + * will always be followed by one or more lower-level IBFs that will have split bins or single bins (split = 1) + * to recover the original user bins. + * + * We need to make sure, though, that downsizing merged bins does not affect split bins. + * Therefore, we check if choosing a merged bin as the max bin violates the minimum_bits needed for split bins. + * If so, we can report the largest split bin as the max bin as it will choose the correct size and downsize + * larger merged bins only a little. + */ + size_t choose_max_bin(seqan::hibf::config const & config) + { + if (max_id == max_split_id) // Overall max bin is a split bin. + return max_id; + + // the minimum size of the TBs of this IBF to ensure the maximum_false_positive_rate for split bins + size_t const minimum_bits{build::bin_size_in_bits({.fpr = config.maximum_false_positive_rate, + .hash_count = config.number_of_hash_functions, + .elements = max_split_size})}; + + // the potential size of the TBs of this IBF given the allowed merged bin FPR + size_t const merged_bits{build::bin_size_in_bits({.fpr = config.relaxed_fpr, + .hash_count = config.number_of_hash_functions, + .elements = max_size})}; + + // If split and merged bits are the same, we prefer merged bins. Better for build parallelisation. + if ((minimum_bits > merged_bits)) + return max_split_id; + + return max_id; + } + }; + public: hierarchical_binning() = default; //!< Defaulted. hierarchical_binning(hierarchical_binning const &) = delete; //!< Deleted. Would modify same data. 
@@ -123,15 +187,13 @@ class hierarchical_binning void backtrack_merged_bin(size_t trace_j, size_t const next_j, size_t const bin_id, - size_t & high_level_max_id, - size_t & high_level_max_size, + maximum_bin_tracker & max_tracker, bool is_first_row = false); void backtrack_split_bin(size_t trace_j, size_t const number_of_bins, size_t const bin_id, - size_t & high_level_max_id, - size_t & high_level_max_size); + maximum_bin_tracker & max_tracker); //!\brief Backtracks the trace matrix and writes the resulting binning into the output file. size_t backtracking(std::vector>> const & trace); @@ -143,8 +205,6 @@ class hierarchical_binning void update_libf_data(data_store & libf_data, size_t const bin_id) const; size_t add_lower_level(data_store & libf_data) const; - - void update_max_id(size_t & max_id, size_t & max_size, size_t const new_id, size_t const new_size) const; }; } // namespace seqan::hibf::layout diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index b241054e..953a58dd 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -28,12 +28,21 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s build_data & data, bool is_root) { - size_t const kmers_per_bin{static_cast(std::ceil(static_cast(kmers.size()) / number_of_bins))}; - double const bin_bits{static_cast(bin_size_in_bits({.fpr = data.config.maximum_false_positive_rate, - .hash_count = data.config.number_of_hash_functions, - .elements = kmers_per_bin}))}; + bool const max_bin_is_merged = ibf_node.favourite_child_idx.has_value(); + assert(!max_bin_is_merged || number_of_bins == 1u); // merged max bin implies (=>) number of bins == 1 + + size_t const kmers_per_bin{(kmers.size() + number_of_bins - 1u) / number_of_bins}; // Integer ceil + double const fpr = max_bin_is_merged ? 
data.config.relaxed_fpr + : data.config.maximum_false_positive_rate; + + size_t const bin_bits{bin_size_in_bits({.fpr = fpr, // + .hash_count = data.config.number_of_hash_functions, + .elements = kmers_per_bin})}; + // data.fpr_correction[1] == 1.0, but we can avoid floating point operations with the ternary. + // Check number_of_bins instead of max_bin_is_merged, because split bins can also occupy only one technical bin. seqan::hibf::bin_size const bin_size{ - static_cast(std::ceil(bin_bits * data.fpr_correction[number_of_bins]))}; + number_of_bins == 1u ? bin_bits + : static_cast(std::ceil(bin_bits * data.fpr_correction[number_of_bins]))}; seqan::hibf::bin_count const bin_count{ibf_node.number_of_technical_bins}; timer local_index_allocation_timer{}; diff --git a/src/config.cpp b/src/config.cpp index 9c36453f..eabea357 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -75,6 +75,14 @@ void config::validate_and_set_defaults() if (maximum_false_positive_rate <= 0.0 || maximum_false_positive_rate >= 1.0) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::maximum_false_positive_rate must be in (0.0,1.0)."}; + if (relaxed_fpr <= 0.0 || relaxed_fpr >= 1.0) + throw std::invalid_argument{ + "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."}; + + if (relaxed_fpr < maximum_false_positive_rate) + throw std::invalid_argument{"[HIBF CONFIG ERROR] config::relaxed_fpr must be " + "greater than or equal to config::maximum_false_positive_rate."}; + if (threads == 0u) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::threads must be greater than 0."}; diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp old mode 100755 new mode 100644 index 12e76b53..333b9efc --- a/src/layout/hierarchical_binning.cpp +++ b/src/layout/hierarchical_binning.cpp @@ -238,8 +238,7 @@ void hierarchical_binning::recursion(std::vector> & matrix, void hierarchical_binning::backtrack_merged_bin(size_t trace_j, size_t const next_j, size_t const 
bin_id, - size_t & high_level_max_id, - size_t & high_level_max_size, + maximum_bin_tracker & max_tracker, bool is_first_row) { size_t kmer_count = (*data->kmer_counts)[data->positions[trace_j]]; @@ -266,30 +265,33 @@ void hierarchical_binning::backtrack_merged_bin(size_t trace_j, if (!config.disable_estimate_union) kmer_count = sketch.estimate(); // overwrite kmer_count high_level_max_id/size bin - update_max_id(high_level_max_id, high_level_max_size, bin_id, kmer_count); + max_tracker.update_max(bin_id, kmer_count); // std::cout << "]: " << kmer_count << std::endl; } void hierarchical_binning::backtrack_split_bin(size_t trace_j, size_t const number_of_bins, size_t const bin_id, - size_t & high_level_max_id, - size_t & high_level_max_size) + maximum_bin_tracker & max_tracker) { assert(number_of_bins > 0u); - size_t cardinality = (*data->kmer_counts)[data->positions[trace_j]]; - size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[number_of_bins]); - // NOLINTNEXTLINE(clang-analyzer-core.DivideZero) - size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up + // update layout data->hibf_layout->user_bins.emplace_back(data->previous.bin_indices, bin_id, number_of_bins, data->positions[trace_j]); - // std::cout << "split " << trace_j << " into " << number_of_bins << ": " << cardinality_per_bin << std::endl; + // update max bin + size_t const cardinality = (*data->kmer_counts)[data->positions[trace_j]]; + size_t const corrected_cardinality = static_cast(cardinality * data->fpr_correction[number_of_bins]); + // NOLINTNEXTLINE(clang-analyzer-core.DivideZero) + size_t const cardinality_per_bin = (corrected_cardinality + number_of_bins - 1) / number_of_bins; // round up - update_max_id(high_level_max_id, high_level_max_size, bin_id, cardinality_per_bin); + max_tracker.update_max(bin_id, cardinality_per_bin); + max_tracker.update_split_max(bin_id, cardinality_per_bin); + + // std::cout << 
"split " << trace_j << " into " << number_of_bins << ": " << cardinality_per_bin << std::endl; } size_t hierarchical_binning::backtracking(std::vector>> const & trace) @@ -301,9 +303,8 @@ size_t hierarchical_binning::backtracking(std::vector 0u && trace_i > 0u) @@ -316,14 +317,14 @@ size_t hierarchical_binning::backtracking(std::vector 0u) // the last UBs get merged into the remaining TB { // we are in the first row, merging the remaining UBs into the last TB (TB-0) - backtrack_merged_bin(trace_j, 0, bin_id, high_level_max_id, high_level_max_size, true); + backtrack_merged_bin(trace_j, 0, bin_id, max_tracker, true); } else if (trace_j == 0u) // the last UB is split into the remaining TBs { // we are in the first column, splitting the last UB (UB-0) into the remaining TBs (even if only into 1 bin). - backtrack_split_bin(trace_j, trace_i + 1, bin_id, high_level_max_id, high_level_max_size); + backtrack_split_bin(trace_j, trace_i + 1, bin_id, max_tracker); } - return high_level_max_id; + return max_tracker.choose_max_bin(config); } data_store hierarchical_binning::initialise_libf_data(size_t const trace_j) const @@ -404,16 +405,4 @@ size_t hierarchical_binning::add_lower_level(data_store & libf_data) const } } -void hierarchical_binning::update_max_id(size_t & max_id, - size_t & max_size, - size_t const new_id, - size_t const new_size) const -{ - if (new_size > max_size) - { - max_id = new_id; - max_size = new_size; - } -} - } // namespace seqan::hibf::layout diff --git a/test/unit/hibf/config_test.cpp b/test/unit/hibf/config_test.cpp index 6529be67..4752aac2 100644 --- a/test/unit/hibf/config_test.cpp +++ b/test/unit/hibf/config_test.cpp @@ -39,6 +39,7 @@ TEST(config_test, write_to) "@ \"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" "@ \"tmax\": 128,\n" @@ -62,6 +63,7 @@ TEST(config_test, read_from) "@ 
\"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" "@ \"tmax\": 128,\n" @@ -79,6 +81,7 @@ TEST(config_test, read_from) EXPECT_EQ(configuration.number_of_user_bins, 123456789); EXPECT_EQ(configuration.number_of_hash_functions, 4); EXPECT_EQ(configuration.maximum_false_positive_rate, 0.0001); + EXPECT_EQ(configuration.relaxed_fpr, 0.3); EXPECT_EQ(configuration.threads, 31); EXPECT_EQ(configuration.sketch_bits, 8); EXPECT_EQ(configuration.tmax, 128); @@ -102,6 +105,7 @@ TEST(config_test, read_from_with_more_meta) "@ \"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" "@ \"tmax\": 128,\n" @@ -119,6 +123,7 @@ TEST(config_test, read_from_with_more_meta) EXPECT_EQ(configuration.number_of_user_bins, 123456789); EXPECT_EQ(configuration.number_of_hash_functions, 4); EXPECT_EQ(configuration.maximum_false_positive_rate, 0.0001); + EXPECT_EQ(configuration.relaxed_fpr, 0.3); EXPECT_EQ(configuration.threads, 31); EXPECT_EQ(configuration.sketch_bits, 8); EXPECT_EQ(configuration.tmax, 128); @@ -186,6 +191,30 @@ TEST(config_test, validate_and_set_defaults) "[HIBF CONFIG ERROR] config::maximum_false_positive_rate must be in (0.0,1.0)."); } + // relaxed_fpr must be in (0.0,1.0) + { + seqan::hibf::config configuration{.input_fn = dummy_input_fn, + .number_of_user_bins = 1u, + .relaxed_fpr = -0.1}; + check_error_message(configuration, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be in [0.0,1.0]."); + + configuration.relaxed_fpr = 1.1; + check_error_message(configuration, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be in [0.0,1.0]."); + } + + // relaxed_fpr must equal to or greater than maximum_false_positive_rate + { + seqan::hibf::config configuration{.input_fn = dummy_input_fn, 
+ .number_of_user_bins = 1u, + .maximum_false_positive_rate = 0.3, + .relaxed_fpr = 0.2}; + check_error_message(configuration, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be " + "greater than or equal to config::maximum_false_positive_rate."); + } + // threads cannot be 0 { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .threads = 0u}; diff --git a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp index ce67fa13..5c155d88 100644 --- a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp +++ b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp @@ -56,6 +56,7 @@ TEST(hibf_test, build_from_layout) "@ \"number_of_user_bins\": 2,\n" "@ \"number_of_hash_functions\": 2,\n" "@ \"maximum_false_positive_rate\": 0.05,\n" + "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 1,\n" "@ \"sketch_bits\": 12,\n" "@ \"tmax\": 64,\n" diff --git a/test/unit/hibf/layout/hierarchical_binning_test.cpp b/test/unit/hibf/layout/hierarchical_binning_test.cpp index 2a4ad379..350ff7fc 100644 --- a/test/unit/hibf/layout/hierarchical_binning_test.cpp +++ b/test/unit/hibf/layout/hierarchical_binning_test.cpp @@ -28,7 +28,7 @@ TEST(hierarchical_binning_test, small_example) data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); seqan::hibf::layout::hierarchical_binning algo{data, config}; - EXPECT_EQ(algo.execute(), 1u); // #HIGH_LEVEL_IBF max_bin_id:3 + EXPECT_EQ(algo.execute(), 3u); // #HIGH_LEVEL_IBF max_bin_id:3 std::vector expected_max_bins{{{1}, 22}, {{2}, 22}}; From 58067e0cc48774f7c245bc6ed4b45859202f22e4 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Mon, 23 Oct 2023 10:55:43 +0200 Subject: [PATCH 2/3] [MISC] Add graph::node::max_bin_is_merged(). 
--- include/hibf/layout/graph.hpp | 5 +++++ src/build/construct_ibf.cpp | 2 +- src/hierarchical_interleaved_bloom_filter.cpp | 6 +++--- test/unit/hibf/layout/graph_test.cpp | 13 ++++++++++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/include/hibf/layout/graph.hpp b/include/hibf/layout/graph.hpp index 75c75ffd..4db26f21 100644 --- a/include/hibf/layout/graph.hpp +++ b/include/hibf/layout/graph.hpp @@ -48,6 +48,11 @@ struct graph std::optional favourite_child_idx{std::nullopt}; std::vector remaining_records{}; // non-merged bins (either split or single) + bool max_bin_is_merged() const + { + return favourite_child_idx.has_value(); + } + // Doesn't work, because the type is incomplete. To compare node, a comparison for the children member is needed. // But children is a std::vector, so a comparison for node is needed to compare children. // https://godbolt.org/z/arrr4YKae diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index 953a58dd..640b6401 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -28,7 +28,7 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s build_data & data, bool is_root) { - bool const max_bin_is_merged = ibf_node.favourite_child_idx.has_value(); + bool const max_bin_is_merged = ibf_node.max_bin_is_merged(); assert(!max_bin_is_merged || number_of_bins == 1u); // merged max bin implies (=>) number of bins == 1 size_t const kmers_per_bin{(kmers.size() + number_of_bins - 1u) / number_of_bins}; // Integer ceil diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index 519fa401..f8914586 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -47,7 +47,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, auto initialise_max_bin_kmers = [&]() -> size_t { - if (current_node.favourite_child_idx.has_value()) // max bin is a 
merged bin + if (current_node.max_bin_is_merged()) { // recursively initialize favourite child first ibf_positions[current_node.max_bin_index] = @@ -91,7 +91,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, // We do not want to process the favourite child. It has already been processed prior. // https://godbolt.org/z/6Yav7hrG1 - if (current_node.favourite_child_idx.has_value()) + if (current_node.max_bin_is_merged()) std::erase(indices, current_node.favourite_child_idx.value()); if (is_root) @@ -127,7 +127,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, loop_over_children(); // If max bin was a merged bin, process all remaining records, otherwise the first one has already been processed - size_t const start{(current_node.favourite_child_idx.has_value()) ? 0u : 1u}; + size_t const start{(current_node.max_bin_is_merged()) ? 0u : 1u}; for (size_t i = start; i < current_node.remaining_records.size(); ++i) { auto const & record = current_node.remaining_records[i]; diff --git a/test/unit/hibf/layout/graph_test.cpp b/test/unit/hibf/layout/graph_test.cpp index eef6c098..414b823a 100644 --- a/test/unit/hibf/layout/graph_test.cpp +++ b/test/unit/hibf/layout/graph_test.cpp @@ -11,7 +11,18 @@ #include // for graph #include // for layout -TEST(layout_test, printing_max_bins) +TEST(graph_node_test, function_max_bin_is_merged) +{ + seqan::hibf::layout::graph::node current_node{}; + + EXPECT_FALSE(current_node.max_bin_is_merged()); + + current_node.favourite_child_idx = 0; + + EXPECT_TRUE(current_node.max_bin_is_merged()); +} + +TEST(graph_test, construction_from_layout) { // prepare layout seqan::hibf::layout::layout hibf_layout; From 226b1f14b2cbf2d01ed44ce729c9e1e60f616b6b Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Mon, 23 Oct 2023 12:35:03 +0200 Subject: [PATCH 3/3] [MISC] Rename maximum_false_positive_rate->maximum_fpr. 
--- README.md | 2 +- include/hibf/config.hpp | 58 +++++++++---------- .../hierarchical_interleaved_bloom_filter.hpp | 2 +- include/hibf/layout/hierarchical_binning.hpp | 9 +-- src/build/construct_ibf.cpp | 3 +- src/config.cpp | 11 ++-- src/hierarchical_interleaved_bloom_filter.cpp | 5 +- src/interleaved_bloom_filter.cpp | 2 +- src/layout/compute_layout.cpp | 4 +- ...cal_interleaved_bloom_filter_benchmark.cpp | 2 +- test/snippet/hibf/hibf_construction.cpp | 4 +- test/snippet/readme.cpp | 2 +- test/unit/hibf/config_test.cpp | 44 ++++++-------- ...archical_interleaved_bloom_filter_test.cpp | 8 +-- 14 files changed, 74 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index cd0acda6..765d1a63 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ int main() seqan::hibf::config config{.input_fn = get_user_bin_data, // required .number_of_user_bins = 3u, // required .number_of_hash_functions = 2u, - .maximum_false_positive_rate = 0.05, + .maximum_fpr = 0.05, .threads = 1u}; // The HIBF constructor will determine a hierarchical layout for the user bins and build the filter. 
diff --git a/include/hibf/config.hpp b/include/hibf/config.hpp index 8536c33a..4bb39470 100644 --- a/include/hibf/config.hpp +++ b/include/hibf/config.hpp @@ -30,20 +30,20 @@ namespace seqan::hibf * * Here is the list of all configs options: * - * | Type | Option Name | Default | Note | - * |:--------|:------------------------------------------------------------|:-------:|:-----------------------| - * | General | seqan::hibf::config::input_fn | - | [REQUIRED] | - * | General | seqan::hibf::config::number_of_user_bins | - | [REQUIRED] | - * | General | seqan::hibf::config::number_of_hash_functions | 2 | | - * | General | seqan::hibf::config::maximum_false_positive_rate | 0.05 | [RECOMMENDED_TO_ADAPT] | - * | General | seqan::hibf::config::relaxed_fpr | 0.3 | | - * | General | seqan::hibf::config::threads | 1 | [RECOMMENDED_TO_ADAPT] | - * | Layout | seqan::hibf::config::sketch_bits | 12 | | - * | Layout | seqan::hibf::config::tmax | 0 | 0 indicates unset | - * | Layout | seqan::hibf::config::max_rearrangement_ratio | 0.5 | | - * | Layout | seqan::hibf::config::alpha | 1.2 | | - * | Layout | seqan::hibf::config::disable_estimate_union | false | | - * | Layout | seqan::hibf::config::disable_rearrangement | false | | + * | Type | Option Name | Default | Note | + * |:--------|:----------------------------------------------|:-------:|:-----------------------| + * | General | seqan::hibf::config::input_fn | - | [REQUIRED] | + * | General | seqan::hibf::config::number_of_user_bins | - | [REQUIRED] | + * | General | seqan::hibf::config::number_of_hash_functions | 2 | | + * | General | seqan::hibf::config::maximum_fpr | 0.05 | [RECOMMENDED_TO_ADAPT] | + * | General | seqan::hibf::config::relaxed_fpr | 0.3 | | + * | General | seqan::hibf::config::threads | 1 | [RECOMMENDED_TO_ADAPT] | + * | Layout | seqan::hibf::config::sketch_bits | 12 | | + * | Layout | seqan::hibf::config::tmax | 0 | 0 indicates unset | + * | Layout | seqan::hibf::config::max_rearrangement_ratio | 0.5 | | 
+ * | Layout | seqan::hibf::config::alpha | 1.2 | | + * | Layout | seqan::hibf::config::disable_estimate_union | false | | + * | Layout | seqan::hibf::config::disable_rearrangement | false | | * * As a copy and paste source, here are all config options with their defaults: * @@ -62,7 +62,7 @@ namespace seqan::hibf * Check the documentation of the following options that influence the memory consumption: * * seqan::hibf::config::threads * * seqan::hibf::config::number_of_hash_functions - * * seqan::hibf::config::maximum_false_positive_rate + * * seqan::hibf::config::maximum_fpr * * ## Validation * @@ -134,7 +134,7 @@ struct config /*!\brief The desired maximum false positive rate of the underlying Bloom Filters. [RECOMMENDED_TO_ADAPT] * * We ensure that when querying a single hash value in the (H)IBF, the probability of getting a false positive answer - * will not exceed the value set for seqan::hibf::config::maximum_false_positive_rate. + * will not exceed the value set for seqan::hibf::config::maximum_fpr. * The internal Bloom Filters will be configured accordingly. Individual Bloom Filters might have a different * but always lower false positive rate (FPR). * @@ -147,30 +147,30 @@ struct config * * \sa [Bloom Filter Calculator](https://hur.st/bloomfilter/). */ - double maximum_false_positive_rate{0.05}; + double maximum_fpr{0.05}; /*!\brief Allow a higher FPR in non-accuracy-critical parts of the HIBF structure. * - * Some parts in the hierarchical structure are not critical to ensure the seqan::hibf::config::maximum_false_positive_rate. - * These can be allowed to have a higher FPR to reduce the overall space consumption taking into account a small - * decrease in runtime performance. + * Some parts in the hierarchical structure are not critical to ensure the seqan::hibf::config::maximum_fpr. + * These can be allowed to have a higher FPR to reduce the overall space consumption, while only minimally + * affecting the runtime performance. 
* - * Value must be in range [0,1]. - * Value must be equal to or larger than seqan::hibf::config::maximum_false_positive_rate. + * Value must be in range (0.0,1.0). + * Value must be equal to or larger than seqan::hibf::config::maximum_fpr. * Recommendation: default value (0.3) * * ### Technical details - * + * * Merged bins in an HIBF layout will always be followed by one or more lower-level IBFs that will have split bins * or single bins (split = 1) to recover the original user bins. Thus, the FPR of merged bins does not determine the - * seqan::hibf::config::maximum_false_positive_rate, but is independent. Choosing a higher FPR for merged bins can + * seqan::hibf::config::maximum_fpr, but is independent. Choosing a higher FPR for merged bins can * lower the memory requirement but increases the runtime. Experiments show that the decrease in memory is * significant, while the runtime suffers only slightly. The accuracy of the results is not affected by this * parameter. * * Note: For each IBF there is a limit to how high the FPR of merged bins can be. Specifically, the FPR for merged * bins can never decrease the IBF size more than what is needed to ensure the - * seqan::hibf::config::maximum_false_positive_rate for split bins. This means that, at some point, choosing even + * seqan::hibf::config::maximum_fpr for split bins. This means that, at some point, choosing even * higher values for this parameter will have no effect anymore. * * \sa [Bloom Filter Calculator](https://hur.st/bloomfilter/). @@ -293,10 +293,10 @@ struct config * * Constrains: * * seqan::hibf::config::number_of_hash_functions must be in `[1,5]`. - * * seqan::hibf::config::maximum_false_positive_rate must be in `(0.0,1.0)`. - * * seqan::hibf::config::relaxed_fpr must be in `[0.0,1.0]`. + * * seqan::hibf::config::maximum_fpr must be in `(0.0,1.0)`. + * * seqan::hibf::config::relaxed_fpr must be in `(0.0,1.0)`. 
* * seqan::hibf::config::relaxed_fpr must be equal to or larger than - * seqan::hibf::config::maximum_false_positive_rate. + * seqan::hibf::config::maximum_fpr. * * seqan::hibf::config::threads must be greater than `0`. * * seqan::hibf::config::sketch_bits must be in `[5,32]`. * * seqan::hibf::config::tmax must be at most `18446744073709551552`. @@ -323,7 +323,7 @@ struct config archive(CEREAL_NVP(number_of_user_bins)); archive(CEREAL_NVP(number_of_hash_functions)); - archive(CEREAL_NVP(maximum_false_positive_rate)); + archive(CEREAL_NVP(maximum_fpr)); archive(CEREAL_NVP(relaxed_fpr)); archive(CEREAL_NVP(threads)); diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp index bb887402..123019c1 100644 --- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp +++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp @@ -161,7 +161,7 @@ class hierarchical_interleaved_bloom_filter * * Options recommended to adapt to your setup: * * `threads` - Choose number of threads depending on your hardware settings to speed up construction - * * `maximum_false_positive_rate` - How many false positive answers can you tolerate? A low FPR (e.g. 0.001) is + * * `maximum_fpr` - How many false positive answers can you tolerate? A low FPR (e.g. 0.001) is * needed if you can tolerate a high RAM peak when using the HIBF but post-processing steps are heavy and FPs * should be avoided. A high FPR (e.g. `0.3`) can be chosed if you want a very small HIBF and false positive * can be easily filtered in the down-stream analysis diff --git a/include/hibf/layout/hierarchical_binning.hpp b/include/hibf/layout/hierarchical_binning.hpp index abce1310..720159e9 100644 --- a/include/hibf/layout/hierarchical_binning.hpp +++ b/include/hibf/layout/hierarchical_binning.hpp @@ -78,13 +78,14 @@ class hierarchical_binning if (max_id == max_split_id) // Overall max bin is a split bin. 
return max_id; - // the minimum size of the TBs of this IBF to ensure the maximum_false_positive_rate for split bins - size_t const minimum_bits{build::bin_size_in_bits({.fpr = config.maximum_false_positive_rate, + // Split cardinality `max_split_size` already accounts for fpr correction. + // The minimum size of the TBs of this IBF to ensure the maximum_fpr for split bins. + size_t const minimum_bits{build::bin_size_in_bits({.fpr = config.maximum_fpr, .hash_count = config.number_of_hash_functions, .elements = max_split_size})}; - // the potential size of the TBs of this IBF given the allowed merged bin FPR - size_t const merged_bits{build::bin_size_in_bits({.fpr = config.relaxed_fpr, + // The potential size of the TBs of this IBF given the allowed merged bin FPR. + size_t const merged_bits{build::bin_size_in_bits({.fpr = config.relaxed_fpr, // .hash_count = config.number_of_hash_functions, .elements = max_size})}; diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index 640b6401..4323c96f 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -32,8 +32,7 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s assert(!max_bin_is_merged || number_of_bins == 1u); // merged max bin implies (=>) number of bins == 1 size_t const kmers_per_bin{(kmers.size() + number_of_bins - 1u) / number_of_bins}; // Integer ceil - double const fpr = max_bin_is_merged ? data.config.relaxed_fpr - : data.config.maximum_false_positive_rate; + double const fpr = max_bin_is_merged ?
data.config.relaxed_fpr : data.config.maximum_fpr; size_t const bin_bits{bin_size_in_bits({.fpr = fpr, // .hash_count = data.config.number_of_hash_functions, diff --git a/src/config.cpp b/src/config.cpp index eabea357..b4ef22cc 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -72,16 +72,15 @@ void config::validate_and_set_defaults() if (number_of_hash_functions == 0u || number_of_hash_functions > 5u) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."}; - if (maximum_false_positive_rate <= 0.0 || maximum_false_positive_rate >= 1.0) - throw std::invalid_argument{"[HIBF CONFIG ERROR] config::maximum_false_positive_rate must be in (0.0,1.0)."}; + if (maximum_fpr <= 0.0 || maximum_fpr >= 1.0) + throw std::invalid_argument{"[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."}; if (relaxed_fpr <= 0.0 || relaxed_fpr >= 1.0) - throw std::invalid_argument{ - "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."}; + throw std::invalid_argument{"[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."}; - if (relaxed_fpr < maximum_false_positive_rate) + if (relaxed_fpr < maximum_fpr) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::relaxed_fpr must be " - "greater than or equal to config::maximum_false_positive_rate."}; + "greater than or equal to config::maximum_fpr."}; if (threads == 0u) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::threads must be greater than 0."}; diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index f8914586..4e769ed8 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -182,8 +182,9 @@ void build_index(hierarchical_interleaved_bloom_filter & hibf, layout::graph::node const & root_node = data.ibf_graph.root; size_t const t_max{root_node.number_of_technical_bins}; - data.fpr_correction = layout::compute_fpr_correction( - {.fpr = 
config.maximum_false_positive_rate, .hash_count = config.number_of_hash_functions, .t_max = t_max}); + data.fpr_correction = layout::compute_fpr_correction({.fpr = config.maximum_fpr, // + .hash_count = config.number_of_hash_functions, + .t_max = t_max}); hierarchical_build(hibf, root_node, data); diff --git a/src/interleaved_bloom_filter.cpp b/src/interleaved_bloom_filter.cpp index b08ba316..2c4af247 100644 --- a/src/interleaved_bloom_filter.cpp +++ b/src/interleaved_bloom_filter.cpp @@ -59,7 +59,7 @@ size_t max_bin_size(config & configuration) max_size = std::max(max_size, kmers.size()); } - return build::bin_size_in_bits({.fpr = configuration.maximum_false_positive_rate, + return build::bin_size_in_bits({.fpr = configuration.maximum_fpr, // .hash_count = configuration.number_of_hash_functions, .elements = max_size}); } diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp index 844e5742..47866f6e 100644 --- a/src/layout/compute_layout.cpp +++ b/src/layout/compute_layout.cpp @@ -31,11 +31,11 @@ layout compute_layout(config const & config, std::stringstream output_buffer; std::stringstream header_buffer; - data_store store{.false_positive_rate = config.maximum_false_positive_rate, + data_store store{.false_positive_rate = config.maximum_fpr, .hibf_layout = &resulting_layout, .kmer_counts = std::addressof(kmer_counts), .sketches = std::addressof(sketches)}; - store.fpr_correction = compute_fpr_correction({.fpr = config.maximum_false_positive_rate, + store.fpr_correction = compute_fpr_correction({.fpr = config.maximum_fpr, // .hash_count = config.number_of_hash_functions, .t_max = config.tmax}); diff --git a/test/performance/hibf/hierarchical_interleaved_bloom_filter_benchmark.cpp b/test/performance/hibf/hierarchical_interleaved_bloom_filter_benchmark.cpp index cdb9e5b1..2cce4218 100644 --- a/test/performance/hibf/hierarchical_interleaved_bloom_filter_benchmark.cpp +++ b/test/performance/hibf/hierarchical_interleaved_bloom_filter_benchmark.cpp 
@@ -59,7 +59,7 @@ auto set_up(::benchmark::State const & state) seqan::hibf::config config{.input_fn = distribute_hashes_across_ub, .number_of_user_bins = num_ub, .number_of_hash_functions = hash_num, - .maximum_false_positive_rate = fpr, + .maximum_fpr = fpr, .threads = 4u, // Only applies to layout and build .disable_estimate_union = true}; diff --git a/test/snippet/hibf/hibf_construction.cpp b/test/snippet/hibf/hibf_construction.cpp index 6f8994d8..dc7798d7 100644 --- a/test/snippet/hibf/hibf_construction.cpp +++ b/test/snippet/hibf/hibf_construction.cpp @@ -24,8 +24,8 @@ int main() seqan::hibf::config config{.input_fn = my_input, // required .number_of_user_bins = 2, // required .number_of_hash_functions = 2, - .maximum_false_positive_rate = 0.05, // recommended to adapt - .threads = 1, // recommended to adapt + .maximum_fpr = 0.05, // recommended to adapt + .threads = 1, // recommended to adapt .sketch_bits = 12, .tmax = 0, // triggers default copmutation .alpha = 1.2, diff --git a/test/snippet/readme.cpp b/test/snippet/readme.cpp index fbf24ac0..76eeac77 100644 --- a/test/snippet/readme.cpp +++ b/test/snippet/readme.cpp @@ -52,7 +52,7 @@ int main() seqan::hibf::config config{.input_fn = get_user_bin_data, // required .number_of_user_bins = 3u, // required .number_of_hash_functions = 2u, - .maximum_false_positive_rate = 0.05, + .maximum_fpr = 0.05, .threads = 1u}; // The HIBF constructor will determine a hierarchical layout for the user bins and build the filter. 
diff --git a/test/unit/hibf/config_test.cpp b/test/unit/hibf/config_test.cpp index 4752aac2..5bf035d9 100644 --- a/test/unit/hibf/config_test.cpp +++ b/test/unit/hibf/config_test.cpp @@ -21,7 +21,7 @@ TEST(config_test, write_to) configuration.number_of_user_bins = 123456789; configuration.number_of_hash_functions = 4; - configuration.maximum_false_positive_rate = 0.0001; + configuration.maximum_fpr = 0.0001; configuration.threads = 31; configuration.sketch_bits = 8; configuration.tmax = 128; @@ -38,7 +38,7 @@ TEST(config_test, write_to) "@ \"version\": 1,\n" "@ \"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" - "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"maximum_fpr\": 0.0001,\n" "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" @@ -62,7 +62,7 @@ TEST(config_test, read_from) "@ \"version\": 1,\n" "@ \"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" - "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"maximum_fpr\": 0.0001,\n" "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" @@ -80,7 +80,7 @@ TEST(config_test, read_from) EXPECT_EQ(configuration.number_of_user_bins, 123456789); EXPECT_EQ(configuration.number_of_hash_functions, 4); - EXPECT_EQ(configuration.maximum_false_positive_rate, 0.0001); + EXPECT_EQ(configuration.maximum_fpr, 0.0001); EXPECT_EQ(configuration.relaxed_fpr, 0.3); EXPECT_EQ(configuration.threads, 31); EXPECT_EQ(configuration.sketch_bits, 8); @@ -104,7 +104,7 @@ TEST(config_test, read_from_with_more_meta) "@ \"version\": 1,\n" "@ \"number_of_user_bins\": 123456789,\n" "@ \"number_of_hash_functions\": 4,\n" - "@ \"maximum_false_positive_rate\": 0.0001,\n" + "@ \"maximum_fpr\": 0.0001,\n" "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 31,\n" "@ \"sketch_bits\": 8,\n" @@ -122,7 +122,7 @@ TEST(config_test, read_from_with_more_meta) EXPECT_EQ(configuration.number_of_user_bins, 123456789); 
EXPECT_EQ(configuration.number_of_hash_functions, 4); - EXPECT_EQ(configuration.maximum_false_positive_rate, 0.0001); + EXPECT_EQ(configuration.maximum_fpr, 0.0001); EXPECT_EQ(configuration.relaxed_fpr, 0.3); EXPECT_EQ(configuration.threads, 31); EXPECT_EQ(configuration.sketch_bits, 8); @@ -178,41 +178,33 @@ TEST(config_test, validate_and_set_defaults) check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."); } - // maximum_false_positive_rate must be in (0.0,1.0) + // maximum_fpr must be in (0.0,1.0) { - seqan::hibf::config configuration{.input_fn = dummy_input_fn, - .number_of_user_bins = 1u, - .maximum_false_positive_rate = 0.0}; - check_error_message(configuration, - "[HIBF CONFIG ERROR] config::maximum_false_positive_rate must be in (0.0,1.0)."); + seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .maximum_fpr = 0.0}; + check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); - configuration.maximum_false_positive_rate = 1.0; - check_error_message(configuration, - "[HIBF CONFIG ERROR] config::maximum_false_positive_rate must be in (0.0,1.0)."); + configuration.maximum_fpr = 1.0; + check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); } // relaxed_fpr must be in (0.0,1.0) { - seqan::hibf::config configuration{.input_fn = dummy_input_fn, - .number_of_user_bins = 1u, - .relaxed_fpr = -0.1}; - check_error_message(configuration, - "[HIBF CONFIG ERROR] config::relaxed_fpr must be in [0.0,1.0]."); + seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .relaxed_fpr = 0.0}; + check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); - configuration.relaxed_fpr = 1.1; - check_error_message(configuration, - "[HIBF CONFIG ERROR] config::relaxed_fpr must be in [0.0,1.0]."); + configuration.relaxed_fpr = 1.0; + 
check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); } - // relaxed_fpr must equal to or greater than maximum_false_positive_rate + // relaxed_fpr must be equal to or greater than maximum_fpr { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, - .maximum_false_positive_rate = 0.3, + .maximum_fpr = 0.3, .relaxed_fpr = 0.2}; check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be " - "greater than or equal to config::maximum_false_positive_rate."); + "greater than or equal to config::maximum_fpr."); } // threads cannot be 0 diff --git a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp index 5c155d88..7b674fb5 100644 --- a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp +++ b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp @@ -55,7 +55,7 @@ TEST(hibf_test, build_from_layout) "@ \"version\": 1,\n" "@ \"number_of_user_bins\": 2,\n" "@ \"number_of_hash_functions\": 2,\n" - "@ \"maximum_false_positive_rate\": 0.05,\n" + "@ \"maximum_fpr\": 0.05,\n" "@ \"relaxed_fpr\": 0.3,\n" "@ \"threads\": 1,\n" "@ \"sketch_bits\": 12,\n" @@ -104,7 +104,7 @@ TEST(hibf_test, three_level_hibf) it = i; }, .number_of_user_bins = 4097, - .maximum_false_positive_rate = 0.001, + .maximum_fpr = 0.001, .threads = 4, .tmax = 64, .disable_estimate_union = true, @@ -153,7 +153,7 @@ TEST(hibf_test, unevenly_sized_and_unique_user_bins) it = i; }, .number_of_user_bins = 500, - .maximum_false_positive_rate = 0.001, + .maximum_fpr = 0.001, .threads = 4, .tmax = 64, .disable_estimate_union = true, @@ -187,7 +187,7 @@ TEST(hibf_test, evenly_sized_and_highly_similar_user_bins) it = i; }, .number_of_user_bins = 1000, - .maximum_false_positive_rate = 0.001, + .maximum_fpr = 0.001, .threads = 4, .tmax = 64, .disable_estimate_union = true,