From 53c9cffac918376d10cf8c9204fa436e8d1c6847 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Thu, 11 Jul 2024 11:19:54 +0200 Subject: [PATCH] [MISC] Compute layout also takes positions Co-authored-by: Enrico Seiler --- include/hibf/layout/compute_layout.hpp | 2 ++ include/hibf/misc/iota_vector.hpp | 36 +++++++++++++++++++ src/hierarchical_interleaved_bloom_filter.cpp | 8 +++-- src/layout/compute_layout.cpp | 21 ++++++++--- test/unit/hibf/layout/compute_layout_test.cpp | 15 +++++--- 5 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 include/hibf/misc/iota_vector.hpp diff --git a/include/hibf/layout/compute_layout.hpp b/include/hibf/layout/compute_layout.hpp index 34febfb0..62475cd1 100644 --- a/include/hibf/layout/compute_layout.hpp +++ b/include/hibf/layout/compute_layout.hpp @@ -20,6 +20,7 @@ namespace seqan::hibf::layout * \param[in] config The configuration to compute the layout with. * \param[in] kmer_counts The vector that will store the kmer counts (estimations). * \param[in] sketches The vector that will store the sketches. + * \param[in] positions Specifies which user bins the layout should be calculated on (positions in the other vectors). * \param[in,out] union_estimation_timer The timer that measures the union estimation time. * \param[in,out] rearrangement_timer The timer that measures the rearrangement time. 
* \returns layout @@ -27,6 +28,7 @@ namespace seqan::hibf::layout layout compute_layout(config const & config, std::vector const & kmer_counts, std::vector const & sketches, + std::vector && positions, concurrent_timer & union_estimation_timer, concurrent_timer & rearrangement_timer); diff --git a/include/hibf/misc/iota_vector.hpp b/include/hibf/misc/iota_vector.hpp new file mode 100644 index 00000000..b6ca7a10 --- /dev/null +++ b/include/hibf/misc/iota_vector.hpp @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +/*!\file + * \brief Provides seqan::hibf::iota_vector. + * \author Enrico Seiler + */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace seqan::hibf +{ + +/*!\brief Creates a vector of size `size` with values from 0 to `size - 1`. + * \tparam value_t The value type of the vector. Defaults to `size_t`. + * \param[in] size The size of the vector. + * \returns A vector of size `size` with values from 0 to `size - 1`. 
+*/ +template +HIBF_CONSTEXPR_VECTOR std::vector iota_vector(size_t const size) +{ + assert(size <= std::numeric_limits::max()); + std::vector result(size); + std::iota(result.begin(), result.end(), value_t{}); + return result; +} + +} // namespace seqan::hibf diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index f4078300..29fa390b 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -29,9 +29,10 @@ #include // for graph #include // for layout #include // for divide_and_ceil -#include // for concurrent_timer -#include // for compute_sketches -#include // for hyperloglog +#include +#include // for concurrent_timer +#include // for compute_sketches +#include // for hyperloglog namespace seqan::hibf { @@ -223,6 +224,7 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con auto layout = layout::compute_layout(configuration, kmer_counts, sketches, + iota_vector(configuration.number_of_user_bins), layout_union_estimation_timer, layout_rearrangement_timer); layout_dp_algorithm_timer.stop(); diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp index b7786b49..780482b2 100644 --- a/src/layout/compute_layout.cpp +++ b/src/layout/compute_layout.cpp @@ -13,8 +13,9 @@ #include // for data_store #include // for hierarchical_binning #include // for layout -#include // for concurrent_timer -#include // for hyperloglog +#include +#include // for concurrent_timer +#include // for hyperloglog namespace seqan::hibf::layout { @@ -22,15 +23,22 @@ namespace seqan::hibf::layout layout compute_layout(config const & config, std::vector const & kmer_counts, std::vector const & sketches, + std::vector && positions, concurrent_timer & union_estimation_timer, concurrent_timer & rearrangement_timer) { + assert(kmer_counts.size() == sketches.size()); + assert(positions.size() <= sketches.size()); + assert(sketches.size() == 
config.number_of_user_bins); + assert(positions.empty() || std::ranges::max(positions) < config.number_of_user_bins); + + layout resulting_layout{}; data_store store{.false_positive_rate = config.maximum_fpr, .hibf_layout = &resulting_layout, .kmer_counts = std::addressof(kmer_counts), - .sketches = std::addressof(sketches)}; + .sketches = std::addressof(sketches), + .positions = std::move(positions)}; store.fpr_correction = compute_fpr_correction({.fpr = config.maximum_fpr, // .hash_count = config.number_of_hash_functions, @@ -59,7 +67,12 @@ layout compute_layout(config const & config, concurrent_timer union_estimation_timer; concurrent_timer rearrangement_timer; - return compute_layout(config, kmer_counts, sketches, union_estimation_timer, rearrangement_timer); + return compute_layout(config, + kmer_counts, + sketches, + iota_vector(config.number_of_user_bins), + union_estimation_timer, + rearrangement_timer); } } // namespace seqan::hibf::layout diff --git a/test/unit/hibf/layout/compute_layout_test.cpp b/test/unit/hibf/layout/compute_layout_test.cpp index 8447849d..d1f439a5 100644 --- a/test/unit/hibf/layout/compute_layout_test.cpp +++ b/test/unit/hibf/layout/compute_layout_test.cpp @@ -8,9 +8,10 @@ #include // for function #include // for vector, allocator -#include // for insert_iterator, config -#include // for compute_layout -#include // for layout +#include // for insert_iterator, config +#include // for compute_layout +#include // for layout +#include #include // for concurrent_timer #include // for compute_sketches #include // for hyperloglog @@ -39,8 +40,12 @@ TEST(compute_layout, dispatch) seqan::hibf::concurrent_timer union_estimation_timer{}; seqan::hibf::concurrent_timer rearrangement_timer{}; - auto layout2 = - seqan::hibf::layout::compute_layout(config, kmer_counts, sketches, union_estimation_timer, rearrangement_timer); + auto layout2 = seqan::hibf::layout::compute_layout(config, + kmer_counts, + sketches, + seqan::hibf::iota_vector(config.number_of_user_bins), + 
union_estimation_timer, + rearrangement_timer); EXPECT_TRUE(layout1 == layout2); }