Skip to content

Commit

Permalink
Merge pull request #210 from smehringer/partial_update
Browse files Browse the repository at this point in the history
[MISC] Compute layout also takes positions.
  • Loading branch information
eseiler authored Jul 11, 2024
2 parents 42e493f + 53c9cff commit fead38a
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 12 deletions.
2 changes: 2 additions & 0 deletions include/hibf/layout/compute_layout.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ namespace seqan::hibf::layout
* \param[in] config The configuration to compute the layout with.
* \param[in] kmer_counts The vector that stores the kmer counts (estimations).
* \param[in] sketches The vector that stores the sketches.
* \param[in] positions Specifies which user bins the layout should be calculated on (positions in the other vectors).
* \param[in,out] union_estimation_timer The timer that measures the union estimation time.
* \param[in,out] rearrangement_timer The timer that measures the rearrangement time.
* \returns layout
*/
layout compute_layout(config const & config,
std::vector<size_t> const & kmer_counts,
std::vector<sketch::hyperloglog> const & sketches,
std::vector<size_t> && positions,
concurrent_timer & union_estimation_timer,
concurrent_timer & rearrangement_timer);

Expand Down
36 changes: 36 additions & 0 deletions include/hibf/misc/iota_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides seqan::hibf::iota_vector.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <cassert>
#include <limits>
#include <numeric>
#include <vector>

#include <hibf/platform.hpp>

namespace seqan::hibf
{

/*!\brief Builds the ascending index sequence `0, 1, ..., size - 1` as a vector.
 * \tparam value_t Unsigned integral element type of the returned vector. Defaults to `size_t`.
 * \param[in] size Number of elements to generate.
 * \returns A `std::vector<value_t>` holding `size` elements, where the element at index `i` has the value `i`.
 * \details When assertions are enabled, it is checked that `size` does not exceed the largest value
 *          representable by `value_t`.
 */
template <std::unsigned_integral value_t = size_t>
HIBF_CONSTEXPR_VECTOR std::vector<value_t> iota_vector(size_t const size)
{
    assert(size <= std::numeric_limits<value_t>::max());

    // Allocate all `size` slots up front, then overwrite them in order with a running counter.
    std::vector<value_t> indices(size);
    value_t next_value{};
    for (value_t & slot : indices)
    {
        slot = next_value;
        ++next_value;
    }

    return indices;
}

} // namespace seqan::hibf
8 changes: 5 additions & 3 deletions src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
#include <hibf/layout/graph.hpp> // for graph
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/divide_and_ceil.hpp> // for divide_and_ceil
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/sketch/compute_sketches.hpp> // for compute_sketches
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
#include <hibf/misc/iota_vector.hpp>
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/sketch/compute_sketches.hpp> // for compute_sketches
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog

namespace seqan::hibf
{
Expand Down Expand Up @@ -223,6 +224,7 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con
auto layout = layout::compute_layout(configuration,
kmer_counts,
sketches,
iota_vector(configuration.number_of_user_bins),
layout_union_estimation_timer,
layout_rearrangement_timer);
layout_dp_algorithm_timer.stop();
Expand Down
21 changes: 17 additions & 4 deletions src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,32 @@
#include <hibf/layout/data_store.hpp> // for data_store
#include <hibf/layout/hierarchical_binning.hpp> // for hierarchical_binning
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
#include <hibf/misc/iota_vector.hpp>
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog

namespace seqan::hibf::layout
{

layout compute_layout(config const & config,
std::vector<size_t> const & kmer_counts,
std::vector<sketch::hyperloglog> const & sketches,
std::vector<size_t> && positions,
concurrent_timer & union_estimation_timer,
concurrent_timer & rearrangement_timer)
{
assert(kmer_counts.size() == sketches.size());
assert(positions.size() <= sketches.size());
assert(sketches.size() == config.number_of_user_bins);
assert(std::ranges::max(positions) <= config.number_of_user_bins);

layout resulting_layout{};

data_store store{.false_positive_rate = config.maximum_fpr,
.hibf_layout = &resulting_layout,
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches)};
.sketches = std::addressof(sketches),
.positions = std::move(positions)};

store.fpr_correction = compute_fpr_correction({.fpr = config.maximum_fpr, //
.hash_count = config.number_of_hash_functions,
Expand Down Expand Up @@ -59,7 +67,12 @@ layout compute_layout(config const & config,
concurrent_timer union_estimation_timer;
concurrent_timer rearrangement_timer;

return compute_layout(config, kmer_counts, sketches, union_estimation_timer, rearrangement_timer);
return compute_layout(config,
kmer_counts,
sketches,
iota_vector(config.number_of_user_bins),
union_estimation_timer,
rearrangement_timer);
}

} // namespace seqan::hibf::layout
15 changes: 10 additions & 5 deletions test/unit/hibf/layout/compute_layout_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#include <functional> // for function
#include <vector> // for vector, allocator

#include <hibf/config.hpp> // for insert_iterator, config
#include <hibf/layout/compute_layout.hpp> // for compute_layout
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/config.hpp> // for insert_iterator, config
#include <hibf/layout/compute_layout.hpp> // for compute_layout
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/misc/iota_vector.hpp>
#include <hibf/misc/timer.hpp> // for concurrent_timer
#include <hibf/sketch/compute_sketches.hpp> // for compute_sketches
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
Expand Down Expand Up @@ -39,8 +40,12 @@ TEST(compute_layout, dispatch)
seqan::hibf::concurrent_timer union_estimation_timer{};
seqan::hibf::concurrent_timer rearrangement_timer{};

auto layout2 =
seqan::hibf::layout::compute_layout(config, kmer_counts, sketches, union_estimation_timer, rearrangement_timer);
auto layout2 = seqan::hibf::layout::compute_layout(config,
kmer_counts,
sketches,
seqan::hibf::iota_vector(config.number_of_user_bins),
union_estimation_timer,
rearrangement_timer);

EXPECT_TRUE(layout1 == layout2);
}

0 comments on commit fead38a

Please sign in to comment.