Skip to content

Commit

Permalink
Merge pull request #131 from eseiler/misc/insert_iterator
Browse files Browse the repository at this point in the history
[MISC] Allow insert_iterator on std::vector
  • Loading branch information
eseiler authored Oct 13, 2023
2 parents 570264f + cf2af40 commit 3458567
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 12 deletions.
8 changes: 2 additions & 6 deletions include/hibf/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@

#pragma once

#include <cinttypes> // for uint16_t, uint32_t, uint64_t, uint8_t
#include <cinttypes> // for uint32_t, uint8_t
#include <cstddef> // for size_t
#include <functional> // for function
#include <iosfwd> // for istream, ostream
#include <iterator> // for insert_iterator

#include <hibf/cereal/path.hpp> // IWYU pragma: keep
#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/misc/insert_iterator.hpp> // for insert_iterator
#include <hibf/platform.hpp>

#include <cereal/access.hpp> // for access
Expand All @@ -23,8 +21,6 @@
namespace seqan::hibf
{

using insert_iterator = std::insert_iterator<robin_hood::unordered_flat_set<uint64_t>>;

/*!\brief The configuration used to build an (H)IBF
* \ingroup hibf
*
Expand Down
86 changes: 86 additions & 0 deletions include/hibf/misc/insert_iterator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

#pragma once

#include <algorithm> // for max
#include <cassert> // for assert
#include <cinttypes> // for uint64_t
#include <cstddef> // for ptrdiff_t
#include <iterator> // for output_iterator_tag
#include <utility> // for addressof, move
#include <vector> // for vector

#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/platform.hpp>

// IWYU pragma: private, include <hibf/config.hpp>

namespace seqan::hibf
{

/*!\brief Output iterator that inserts `uint64_t` values into either a
 *        `robin_hood::unordered_flat_set<uint64_t>` or a `std::vector<uint64_t>`.
 * \details
 * The target container is selected by the constructor overload that is used. Only the address of the
 * container is stored (non-owning), and it is dereferenced on every assignment, so the container must
 * still be alive whenever a value is assigned through the iterator.
 *
 * Dereferencing and incrementing are no-ops that return the iterator itself; only assigning a value
 * has an effect:
 *   * set target: the value is passed to `emplace`,
 *   * vector target: the value is passed to `emplace_back`.
 */
class insert_iterator
{
public:
    using iterator_category = std::output_iterator_tag;
    using value_type = void;
    using difference_type = std::ptrdiff_t; // qualified: <cstddef> only guarantees the name in namespace std
    using pointer = void;
    using reference = void;

    // An iterator without a target container is meaningless, hence no default construction.
    insert_iterator() = delete;
    insert_iterator(insert_iterator const &) = default;
    insert_iterator(insert_iterator &&) = default;
    insert_iterator & operator=(insert_iterator const &) = default;
    insert_iterator & operator=(insert_iterator &&) = default;
    ~insert_iterator() = default;

    /*!\brief Constructs an iterator that inserts into a set.
     * \param[in,out] set The set that receives all values assigned through this iterator.
     */
    explicit constexpr insert_iterator(robin_hood::unordered_flat_set<uint64_t> & set) :
        set{std::addressof(set)},
        is_set{true}
    {}

    /*!\brief Constructs an iterator that appends to a vector.
     * \param[in,out] vec The vector that receives all values assigned through this iterator.
     */
    explicit constexpr insert_iterator(std::vector<uint64_t> & vec) : vec{std::addressof(vec)}, is_set{false}
    {}

    /*!\brief Inserts `value` into the target container.
     * \param[in] value The value to insert.
     * \returns `*this`
     * \details
     * This function is declared `noexcept`. If the underlying container operation throws
     * (e.g. because an allocation fails), `std::terminate` is called.
     */
    insert_iterator & operator=(uint64_t const value) noexcept
    {
        if (is_set)
        {
            assert(set != nullptr);
            // `value` is a const scalar: moving it would be a plain copy, so it is passed directly.
            set->emplace(value);
        }
        else
        {
            assert(vec != nullptr);
            vec->emplace_back(value);
        }
        return *this;
    }

    //!\brief No-op; returns `*this` so that `*it = value` forwards to the value assignment.
    [[nodiscard]] constexpr insert_iterator & operator*() noexcept
    {
        return *this;
    }

    //!\brief No-op pre-increment; returns `*this`.
    constexpr insert_iterator & operator++() noexcept
    {
        return *this;
    }

    //!\brief No-op post-increment; returns a copy of `*this`.
    constexpr insert_iterator operator++(int) noexcept
    {
        return *this;
    }

private:
    //!\brief Target set; only set by the set constructor, otherwise `nullptr`.
    robin_hood::unordered_flat_set<uint64_t> * set{nullptr};
    //!\brief Target vector; only set by the vector constructor, otherwise `nullptr`.
    std::vector<uint64_t> * vec{nullptr};
    //!\brief `true` if values go into `set`, `false` if they go into `vec`.
    bool is_set{false};
};

} // namespace seqan::hibf
2 changes: 1 addition & 1 deletion src/build/compute_kmers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
{
timer<concurrent::no> local_user_bin_io_timer{};
local_user_bin_io_timer.start();
data.config.input_fn(record.idx, std::inserter(kmers, kmers.begin()));
data.config.input_fn(record.idx, insert_iterator{kmers});
local_user_bin_io_timer.stop();
data.user_bin_io_timer += local_user_bin_io_timer;
}
Expand Down
4 changes: 2 additions & 2 deletions src/build/insert_into_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ void insert_into_ibf(build_data const & data,
seqan::hibf::interleaved_bloom_filter & ibf)
{
auto const bin_index = seqan::hibf::bin_index{static_cast<size_t>(record.storage_TB_id)};
robin_hood::unordered_flat_set<uint64_t> values;
std::vector<uint64_t> values;

timer<concurrent::no> local_user_bin_io_timer{};
local_user_bin_io_timer.start();
data.config.input_fn(record.idx, std::inserter(values, values.begin()));
data.config.input_fn(record.idx, insert_iterator{values});
local_user_bin_io_timer.stop();
data.user_bin_io_timer += local_user_bin_io_timer;

Expand Down
4 changes: 2 additions & 2 deletions src/interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ size_t max_bin_size(config & configuration)
for (size_t i = 0u; i < configuration.number_of_user_bins; ++i)
{
kmers.clear();
configuration.input_fn(i, std::inserter(kmers, kmers.begin()));
configuration.input_fn(i, insert_iterator{kmers});

#pragma omp critical
max_size = std::max(max_size, kmers.size());
Expand All @@ -81,7 +81,7 @@ interleaved_bloom_filter::interleaved_bloom_filter(config & configuration) :
for (size_t i = 0u; i < configuration.number_of_user_bins; ++i)
{
kmers.clear();
configuration.input_fn(i, std::inserter(kmers, kmers.begin()));
configuration.input_fn(i, insert_iterator{kmers});

for (uint64_t const hash : kmers)
emplace(hash, seqan::hibf::bin_index{i});
Expand Down
2 changes: 1 addition & 1 deletion src/sketch/compute_sketches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void compute_sketches(config const & config,
seqan::hibf::sketch::hyperloglog sketch(config.sketch_bits);

kmers.clear();
config.input_fn(i, std::inserter(kmers, kmers.begin()));
config.input_fn(i, insert_iterator{kmers});

for (auto k_hash : kmers)
sketch.add(k_hash);
Expand Down

1 comment on commit 3458567

@vercel
Copy link

@vercel vercel bot commented on 3458567 Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

hibf – ./

hibf-git-main-seqan.vercel.app
hibf-seqan.vercel.app
hibf.vercel.app

Please sign in to comment.