diff --git a/include/hibf/interleaved_bloom_filter.hpp b/include/hibf/interleaved_bloom_filter.hpp index e8295edf..e7a28f91 100644 --- a/include/hibf/interleaved_bloom_filter.hpp +++ b/include/hibf/interleaved_bloom_filter.hpp @@ -25,9 +25,9 @@ #include // for cereal_archive #include // for config +#include -#include // for CEREAL_SERIALIZE_FUNCTION_NAME -#include // for bit_vector +#include // for CEREAL_SERIALIZE_FUNCTION_NAME namespace seqan::hibf { @@ -138,11 +138,11 @@ struct bin_index * For example, calls to `emplace` from multiple threads are safe if `thread_1` accesses bins 0-63, `thread_2` bins * 64-127, and so on. */ -class interleaved_bloom_filter +class interleaved_bloom_filter : private seqan::hibf::bit_vector { private: //!\brief The underlying datatype to use. - using data_type = sdsl::bit_vector; + using base_t = seqan::hibf::bit_vector; //!\brief The number of bins specified by the user. size_t bins{}; @@ -156,8 +156,6 @@ class interleaved_bloom_filter size_t bin_words{}; //!\brief The number of hash functions. size_t hash_funs{}; - //!\brief The bitvector. - data_type data{}; //!\brief Precalculated seeds for multiplicative hashing. We use large irrational numbers for a uniform hashing. static constexpr std::array hash_seeds{13572355802537770549ULL, // 2**64 / (e/2) 13043817825332782213ULL, // 2**64 / sqrt(2) @@ -276,7 +274,7 @@ class interleaved_bloom_filter for (size_t offset = 0, i = 0; i < bin_size_; offset += technical_bins, ++i) for (auto && bin : bin_range) - data[bin.value + offset] = 0; + (*this)[bin.value + offset] = 0; } /*!\brief Increases the number of bins stored in the Interleaved Bloom Filter. @@ -371,45 +369,14 @@ class interleaved_bloom_filter */ size_t bit_size() const noexcept { - return data.size(); + return base_t::size(); } //!\} /*!\name Comparison operators * \{ */ - /*!\brief Test for equality. - * \param[in] lhs A `seqan::hibf::interleaved_bloom_filter`. 
- * \param[in] rhs `seqan::hibf::interleaved_bloom_filter` to compare to. - * \returns `true` if equal, `false` otherwise. - */ - friend bool operator==(interleaved_bloom_filter const & lhs, interleaved_bloom_filter const & rhs) noexcept - { - return std::tie(lhs.bins, - lhs.technical_bins, - lhs.bin_size_, - lhs.hash_shift, - lhs.bin_words, - lhs.hash_funs, - lhs.data) - == std::tie(rhs.bins, - rhs.technical_bins, - rhs.bin_size_, - rhs.hash_shift, - rhs.bin_words, - rhs.hash_funs, - rhs.data); - } - - /*!\brief Test for inequality. - * \param[in] lhs A `seqan::hibf::interleaved_bloom_filter`. - * \param[in] rhs `seqan::hibf::interleaved_bloom_filter` to compare to. - * \returns `true` if unequal, `false` otherwise. - */ - friend bool operator!=(interleaved_bloom_filter const & lhs, interleaved_bloom_filter const & rhs) noexcept - { - return !(lhs == rhs); - } + constexpr bool operator==(interleaved_bloom_filter const &) const = default; //!\} /*!\name Access @@ -422,16 +389,7 @@ class interleaved_bloom_filter * * \noapi{The exact representation of the data is implementation defined.} */ - constexpr data_type & raw_data() noexcept - { - return data; - } - - //!\copydoc raw_data() - constexpr data_type const & raw_data() const noexcept - { - return data; - } + using base_t::data; //!\} /*!\cond DEV @@ -450,127 +408,11 @@ class interleaved_bloom_filter archive(hash_shift); archive(bin_words); archive(hash_funs); - archive(data); + archive(cereal::base_class(this)); } //!\endcond }; -/*!\brief A bitvector representing the result of a call to `bulk_contains` of the seqan::hibf::interleaved_bloom_filter. - * \ingroup ibf - */ -class binning_bitvector -{ -private: - //!\brief The underlying datatype to use. - using data_type = sdsl::bit_vector; - //!\brief The bitvector. - data_type data{}; - - friend class membership_agent_type; - -public: - /*!\name Constructors, destructor and assignment - * \{ - */ - binning_bitvector() = default; //!< Defaulted. 
- binning_bitvector(binning_bitvector const &) = default; //!< Defaulted. - binning_bitvector & operator=(binning_bitvector const &) = default; //!< Defaulted. - binning_bitvector(binning_bitvector &&) noexcept = default; //!< Defaulted. - binning_bitvector & operator=(binning_bitvector &&) noexcept = default; //!< Defaulted. - ~binning_bitvector() = default; //!< Defaulted. - - //!\brief Construct with given size. - explicit binning_bitvector(size_t const size) : data(size) - {} - //!\} - - //!\brief Returns the number of elements. - size_t size() const noexcept - { - return data.size(); - } - - /*!\name Iterators - * \{ - */ - //!\brief Returns an iterator to the first element of the container. - auto begin() noexcept - { - return data.begin(); - } - - //!\copydoc begin() - auto begin() const noexcept - { - return data.begin(); - } - - //!\brief Returns an iterator to the element following the last element of the container. - auto end() noexcept - { - return data.end(); - } - - //!\copydoc end() - auto end() const noexcept - { - return data.end(); - } - //!\} - - /*!\name Comparison operators - * \{ - */ - //!\brief Test for equality. - friend bool operator==(binning_bitvector const & lhs, binning_bitvector const & rhs) noexcept - { - return lhs.data == rhs.data; - } - - //!\brief Test for inequality. - friend bool operator!=(binning_bitvector const & lhs, binning_bitvector const & rhs) noexcept - { - return !(lhs == rhs); - } - //!\} - - /*!\name Access - * \{ - */ - //!\brief Return the i-th element. - auto operator[](size_t const i) noexcept - { - assert(i < size()); - return data[i]; - } - - //!\copydoc operator[]() - auto operator[](size_t const i) const noexcept - { - assert(i < size()); - return data[i]; - } - - /*!\brief Provides direct, unsafe access to the underlying data structure. - * \returns A reference to an SDSL bitvector. 
- * - * \details - * - * \noapi{The exact representation of the data is implementation defined.} - */ - constexpr data_type & raw_data() noexcept - { - return data; - } - - //!\copydoc raw_data() - constexpr data_type const & raw_data() const noexcept - { - return data; - } - //!\} -}; - /*!\brief Manages membership queries for the seqan::hibf::interleaved_bloom_filter. * \attention Calling seqan::hibf::interleaved_bloom_filter::increase_bin_number_to on `ibf` invalidates the * membership_agent. @@ -591,7 +433,7 @@ class interleaved_bloom_filter::membership_agent_type std::array bloom_filter_indices; //!\brief Stores the result of bulk_contains(). - binning_bitvector result_buffer; + bit_vector result_buffer; public: /*!\name Constructors, destructor and assignment @@ -634,11 +476,11 @@ class interleaved_bloom_filter::membership_agent_type * Concurrent invocations of this function are not thread safe, please create a * seqan::hibf::interleaved_bloom_filter::membership_agent_type for each thread. */ - [[nodiscard]] binning_bitvector const & bulk_contains(size_t const value) & noexcept; + [[nodiscard]] bit_vector const & bulk_contains(size_t const value) & noexcept; // `bulk_contains` cannot be called on a temporary, since the object the returned reference points to // is immediately destroyed. - [[nodiscard]] binning_bitvector const & bulk_contains(size_t const value) && noexcept = delete; + [[nodiscard]] bit_vector const & bulk_contains(size_t const value) && noexcept = delete; //!\} }; @@ -660,7 +502,7 @@ inline interleaved_bloom_filter::membership_agent_type interleaved_bloom_filter: * based on the k-mer counts. * * The seqan::hibf::counting_vector offers an easy way to add up the individual - * seqan::hibf::binning_bitvector by offering an `+=` operator. + * seqan::hibf::bit_vector by offering an `+=` operator. * * The `value_t` template parameter should be chosen in a way that no overflow occurs if all calls to `bulk_contains` * return a hit for a specific bin. 
For example, `uint8_t` will suffice when processing short Illumina reads, whereas @@ -691,9 +533,9 @@ class counting_vector : public std::vector using base_t::base_t; //!\} - /*!\brief Bin-wise adds the bits of a seqan::hibf::binning_bitvector. - * \param binning_bitvector The seqan::hibf::binning_bitvector. - * \attention The counting_vector must be at least as big as `binning_bitvector`. + /*!\brief Bin-wise adds the bits of a seqan::hibf::bit_vector. + * \param bit_vector The seqan::hibf::bit_vector. + * \attention The counting_vector must be at least as big as `bit_vector`. * * \details * @@ -701,9 +543,9 @@ class counting_vector : public std::vector * * \include test/snippet/ibf/counting_vector.cpp */ - counting_vector & operator+=(binning_bitvector const & binning_bitvector) + counting_vector & operator+=(bit_vector const & bit_vector) { - for_each_set_bin(binning_bitvector, + for_each_set_bin(bit_vector, [this](size_t const bin) { ++(*this)[bin]; @@ -711,13 +553,13 @@ class counting_vector : public std::vector return *this; } - /*!\brief Bin-wise subtracts the bits of a seqan::hibf::binning_bitvector. - * \param binning_bitvector The seqan::hibf::binning_bitvector. - * \attention The counting_vector must be at least as big as `binning_bitvector`. + /*!\brief Bin-wise subtracts the bits of a seqan::hibf::bit_vector. + * \param bit_vector The seqan::hibf::bit_vector. + * \attention The counting_vector must be at least as big as `bit_vector`. */ - counting_vector & operator-=(binning_bitvector const & binning_bitvector) + counting_vector & operator-=(bit_vector const & bit_vector) { - for_each_set_bin(binning_bitvector, + for_each_set_bin(bit_vector, [this](size_t const bin) { assert((*this)[bin] > 0); @@ -767,13 +609,13 @@ class counting_vector : public std::vector } private: - //!\brief Enumerates all bins of a seqan::hibf::binning_bitvector. + //!\brief Enumerates all bins of a seqan::hibf::bit_vector. 
template - void for_each_set_bin(binning_bitvector const & binning_bitvector, on_bin_fn_t && on_bin_fn) + void for_each_set_bin(bit_vector const & bit_vector, on_bin_fn_t && on_bin_fn) { - assert(this->size() >= binning_bitvector.size()); // The counting vector may be bigger than what we need. - size_t const words = (binning_bitvector.size() + 63u) >> 6; - uint64_t const * const bitvector_raw = binning_bitvector.raw_data().data(); + assert(this->size() >= bit_vector.size()); // The counting vector may be bigger than what we need. + size_t const words = (bit_vector.size() + 63u) >> 6; + uint64_t const * const bitvector_raw = bit_vector.data(); // Jump to the next 1 and return the number of places jumped in the bit_sequence auto jump_to_next_1bit = [](uint64_t & x) diff --git a/include/hibf/misc/bit_vector.hpp b/include/hibf/misc/bit_vector.hpp new file mode 100644 index 00000000..29bffe19 --- /dev/null +++ b/include/hibf/misc/bit_vector.hpp @@ -0,0 +1,946 @@ +// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +/*!\file + * \brief Provides seqan::hibf::bit_vector. + * \author Rene Rahn + * \author Enrico Seiler + */ + +// Modified by Enrico Seiler : +// * To be a single header and non-template (was CRTP) +// * Changed default allocator to 64-byte aligned allocator +// * Changed `bit_vector operator~()` to be auto-vectorizable +// * Changed `binary_transform_impl` to be auto-vectorizable +// * Replaced `seqan3::detail::bits_of` with `sizeof(chunk_type) * CHAR_BIT` +// * Changed `difference_type` to `size_type` in `operator[]` +// * Changed `1` to `1ULL` in `(1ULL << to_local_chunk_position(size()))` of `resize()` +// * Changed `bit_reference & operator=` to be in accordance with STL +// * Changed `resize` to do nothing when reducing size besides setting new size. 
+ +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include + +namespace seqan::hibf +{ + +/*!\brief An allocator aware bit vector. + * + * \details + * + * Implements a bit vector on the basis of a std::vector with `uint64_t` as value type. The bit vector can be + * dynamically resized and provides additional interfaces to apply efficient bit-operations on it. + * + * The reference type is a special proxy that provides access to a single bit. Note that it is not a real reference + * but can be converted to a bool or assigned from a bool. + */ +class bit_vector : + public std::vector>::template rebind_alloc> +{ +private: + //!\brief The allocator type. + using allocator_t = seqan::hibf::contrib::aligned_allocator; + + //!\brief The base type. + using base_t = std::vector::template rebind_alloc>; + + //!\brief The type of the underlying chunk of bits. + using chunk_type = uint64_t; + + /*!\brief The bit proxy returned as reference. + * + * \tparam is_const A bool that indicates a const proxy if the value is `true`, or a non-const proxy otherwise. + * + * \details + * + * This proxy is returned as a proxy for the reference type since a single bit cannot be addressed directly. + * This proxy allows all operations that can be used on a single bit and is also implicitly convertible to a bool. + * It cannot be default constructed and can only be instantiated with a particular bit from the bit vector and its + * associated classes. + */ + template + class bit_reference + { + private: + //!\brief Befriend the bit vector so it can instantiate this proxy with a particular position. + friend class bit_vector; + + //!\brief The const or non-const chunk type to be represented. + using maybe_const_chunk_type = std::conditional_t; + + maybe_const_chunk_type * _chunk{}; //!< The underlying chunk. + chunk_type _chunk_mask{}; //!< The mask selecting the bit for this chunk. 
+ + /*!\brief Constructs the reference for the bit at the given position of a chunk. + * + * \param[in] chunk A pointer to the chunk that contains the represented bit. + * \param[in] local_chunk_position The position of the bit within the chunk. + */ + constexpr bit_reference(maybe_const_chunk_type * chunk, size_type const local_chunk_position) noexcept : + _chunk{chunk}, + _chunk_mask{static_cast(1) << to_local_chunk_position(local_chunk_position)} + {} + + public: + /*!\name Constructors, destructor and assignment + * \{ + */ + bit_reference() = delete; //!< Deleted. + bit_reference(bit_reference const & other) = default; + bit_reference(bit_reference && other) = default; + //!\brief Assigns the value of the bit referenced by `other`; the proxy itself is not rebound. + bit_reference & operator=(bit_reference const & other) + { + return *this = bool(other); + } + //!\brief Assigns the value of the bit referenced by `other`; the proxy itself is not rebound. + bit_reference & operator=(bit_reference && other) + { + return *this = bool(other); + } + + /*!\brief Assigns a bit to the referenced bit. + * + * \param[in] bit The bit to set. + */ + constexpr bit_reference & operator=(bool const bit) noexcept + { + bit ? set() : clear(); + return *this; + } + + //!\overload + // Needed to model std::output_iterator, which requires the assignment to a const && version + // of the proxy. + constexpr bit_reference const & operator=(bool const bit) const noexcept + requires (!is_const) + { + bit ? set() : clear(); + return *this; + } + //!\} + + //!\brief Converts this proxy to a bool. + constexpr operator bool() const noexcept + { + return *_chunk & _chunk_mask; + } + + //!\brief Flips the referenced bit. + constexpr bit_reference & flip() noexcept + { + (*this) ? clear() : set(); + return *this; + } + + private: + //!\brief Sets the bit at the specific position. + // `const` because only the referenced chunk is modified, never the proxy itself; without it the + // const-qualified assignment overload above could not call this function. + constexpr void set() const noexcept + { + *_chunk |= _chunk_mask; + } + + //!\brief Clears the bit at the specific position. + // `const` for the same reason as set(). + constexpr void clear() const noexcept + { + *_chunk &= ~_chunk_mask; + } + }; + + /*!\brief A random access iterator over the bit vector. 
+ * \tparam is_const A bool that indicates a const iterator if the value is `true`, or a non-const iterator otherwise. + */ + template + class bit_iterator + { + private: + //!\brief Befriend the bit_iterator types with different constness. + template + friend class bit_iterator; + + //!\brief The type of the chunk. + using maybe_const_chunk_type = std::conditional_t; + + maybe_const_chunk_type * _chunk{}; //!< The underlying chunk. + size_type _chunk_position{}; //!< The bit position; it is not reset per chunk, readers use its local chunk position. + + public: + /*!\name Associated types + * \{ + */ + using value_type = bool; //!< The value type. + using reference = bit_reference; //!< The proxy type used as reference. + using pointer = void; //!< The pointer type is void. + using difference_type = std::ptrdiff_t; //!< The difference type. + using iterator_category = std::random_access_iterator_tag; //!< The iterator category. + using iterator_concept = std::random_access_iterator_tag; //!< The iterator concept. + //!\} + + /*!\name Constructors, destructor and assignment + * \{ + */ + bit_iterator() = default; //!< Default. + + /*!\brief Constructs the iterator set to the begin of the given chunk. + * + * \param[in] chunk A pointer to the chunk that contains the represented bit. + */ + explicit constexpr bit_iterator(maybe_const_chunk_type * chunk) noexcept : _chunk{chunk}, _chunk_position{0} + {} + + /*!\brief Copies from a non-const iterator. + * + * \param[in] other The other non-const iterator to copy from. + */ + constexpr bit_iterator(bit_iterator const & other) noexcept + requires (is_const) + : _chunk{other._chunk}, _chunk_position{other._chunk_position} + {} + //!\} + + /*!\name Element access + * \{ + */ + //!\brief Returns the currently pointed-to element. + constexpr reference operator*() const noexcept + { + return reference{_chunk, _chunk_position}; + } + + //!\brief Returns the element `count` positions away from the current iterator position. 
+ constexpr reference operator[](difference_type const count) const noexcept + { + return *((*this) + count); + } + //!\} + + /*!\name Arithmetic operator + * \{ + */ + //!\brief Increments the iterator by one. + constexpr bit_iterator & operator++() noexcept + { + // The chunk pointer advances exactly when the incremented position has local chunk position 0. + // (to_local_chunk_position is defined outside this view; presumably `position & modulo_mask` - confirm.) + _chunk += !static_cast(to_local_chunk_position(++_chunk_position)); + return *this; + } + + //!\brief Increments the iterator by one and returns the iterator before the increment. + constexpr bit_iterator operator++(int) noexcept + { + bit_iterator tmp{*this}; + ++(*this); + return tmp; + } + + //!\brief Advances the iterator by `count` many elements; a negative `count` moves backwards. + constexpr bit_iterator & operator+=(difference_type const count) noexcept + { + // chunk:| 0 | 1 | 2 | 3 | 4 | 5 | + // |--------|--------|--------|--------|-x------|--------| + // chunk_position:|01234567|01234567|01234567|01234567|01234567|01234567| + // global position:|01234567|89012345|67890123|45678901|23456789|01234567| + // |0 | 1 | 2 | 3 | |4 | + if (count < 0) + { + // Backward jump: `updated_count` is the distance measured from the last bit of the current chunk + // (`- count` adds |count|). Its chunk part is the number of chunks to step back; its local part, + // mirrored via `modulo_mask - ...`, is the new position within the target chunk. + size_type updated_count = modulo_mask - to_local_chunk_position(_chunk_position) - count; + _chunk_position = modulo_mask - to_local_chunk_position(updated_count); + _chunk -= to_chunk_position(updated_count); + } + else + { + // Forward jump: move by the whole chunks covered, then keep only the local position. + _chunk += to_chunk_position(to_local_chunk_position(_chunk_position) + count); + _chunk_position = to_local_chunk_position(_chunk_position + count); + } + + return *this; + } + + //!\brief Returns a new iterator advanced by `count` many elements. + constexpr bit_iterator operator+(difference_type const count) const noexcept + { + bit_iterator tmp{*this}; + return tmp += count; + } + + //!\brief Returns a new iterator advanced by `count` many elements. + friend constexpr bit_iterator operator+(difference_type const count, bit_iterator rhs) noexcept + { + return rhs + count; + } + + //!\brief Decrements the iterator by one. 
+ constexpr bit_iterator & operator--() noexcept + { + // The chunk pointer must move back exactly when the iterator stands on the first bit of its chunk, so the + // test has to look at the position *before* it is decremented. Testing the decremented position stepped the + // pointer back on the 1 -> 0 move and missed the 0 -> last-bit move. + if (to_local_chunk_position(_chunk_position) == 0u) + --_chunk; + + // Unsigned wrap-around is fine here: only the local chunk position of `_chunk_position` is ever read. + --_chunk_position; + return *this; + } + + //!\brief Decrements the iterator by one and returns the iterator before the decrement. + constexpr bit_iterator operator--(int) noexcept + { + bit_iterator tmp{*this}; + --(*this); + return tmp; + } + + //!\brief Moves the iterator back by `count` many elements. + constexpr bit_iterator & operator-=(difference_type const count) noexcept + { + return *this += -count; + } + + //!\brief Returns a new iterator moved back by `count` many elements. + constexpr bit_iterator operator-(difference_type const count) const noexcept + { + bit_iterator tmp{*this}; + return tmp -= count; + } + + //!\brief Returns the distance between `this` and the `rhs` iterator. + template + constexpr difference_type operator-(bit_iterator rhs) const noexcept + { + return ((_chunk - rhs._chunk) << division_mask) - // number of bits between chunks. + to_local_chunk_position(rhs._chunk_position) + // minus the first bits in rhs. + to_local_chunk_position(_chunk_position); // plus the first bits of the lhs + } + //!\} + + /*!\name Comparison operators + * \{ + */ + //!\brief Compares with another iterator. + template + bool operator==(bit_iterator const & rhs) const + { + return _chunk == rhs._chunk + && (to_local_chunk_position(_chunk_position) == to_local_chunk_position(rhs._chunk_position)); + } + + //!\brief Compares the two iterator by their chunk position and local chunk position. + template + std::strong_ordering operator<=>(bit_iterator const & rhs) const + { + if (std::strong_ordering order = _chunk <=> rhs._chunk; order == std::strong_ordering::equivalent) + return to_local_chunk_position(_chunk_position) <=> to_local_chunk_position(rhs._chunk_position); + else + return order; + } + //!\} + }; + +public: + /*!\name Associated types + * \{ + */ + //!\brief The iterator over the bits. 
+ using iterator = bit_iterator; + //!\brief The const iterator over the bits. + using const_iterator = bit_iterator; + //!\brief The value type is `bool`. + using value_type = std::iter_value_t; + //!\brief The reference type which is implemented as a proxy. + using reference = std::iter_reference_t; + //!\brief The const reference type which is implemented as a proxy. + using const_reference = std::iter_reference_t; + //!\brief The size_type. + using size_type = size_t; + //!\brief The difference type. + using difference_type = std::iter_difference_t; + //!\brief The allocator type to use. + using allocator_type = allocator_t; + //!\} + +private: + //!\brief The number of bits represented in one chunk, e.g. 64. + static constexpr size_type chunk_size = sizeof(chunk_type) * CHAR_BIT; + //!\brief The mask used for the modulo operations using the bitwise and operator, e.g. & 63. + static constexpr size_type modulo_mask = chunk_size - 1u; + //!\brief The shift amount used for the division operations using the bitwise shift operator, e.g. >> 6. + static constexpr size_type division_mask = std::countr_zero(chunk_size); + + //!\brief The number of bits stored (as returned by size()), not the number of chunks. + size_type _size{}; + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + //!\brief The default constructor which optionally sets the allocator. + HIBF_CONSTEXPR_VECTOR bit_vector(allocator_type const & alloc = allocator_type{}) : base_t{alloc} + {} + // NOTE(review): the defaulted move operations copy `_size` while the base vector is moved from, so a moved-from + // bit_vector presumably still reports its old size() without owning the storage - confirm it is never reused. + HIBF_CONSTEXPR_VECTOR bit_vector(bit_vector const &) = default; //!< Default. + HIBF_CONSTEXPR_VECTOR bit_vector(bit_vector &&) = default; //!< Default. + HIBF_CONSTEXPR_VECTOR bit_vector & operator=(bit_vector const &) = default; //!< Default. + HIBF_CONSTEXPR_VECTOR bit_vector & operator=(bit_vector &&) = default; //!< Default. + HIBF_CONSTEXPR_VECTOR ~bit_vector() = default; //!< Default. + + /*!\brief Constructs the bit vector with `count` copies of elements with value `bit`. + * + * \param[in] count The number of elements to create the bit vector with. 
+ * \param[in] bit The bit to set during initialisation. + * \param[in] alloc The allocator to use [optional]. + */ + HIBF_CONSTEXPR_VECTOR + bit_vector(size_type const count, bool const bit, allocator_type const & alloc = allocator_type{}) : base_t{alloc} + { + assign(count, bit); + } + + /*!\brief Constructs the container initialised with the elements in `list`. + * + * \param[in] list An initialiser list with the bits set. + * \param[in] alloc The allocator to use [optional]. + */ + HIBF_CONSTEXPR_VECTOR bit_vector(std::initializer_list list, + allocator_type const & alloc = allocator_type{}) : + base_t{alloc} + { + assign(list); + } + + /*!\brief Constructs the container with `count` default-inserted instances of `bool`. No copies are made. + * + * \param[in] count The number of elements to create the bit vector with. + * \param[in] alloc The allocator to use [optional]. + */ + // NOTE(review): not `explicit`, so a plain integer converts implicitly to a bit_vector - confirm this is intended. + HIBF_CONSTEXPR_VECTOR bit_vector(size_type const count, allocator_type const & alloc = allocator_type{}) : + bit_vector{count, bool{}, alloc} + {} + //!\} + + /*!\name Member functions + * \{ + */ + + /*!\brief Assigns values to the container. + * + * \tparam iterator_t The type of the iterator; must model std::input_iterator. + * \tparam sentinel_t The type of the sentinel; must model std::sentinel_for `iterator_t`. + * + * \param[in] first The first element of the range to copy elements from. + * \param[in] last The end of the range to copy elements from. + * + * \details + * + * Replaces the contents with copies of the range `[first, last)`. The behaviour is undefined if either argument + * is an iterator to `*this`. + * + * All iterators, pointers and references to the elements of the container are invalidated. + * The past-the-end iterator is also invalidated. + * + * ### Exception + * + * If an exception is thrown this function has no effect (strong exception guarantee). + * + * ### Complexity + * + * Linear in distance between first and last. 
+ */ + template sentinel_t> + requires std::assignable_from> + constexpr void assign(iterator_t first, sentinel_t last) + { + bit_vector tmp{}; // To ensure strong exception guarantee. + // Pre-allocate when the length of the range is known up front. + if constexpr (std::sized_sentinel_for) + tmp.reserve(std::ranges::distance(first, last)); + + // Everything that may throw happens on the temporary, one push_back per element. + std::ranges::copy(first, last, std::back_inserter(tmp)); + + // ----- no exception after this. + swap(tmp); + // NOTE(review): swap() already exchanged `_size`, and begin()/end() are derived from size(), so this looks like + // a no-op - confirm against set_new_size(), which is not visible here. + set_new_size(std::ranges::distance(begin(), end())); + } + + /*!\brief Assigns values to the container. + * + * \param[in] ilist The initialiser list with the elements to insert. + * + * \details + * + * Replaces the contents with the elements from the initializer list ilist. + * + * All iterators, pointers and references to the elements of the container are invalidated. + * The past-the-end iterator is also invalidated. + * + * ### Exception + * + * If an exception is thrown this function has no effect (strong exception guarantee). + * + * ### Complexity + * + * Linear in `ilist.size()`. + */ + constexpr void assign(std::initializer_list const & ilist) + { + // Forwards to the iterator/sentinel overload. + assign(std::ranges::begin(ilist), std::ranges::end(ilist)); + } + + /*!\brief Assigns values to the container. + * + * \param[in] count The new size of the container. + * \param[in] bit The value to initialize elements of the container with. + * + * \details + * + * Replaces the contents with `count` copies of value `bit`. + * + * All iterators, pointers and references to the elements of the container are invalidated. + * The past-the-end iterator is also invalidated. + * + * ### Exception + * + * If an exception is thrown this function has no effect (strong exception guarantee). + * + * ### Complexity + * + * Linear in count. 
+ */ + HIBF_CONSTEXPR_VECTOR void assign(size_type const count, bool const bit) + { + resize(count, bit); + std::ranges::for_each(*as_base(), + [value = fill_chunk(bit)](chunk_type & chunk) + { + chunk = value; + }); + } + //!\} + + /*!\name Element access + * \{ + */ + //!\brief Access specified element. + HIBF_CONSTEXPR_VECTOR reference operator[](size_type const position) noexcept + { + assert(position < size()); + + return *std::ranges::next(begin(), position); + } + + //!\brief Access specified element. + HIBF_CONSTEXPR_VECTOR const_reference operator[](size_type const position) const noexcept + { + assert(position < size()); + + return *std::ranges::next(begin(), position); + } + + /*!\brief Access the last element. + * + * \returns A reference to the last element in the container. + * + * \details + * + * Calling back on an empty container causes underfined behaviour. + * + * ### Exception + * + * Throws nothing. + * + * ### Complexity + * + * Constant. + */ + HIBF_CONSTEXPR_VECTOR reference back() noexcept + { + assert(!empty()); // Calling on empty container is undefined behaviour. + + return (*this)[size() - 1u]; + } + + //!\overload + HIBF_CONSTEXPR_VECTOR const_reference back() const noexcept + { + assert(!empty()); // Calling on empty container is undefined behaviour. + + return (*this)[size() - 1u]; + } + + //!\brief Checks if all bits are set to `true`. + constexpr bool all() const noexcept + { + constexpr chunk_type mask = ~static_cast(0); + return std::ranges::all_of(*as_base(), + [](chunk_type const & chunk) + { + return chunk == mask; + }); + } + + //!\brief Checks if any bit is set to `true`. + constexpr bool any() const noexcept + { + constexpr chunk_type mask = static_cast(0); + return std::ranges::any_of(*as_base(), + [](chunk_type const & chunk) + { + return chunk | mask; + }); + } + + //!\brief Checks if none of the bits is set to `true`. 
+ constexpr bool none() const noexcept + { + return !any(); + } + //!\} + + /*!\name Capacity + * \{ + */ + //!\brief Returns the number of elements, i.e. the number of bits. + constexpr size_type size() const noexcept + { + return _size; + } + + //!\brief Checks whether the container is empty. + constexpr bool empty() const noexcept + { + return _size == 0u; + } + + //!\brief Returns the capacity in bits, i.e. the capacity of the underlying chunk vector times the chunk size. + HIBF_CONSTEXPR_VECTOR size_type capacity() const noexcept + { + return base_t::capacity() * chunk_size; + } + + /*!\brief Reserves storage. + * + * \param[in] new_capacity The new capacity of the bit vector. + * + * \details + * + * Increase the capacity of the vector to a value that's greater or equal to new_capacity. If new_capacity is + * greater than the current capacity(), new storage is allocated, otherwise the method does nothing. + * reserve() does not change the size of the vector. If new_capacity is greater than capacity(), all iterators, + * including the past-the-end iterator, and all references to the elements are invalidated. Otherwise, no + * iterators or references are invalidated. + * + * ### Exceptions + * + * std::length_error if `new_capacity > max_size()` or any exception thrown by allocator_t::allocate(). + * If an exception is thrown this function has no effect (strong exception guarantee). + */ + HIBF_CONSTEXPR_VECTOR void reserve(size_type const new_capacity) + { + // host_size_impl is defined outside this view; presumably it converts a bit count into a chunk count. + base_t::reserve(host_size_impl(new_capacity)); + } + //!\} + + /*!\name Modifiers + * \{ + */ + /*!\brief Adds an element to the end. + * + * \param bit The bit to add to the end. + * + * \details + * + * Appends the given element value to the end of the container. + * + * If the new size() is greater than capacity() then all iterators and references + * (including the past-the-end iterator) are invalidated. + * Otherwise only the past-the-end iterator is invalidated. 
+ * + * ### Exception + * + * If an exception is thrown (which can be due to allocator_t::allocate()), this function has no effect + * (strong exception guarantee). + * + * ### Complexity + * + * Amortised constant. + */ + HIBF_CONSTEXPR_VECTOR void push_back(bool bit) + { + size_t const new_size = size() + 1u; + resize(new_size); + // ---- no exception after this point. + set_new_size(new_size); + back() = bit; // set the bit. + } + + /*!\brief Changes the number of elements stored, where additional copies of `bit` are appended. + * + * \param[in] count The new number of bits. + * \param[in] bit The value of the bits appended when the size grows. + * + * \details + * + * The underlying chunk vector is resized to the number of chunks needed for `count` bits. When shrinking, the + * bits of the last chunk beyond the new size keep their old value; they are padding. When growing, every new bit + * is guaranteed to equal `bit`, including those that live in the previously-last chunk. + */ + HIBF_CONSTEXPR_VECTOR void resize(size_type const count, bool const bit = {}) + { + base_t::resize(host_size_impl(count)); + + size_type const old_size = size(); + set_new_size(count); + if (count <= old_size) // Nothing to initialise when shrinking or keeping the size. + return; + + if (bit) + { + std::ranges::fill(begin() + old_size, end(), true); + return; + } + + // Growing with `false`: this relies, as before, on chunks appended by base_t::resize being zero. + // The chunk that contained the old end may still carry stale padding bits (left behind by a previous + // shrink or written by a whole-chunk fill), so clear those that now become visible - at most chunk_size - 1 + // bits. + size_type const old_chunk_end = (old_size + modulo_mask) & ~modulo_mask; + size_type const stale_end = (count < old_chunk_end) ? count : old_chunk_end; + std::ranges::fill(begin() + old_size, begin() + stale_end, false); + } + + //!\brief Performs binary AND between `this` and `rhs`. + constexpr bit_vector & operator&=(bit_vector const & rhs) noexcept + { + assert(rhs.size() == size()); + + return binary_transform_impl(rhs, + [](auto const & left_chunk, auto const & right_chunk) + { + return left_chunk & right_chunk; + }); + } + + //!\brief Performs binary OR between `this` and `rhs`. + constexpr bit_vector & operator|=(bit_vector const & rhs) noexcept + { + assert(rhs.size() == size()); + + return binary_transform_impl(rhs, + [](auto const & left_chunk, auto const & right_chunk) + { + return left_chunk | right_chunk; + }); + } + + //!\brief Performs binary XOR between `this` and `rhs`. + constexpr bit_vector & operator^=(bit_vector const & rhs) noexcept + { + assert(rhs.size() == size()); + + return binary_transform_impl(rhs, + [](auto const & left_chunk, auto const & right_chunk) + { + return left_chunk ^ right_chunk; + }); + } + + //!\brief Performs binary NOT. 
+    HIBF_CONSTEXPR_VECTOR bit_vector operator~() const noexcept
+    {
+        // NOTE(review): builds a new bit_vector although declared noexcept — confirm the sized constructor
+        // cannot throw, otherwise an allocation failure terminates the program.
+        bit_vector tmp(size());
+
+        // The left operand (tmp's own chunk) is ignored; every chunk becomes the complement of this vector's chunk.
+        tmp.binary_transform_impl(*this,
+                                  [](auto const &, auto const & right_chunk)
+                                  {
+                                      return ~right_chunk;
+                                  });
+
+        return tmp;
+    }
+
+    /*!\brief Performs binary AND.
+     * \param[in] lhs The left operand, taken by value and reused as the result.
+     * \param[in] rhs The right operand; must have the same size as `lhs` (only checked via `assert`).
+     */
+    HIBF_CONSTEXPR_VECTOR friend bit_vector operator&(bit_vector lhs, bit_vector const & rhs) noexcept
+    {
+        lhs &= rhs;
+        return lhs; // Returned by name so the parameter is moved; `return lhs &= rhs;` would copy it.
+    }
+
+    /*!\brief Performs binary OR.
+     * \param[in] lhs The left operand, taken by value and reused as the result.
+     * \param[in] rhs The right operand; must have the same size as `lhs` (only checked via `assert`).
+     */
+    HIBF_CONSTEXPR_VECTOR friend bit_vector operator|(bit_vector lhs, bit_vector const & rhs) noexcept
+    {
+        lhs |= rhs;
+        return lhs; // Returned by name so the parameter is moved; `return lhs |= rhs;` would copy it.
+    }
+
+    /*!\brief Performs binary XOR.
+     * \param[in] lhs The left operand, taken by value and reused as the result.
+     * \param[in] rhs The right operand; must have the same size as `lhs` (only checked via `assert`).
+     */
+    HIBF_CONSTEXPR_VECTOR friend bit_vector operator^(bit_vector lhs, bit_vector const & rhs) noexcept
+    {
+        lhs ^= rhs;
+        return lhs; // Returned by name so the parameter is moved; `return lhs ^= rhs;` would copy it.
+    }
+
+    /*!\brief Computes the bitwise `a &= ~b` operator without an additional copy.
+     * \param[in] rhs The vector whose set bits are cleared in `this`; must have the same size (only asserted).
+     * \returns A reference to `this`.
+     */
+    constexpr bit_vector & and_not(bit_vector const & rhs) noexcept
+    {
+        assert(rhs.size() == size());
+
+        return binary_transform_impl(rhs,
+                                     [](auto const & left_chunk, auto const & right_chunk)
+                                     {
+                                         return left_chunk & ~right_chunk;
+                                     });
+    }
+
+    /*!\brief Flips all bits in-place.
+     * \returns A reference to `this`.
+     * \details Every chunk of the underlying storage is inverted, which includes the bits of the last chunk
+     *          that lie beyond size().
+     */
+    constexpr bit_vector & flip() noexcept
+    {
+        std::ranges::for_each(*as_base(),
+                              [](chunk_type & chunk)
+                              {
+                                  chunk = ~chunk;
+                              });
+        return *this;
+    }
+
+    /*!\brief Flips the bit at the given position.
+     * \param[in] position The index of the bit to flip.
+     * \returns A reference to `this`.
+     * \throws std::out_of_range if `position >= size()`.
+     */
+    HIBF_CONSTEXPR_VECTOR bit_vector & flip(size_type position)
+    {
+        using namespace std::literals;
+
+        if (position >= size())
+            throw std::out_of_range{"The given position "s + std::to_string(position) + " is out of the range [0, "s
+                                    + std::to_string(size()) + ")!"s};
+
+        (*this)[position].flip();
+        return *this;
+    }
+
+    //!\brief Exchanges the contents of the container with those of `other` (chunk storage and size).
+    HIBF_CONSTEXPR_VECTOR void swap(bit_vector & other) noexcept
+    {
+        base_t::swap(*other.as_base());
+        std::swap(_size, other._size);
+    }
+    //!\}
+
+    /*!\name Iterators
+     * \{
+     */
+    //!\brief Returns an iterator to the beginning.
+    HIBF_CONSTEXPR_VECTOR iterator begin() noexcept
+    {
+        // The iterator is constructed from the pointer to the first chunk of the underlying storage.
+        return iterator{base_t::data()};
+    }
+
+    //!\overload
+    HIBF_CONSTEXPR_VECTOR const_iterator begin() const noexcept
+    {
+        return const_iterator{base_t::data()};
+    }
+
+    //!\overload
+    HIBF_CONSTEXPR_VECTOR const_iterator cbegin() const noexcept
+    {
+        return begin();
+    }
+
+    //!\brief Returns an iterator to the end, i.e. begin() advanced by size() bits.
+    HIBF_CONSTEXPR_VECTOR iterator end() noexcept
+    {
+        return begin() + size();
+    }
+
+    //!\overload
+    HIBF_CONSTEXPR_VECTOR const_iterator end() const noexcept
+    {
+        return begin() + size();
+    }
+
+    //!\overload
+    HIBF_CONSTEXPR_VECTOR const_iterator cend() const noexcept
+    {
+        return end();
+    }
+    //!\}
+
+    //!\brief Returns a pointer to the first chunk of the underlying storage, passed through std::assume_aligned.
+    // NOTE(review): the alignment template argument of std::assume_aligned is missing in this copy of the patch —
+    // restore it from the original source.
+    [[gnu::always_inline]] inline HIBF_CONSTEXPR_VECTOR chunk_type * data() noexcept
+    {
+        return std::assume_aligned(base_t::data());
+    }
+
+    //!\overload
+    [[gnu::always_inline]] inline HIBF_CONSTEXPR_VECTOR chunk_type const * data() const noexcept
+    {
+        return std::assume_aligned(base_t::data());
+    }
+
+    /*!\name Serialisation
+     * \{
+     */
+    /*!\brief Saves this bit vector to the given output archive.
+     *
+     * \tparam output_archive_t The type of the output_archive; must model seqan::hibf::cereal_output_archive.
+     *
+     * \param[in, out] archive The archive to serialise this object to.
+     *
+     * \details
+     *
+     * Writes the base class (the chunk storage) followed by the number of bits (`_size`).
+     */
+    // NOTE(review): the template parameter list and the template argument of cereal::base_class are missing in
+    // this copy of the patch — restore them from the original source.
+    template
+    void save(output_archive_t & archive) const
+    {
+        archive(cereal::base_class(this), _size);
+    }
+
+    /*!\brief Loads this bit vector from the given input archive.
+     *
+     * \tparam input_archive_t The type of the input_archive; must model seqan::hibf::cereal_input_archive.
+     *
+     * \param[in, out] archive The archive to serialise this object from.
+     *
+     * \details
+     *
+     * Reads the base class (the chunk storage) followed by the number of bits (`_size`), mirroring save().
+     */
+    // NOTE(review): the template parameter list and the template argument of cereal::base_class are missing in
+    // this copy of the patch — restore them from the original source.
+    template
+    void load(input_archive_t & archive)
+    {
+        archive(cereal::base_class(this), _size);
+    }
+    //!\}
+
+private:
+    //!\brief Performs the binary bitwise-operation on the underlying chunks.
+    //!\param[in] rhs The right operand; its storage is read for as many chunks as `this` needs for size() bits.
+    //!\param[in] op Callable invoked as `op(left_chunk, right_chunk)`; its result is stored in the left chunk.
+    //!\returns A reference to `this`.
+    // NOTE(review): the template parameter list (declaring binary_operator_t) is missing in this copy of the
+    // patch — restore it from the original source.
+    template
+    constexpr bit_vector & binary_transform_impl(bit_vector const & rhs, binary_operator_t && op) noexcept
+    {
+        chunk_type * const lhs_data = data();
+        chunk_type const * const rhs_data = rhs.data();
+        // Number of chunks to process; intentionally shadows the member function size() from here on.
+        size_type const size = host_size_impl(this->size());
+
+        // Whole chunks are combined, so bits of the last chunk beyond size() are transformed as well.
+        for (size_t i = 0; i < size; ++i)
+            lhs_data[i] = op(lhs_data[i], rhs_data[i]);
+
+        return *this;
+    }
+
+    //!\brief Computes the minimal size needed for the host vector.
+    //!\param[in] count The number of bits to allocate memory for.
+    //!\returns The number of chunks, as computed by chunks_needed().
+    constexpr size_type host_size_impl(size_type const count) const noexcept
+    {
+        return chunks_needed(count);
+    }
+
+    //!\brief Sets the new size (number of bits); the chunk storage itself is not touched.
+    constexpr void set_new_size(size_type const new_size) noexcept
+    {
+        _size = new_size;
+    }
+
+    //!\brief Casts `this` to its base class.
+    // NOTE(review): the target type of the static_cast is missing in this copy of the patch — restore it from
+    // the original source.
+    constexpr base_t const * as_base() const noexcept
+    {
+        return static_cast(this);
+    }
+
+    //!\overload
+    constexpr base_t * as_base() noexcept
+    {
+        return static_cast(this);
+    }
+
+    //!\brief Returns how many chunks are needed to store `count` many elements.
+    constexpr size_type chunks_needed(size_type const count) const noexcept
+    {
+        return (count + 63u) >> 6; // ceil(count/64)
+    }
+
+    //!\brief Returns a new chunk filled with the given bit: all bits set for `true`, all bits cleared for `false`.
+    constexpr chunk_type fill_chunk(bool const bit) const noexcept
+    {
+        return (bit) ? ~chunk_type{} : chunk_type{};
+    }
+
+    //!\brief Converts the position to the local position within the chunk.
+    static constexpr size_type to_local_chunk_position(size_type const position) noexcept
+    {
+        return position & modulo_mask; // e.g. position % 64
+    }
+
+    //!\brief Converts the position to the chunk position.
+    static constexpr size_type to_chunk_position(size_type const position) noexcept
+    {
+        return position >> division_mask; // e.g.
position / 64 + } +}; + +} // namespace seqan::hibf diff --git a/include/hibf/misc/print.hpp b/include/hibf/misc/print.hpp index 3a718d5a..e3e045b8 100644 --- a/include/hibf/misc/print.hpp +++ b/include/hibf/misc/print.hpp @@ -8,7 +8,7 @@ #include // for cout, ostream #include // for vector -#include // for counting_vector, binning_bitvector +#include // for counting_vector, bit_vector namespace seqan::hibf { @@ -20,7 +20,7 @@ namespace seqan::hibf // A free function for `std::vector` might also cause problems. struct print_t { - void operator()(seqan::hibf::binning_bitvector const & vector, std::ostream & stream = std::cout) const; + void operator()(seqan::hibf::bit_vector const & vector, std::ostream & stream = std::cout) const; void operator()(seqan::hibf::counting_vector const & vector, std::ostream & stream = std::cout) const; void operator()(seqan::hibf::counting_vector const & vector, std::ostream & stream = std::cout) const; void operator()(seqan::hibf::counting_vector const & vector, std::ostream & stream = std::cout) const; diff --git a/include/hibf/platform.hpp b/include/hibf/platform.hpp index eb112a34..58b284f4 100644 --- a/include/hibf/platform.hpp +++ b/include/hibf/platform.hpp @@ -142,6 +142,13 @@ // Workarounds // ============================================================================ +//!\brief std::vector constexpr support. +#if defined(__cpp_lib_constexpr_vector) +# define HIBF_CONSTEXPR_VECTOR constexpr +#else +# define HIBF_CONSTEXPR_VECTOR +#endif + /*!\brief Workaround bogus memcpy errors in GCC 12. 
(Wrestrict and Wstringop-overflow) * \see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105545 */ diff --git a/src/interleaved_bloom_filter.cpp b/src/interleaved_bloom_filter.cpp index 2c4af247..90707fdc 100644 --- a/src/interleaved_bloom_filter.cpp +++ b/src/interleaved_bloom_filter.cpp @@ -40,7 +40,7 @@ interleaved_bloom_filter::interleaved_bloom_filter(seqan::hibf::bin_count bins_, hash_shift = std::countl_zero(bin_size_); bin_words = (bins + 63) >> 6; // = ceil(bins/64) technical_bins = bin_words << 6; // = bin_words * 64 - data = sdsl::bit_vector(technical_bins * bin_size_); + resize(technical_bins * bin_size_); } size_t max_bin_size(config & configuration) @@ -92,8 +92,8 @@ void interleaved_bloom_filter::emplace(size_t const value, bin_index const bin) { size_t idx = hash_and_fit(value, hash_seeds[i]); idx += bin.value; - assert(idx < data.size()); - data[idx] = 1; + assert(idx < size()); + (*this)[idx] = 1; }; } @@ -101,7 +101,7 @@ void interleaved_bloom_filter::clear(bin_index const bin) noexcept { assert(bin.value < bins); for (size_t idx = bin.value, i = 0; i < bin_size_; idx += technical_bins, ++i) - data[idx] = 0; + (*this)[idx] = 0; } void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count const new_bins_) @@ -122,10 +122,12 @@ void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count con size_t new_technical_bins = new_bin_words << 6; size_t new_bits = bin_size_ * new_technical_bins; - size_t idx_{new_bits}, idx{data.size()}; + size_t idx_{new_bits}, idx{size()}; size_t delta = new_technical_bins - technical_bins + 64; - data.resize(new_bits); + resize(new_bits); + + uint64_t * const ptr = data(); for (size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -= technical_bins) { @@ -133,9 +135,9 @@ void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count con for (size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64) { - uint64_t old = data.get_int(jj); - 
data.set_int(jj, 0); - data.set_int(ii, old); + uint64_t old = ptr[jj >> 6]; + ptr[jj >> 6] = 0ULL; + ptr[ii >> 6] = old; } } @@ -147,7 +149,7 @@ void interleaved_bloom_filter::increase_bin_number_to(seqan::hibf::bin_count con # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wattributes" #endif // HIBF_COMPILER_IS_GCC -[[gnu::always_inline]] binning_bitvector const & +[[gnu::always_inline]] bit_vector const & interleaved_bloom_filter::membership_agent_type::bulk_contains(size_t const value) & noexcept { #if HIBF_COMPILER_IS_GCC @@ -174,93 +176,67 @@ interleaved_bloom_filter::membership_agent_type::bulk_contains(size_t const valu for (size_t i = 0; i < hash_funs; ++i) bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, ibf_ptr->hash_seeds[i]) >> 6; - uint64_t * const raw = result_buffer.raw_data().data(); // TODO: std::assume_aligned<64> once memory-aligned - uint64_t const * const ibf_data = ibf_ptr->data.data(); // TODO: std::assume_aligned<64> once memory-aligned + uint64_t * const raw = result_buffer.data(); + uint64_t const * const ibf_data = ibf_ptr->data(); std::memcpy(raw, ibf_data + bloom_filter_indices[0], sizeof(uint64_t) * bin_words); - // https://godbolt.org/z/1nbhvqeGj - // Having the loop inside is faster. // GCOVR_EXCL_START - switch (bin_words) + auto impl = [&]() { - case 1u: // 1 AND (64 bit) for (size_t i = 1; i < hash_funs; ++i) { uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; - raw[0] &= ibf_raw[0]; + + if constexpr (extent == 0u) + { +#pragma omp simd + for (size_t i = 0; i < bin_words; ++i) + raw[i] &= ibf_raw[i]; + } + else if constexpr (extent == 2u || extent == 4u || extent == 8u) + { +#pragma omp simd + for (size_t i = 0; i < extent; ++i) + raw[i] &= ibf_raw[i]; + } + else + { + for (size_t i = 0; i < extent; ++i) + raw[i] &= ibf_raw[i]; + } } + }; + + // https://godbolt.org/z/rqaeWGGer + // Having the loop inside impl instead of around the switch/case is faster. 
+ switch (bin_words) + { + case 1u: // 1 AND (64 bit) + impl.operator()<1u>(); break; case 2u: // 1 SSE4 instruction (128 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 2u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + impl.operator()<2u>(); break; - case 3u: // 1 SSE4 instruction (128 bit) + 1 AND (64 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 3u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + case 3u: // 3 AND (64 bit) + Loop Unroll + impl.operator()<3u>(); break; case 4u: // 1 AVX2 instruction (256 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 4u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + impl.operator()<4u>(); break; - case 5u: // 1 AVX2 instruction (256 bit) + 1 AND (64 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 5u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + case 5u: // 5 AND (64 bit) + Loop Unroll + impl.operator()<5u>(); break; - case 6u: // 1 AVX2 instruction (256 bit) + 1 SSE4 instruction (128 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 6u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + case 6u: // 6 AND (64 bit) + Loop Unroll + impl.operator()<6u>(); break; - case 7u: // 1 AVX2 instruction (256 bit) + 1 SSE4 instruction (128 bit) + 1 AND (64 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 7u; 
++batch) - raw[batch] &= ibf_raw[batch]; - } + case 7u: // 7 AND (64 bit) + Loop Unroll + impl.operator()<7u>(); break; case 8u: // 1 AVX512 instruction (512 bit) - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < 8u; ++batch) - raw[batch] &= ibf_raw[batch]; - } + impl.operator()<8u>(); break; default: // Auto vectorize. Might create different versions. - for (size_t i = 1; i < hash_funs; ++i) - { - uint64_t const * const ibf_raw = ibf_data + bloom_filter_indices[i]; -#pragma omp simd - for (size_t batch = 0; batch < bin_words; ++batch) - raw[batch] &= ibf_raw[batch]; - } + impl(); } // GCOVR_EXCL_STOP diff --git a/src/misc/print.cpp b/src/misc/print.cpp index a5549b18..9dde501f 100644 --- a/src/misc/print.cpp +++ b/src/misc/print.cpp @@ -8,7 +8,7 @@ #include // for range_value_t, empty #include // for vector -#include // for counting_vector, binning_bitvector +#include // for counting_vector, bit_vector #include // for print_t namespace seqan::hibf @@ -55,7 +55,7 @@ void print_impl(vector_t const & vector, std::ostream & stream) stream << "]\n"; } -void print_t::operator()(seqan::hibf::binning_bitvector const & vector, std::ostream & stream) const +void print_t::operator()(seqan::hibf::bit_vector const & vector, std::ostream & stream) const { print_impl(vector, stream); } diff --git a/test/documentation/hibf_doxygen_cfg.in b/test/documentation/hibf_doxygen_cfg.in index aece7f16..66e45151 100644 --- a/test/documentation/hibf_doxygen_cfg.in +++ b/test/documentation/hibf_doxygen_cfg.in @@ -341,7 +341,7 @@ INCLUDE_PATH = ${HIBF_DOXYGEN_SOURCE_DIR}/include INCLUDE_FILE_PATTERNS = PREDEFINED = "HIBF_DOXYGEN_ONLY(x)= x" \ ${HIBF_DOXYGEN_PREDEFINED_NDEBUG} \ - HIBF_WORKAROUND_LITERAL=constexpr + HIBF_CONSTEXPR_VECTOR=constexpr EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = NO #--------------------------------------------------------------------------- diff --git 
a/test/header/CMakeLists.txt b/test/header/CMakeLists.txt index 0d991f81..d2575f1d 100644 --- a/test/header/CMakeLists.txt +++ b/test/header/CMakeLists.txt @@ -126,4 +126,4 @@ hibf_require_test () # note: hibf/version.hpp is one of the only header that is not required to have a hibf/core/platform.hpp include hibf_header_test (hibf "${HIBF_SOURCE_DIR}/include" "hibf/version.hpp|hibf/contrib|hibf/") -hibf_header_test (hibf_test "${HIBF_SOURCE_DIR}/test/include" "") +hibf_header_test (hibf_test "${HIBF_SOURCE_DIR}/test/include" "hibf/test/iterator_test_template.hpp") diff --git a/test/include/hibf/test/iterator_test_template.hpp b/test/include/hibf/test/iterator_test_template.hpp new file mode 100644 index 00000000..f3a5feaf --- /dev/null +++ b/test/include/hibf/test/iterator_test_template.hpp @@ -0,0 +1,782 @@ +// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +// https://github.com/seqan/seqan3/blob/3.3.0/test/unit/range/iterator_test_template.hpp + +// GCOVR_EXCL_START +#pragma once + +#include + +#include +#include +#include + +#include + +template +struct iterator_fixture : public ::testing::Test +{ + /* Please provide the following members: + -------------------------------------------------------------------------------------------------------------------- + using iterator_tag = ... // Defines the iterator functionality you want to test. + // One of: + // std::input_iterator_tag + // std::forward_iterator_tag + // std::bidirectional_iterator_tag + // std::random_access_iterator_tag + // std::contiguous_iterator_tag + + static constexpr bool const_iterable = true/false; // Also test const_iterability. (const begin/end required) + + t1 test_range; // The range to test the iterators (begin/end required). + t2 expected_range; // Used to compare the iterator range with. 
+ + -------------------------------------------------------------------------------------------------------------------- + Note: if the reference type of your iterator is not comparable via operator==() to the reference type of + `expected_range you can additionally specify a custom expect_eq function: + + template + static void expect_eq(A && begin_iterator_value, B && expected_range_value) + { + EXPECT_EQ(begin_iterator_value, expected_range_value); + } + */ +}; + +// Helper concept to check whether the test fixture has a member function expect_eq. +template +concept has_expect_equal_member_function = + requires (t & a) { + { + t::expect_eq(*std::ranges::begin(a.test_range), *std::ranges::begin(a.expected_range)) + } -> std::same_as; + }; + +// Delegates to the test fixture member function `expect_eq` if available and falls back to EXPECT_EQ otherwise. +template +void expect_iter_value_equal(A && a, B && b) +{ + if constexpr (has_expect_equal_member_function>) + iterator_fixture>::expect_eq(a, b); + else + EXPECT_EQ(a, b); +} + +template +void expect_iter_equal(it_t && it, rng_it_t && rng_it) +{ + expect_iter_value_equal(*it, *rng_it); +} + +// std c++20 input iterator aren't required to have an operator==(iterator_t, iterator_t), but if they have one we +// test the semantic +template +concept iterator_is_equality_comparable = + std::derived_from::iterator_tag, std::forward_iterator_tag> + || requires (iterator_fixture & fixture) { + typename std::ranges::iterator_t; + + requires requires (std::ranges::iterator_t & it) { + // we don't assume anything about the return type, this will be done in the tests + { + it == it + }; + }; + }; + +TYPED_TEST_SUITE_P(iterator_fixture); + +TYPED_TEST_P(iterator_fixture, concept_check) +{ + using iterator_type = decltype(std::ranges::begin(this->test_range)); + // Ensure that reference types are comparable if no equal_eq function was defined. 
+ if constexpr (!has_expect_equal_member_function>) + { + static_assert(std::equality_comparable_withtest_range)), + decltype(*std::ranges::begin(this->expected_range))>, + "The reference types of begin_iterator and expected_range must be equality comparable. " + "If they are not, you may specify a custom void expect_eq(i1, r2) function in the fixture."); + } + + // input iterator must always be satisfied + static_assert(std::input_iteratorexpected_range))>, + "expected_range must have a begin member function and " + "the returned iterator must model std::input_iterator."); + EXPECT_TRUE(std::input_iterator); + + EXPECT_EQ(std::forward_iterator, + (std::derived_from)); + + EXPECT_EQ(std::bidirectional_iterator, + (std::derived_from)); + + EXPECT_EQ(std::random_access_iterator, + (std::derived_from)); + + EXPECT_EQ(std::contiguous_iterator, + (std::derived_from)); + + if constexpr (TestFixture::const_iterable) + { + using const_iterator_type = decltype(std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::input_iterator); + + EXPECT_EQ(std::forward_iterator, + (std::derived_from)); + + EXPECT_EQ(std::bidirectional_iterator, + (std::derived_from)); + + EXPECT_EQ(std::random_access_iterator, + (std::derived_from)); + + EXPECT_EQ(std::contiguous_iterator, + (std::derived_from)); + } + + if (!std::derived_from) + { + FAIL() << "The iterator tag member type must be one of std::input_iterator_tag, " + << "std::forward_iterator_tag, std::bidirectional_iterator_tag, std::random_access_iterator_tag, or " + << "std::contiguous_iterator_tag."; + } +} + +TYPED_TEST_P(iterator_fixture, const_non_const_compatibility) +{ + if constexpr (TestFixture::const_iterable) + { + using const_iterator_type = decltype(std::ranges::cbegin(this->test_range)); + + [[maybe_unused]] const_iterator_type it{std::ranges::begin(this->test_range)}; + + const_iterator_type it2{}; + it2 = std::ranges::begin(this->test_range); + + if constexpr (iterator_is_equality_comparable) + { + EXPECT_EQ(it, 
it2); + } + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// Input & Forward Iterator +// --------------------------------------------------------------------------------------------------------------------- + +TYPED_TEST_P(iterator_fixture, dereference) +{ + expect_iter_equal(std::ranges::begin(this->test_range), std::ranges::begin(this->expected_range)); + + if constexpr (TestFixture::const_iterable) + expect_iter_equal(std::ranges::cbegin(this->test_range), std::ranges::begin(this->expected_range)); +} + +TYPED_TEST_P(iterator_fixture, compare) +{ + EXPECT_FALSE(std::ranges::begin(this->test_range) == std::ranges::end(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) != std::ranges::end(this->test_range)); + EXPECT_FALSE(std::ranges::end(this->test_range) == std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::end(this->test_range) != std::ranges::begin(this->test_range)); + + if constexpr (iterator_is_equality_comparable) + { + EXPECT_TRUE(std::ranges::begin(this->test_range) == std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) != std::ranges::begin(this->test_range)); + } + + if constexpr (TestFixture::const_iterable) + { + if constexpr (iterator_is_equality_comparable) + { + EXPECT_TRUE(std::ranges::cbegin(this->test_range) == std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) != std::ranges::cbegin(this->test_range)); + } + + EXPECT_FALSE(std::ranges::cbegin(this->test_range) == std::ranges::cend(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) != std::ranges::cend(this->test_range)); + EXPECT_FALSE(std::ranges::cend(this->test_range) == std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::cend(this->test_range) != std::ranges::cbegin(this->test_range)); + + // (non-const lhs) + if constexpr (iterator_is_equality_comparable) 
+ { + EXPECT_TRUE(std::ranges::begin(this->test_range) == std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) != std::ranges::cbegin(this->test_range)); + } + + EXPECT_FALSE(std::ranges::begin(this->test_range) == std::ranges::cend(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) != std::ranges::cend(this->test_range)); + EXPECT_FALSE(std::ranges::end(this->test_range) == std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::end(this->test_range) != std::ranges::cbegin(this->test_range)); + + // (non-const rhs) + if constexpr (iterator_is_equality_comparable) + { + EXPECT_TRUE(std::ranges::cbegin(this->test_range) == std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) != std::ranges::begin(this->test_range)); + } + + EXPECT_FALSE(std::ranges::cend(this->test_range) == std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::cend(this->test_range) != std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) == std::ranges::end(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) != std::ranges::end(this->test_range)); + } +} + +template +inline void move_forward_pre_test(it_begin_t && it_begin, it_sentinel_t && it_end, rng_t && rng) +{ + // pre-increment + auto rng_it = std::ranges::begin(rng); + auto rng_it_end = std::ranges::end(rng); + auto it = std::move(it_begin); + + EXPECT_NE(rng_it, rng_it_end); + EXPECT_NE(it, it_end); + + for (; true;) + { + // if it_begin_t is copy_constructible copy result, otherwise take it by reference (if move-only iterator) + using it_copy_or_reference_t = + std::conditional_t, std::remove_reference_t, it_begin_t &>; + it_copy_or_reference_t it_copy_or_reference = ++it; + ++rng_it; + + if (it == it_end || rng_it == rng_it_end) + break; + + expect_iter_equal(it_copy_or_reference, rng_it); + } + EXPECT_EQ(rng_it, rng_it_end); + EXPECT_EQ(it, it_end); +} + 
+template +inline void move_forward_post_test(it_begin_t && it_begin, it_sentinel_t && it_end, rng_t && rng) +{ + // post-increment + auto rng_it = std::ranges::begin(rng); + auto rng_it_end = std::ranges::end(rng); + auto it = std::move(it_begin); + + EXPECT_NE(rng_it, rng_it_end); + EXPECT_NE(it, it_end); + + static constexpr bool is_cpp20_input_iterator = std::same_as; + + if constexpr (is_cpp20_input_iterator) + { + // input iterator can return void for post-increment (expressed by std::weakly_incrementable) + EXPECT_TRUE(std::input_iterator); + // forward iterator require std::incrementable which requires `{ i++ } -> same_as;` + EXPECT_FALSE(std::forward_iterator); + } + + for (; it != it_end && rng_it != rng_it_end;) + { + expect_iter_equal(it, rng_it); + + if constexpr (!is_cpp20_input_iterator) + { + expect_iter_equal(it++, rng_it++); + } + else + { + it++; + rng_it++; + } + } + EXPECT_EQ(rng_it, rng_it_end); + EXPECT_EQ(it, it_end); +} + +TYPED_TEST_P(iterator_fixture, move_forward_pre) +{ + move_forward_pre_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); + + // iterate over it again + if constexpr (std::derived_from) + move_forward_pre_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); +} + +TYPED_TEST_P(iterator_fixture, move_forward_pre_const) +{ + if constexpr (TestFixture::const_iterable) + { + move_forward_pre_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + + // iterate over it again + if constexpr (std::derived_from) + move_forward_pre_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + } +} + +TYPED_TEST_P(iterator_fixture, move_forward_post) +{ + move_forward_post_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); + + // iterate over it again + if constexpr 
(std::derived_from) + move_forward_post_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); +} + +TYPED_TEST_P(iterator_fixture, move_forward_post_const) +{ + if constexpr (TestFixture::const_iterable) + { + move_forward_post_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + + // iterate over it again + if constexpr (std::derived_from) + move_forward_post_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// Bidirectional Iterator +// --------------------------------------------------------------------------------------------------------------------- + +template +inline auto last_iterators(it_begin_t const & it_begin, it_sentinel_t const & it_end, rng_t && rng) +{ + it_begin_t it = it_begin; + auto rng_it = std::ranges::begin(rng); + + for (auto const rng_it_end = std::ranges::end(rng); + std::ranges::next(it) != it_end && std::ranges::next(rng_it) != rng_it_end; + ++it, ++rng_it) + ; + + return std::pair{it, rng_it}; +} + +template +inline void move_backward_pre_test(it_begin_t && it_begin, it_sentinel_t && it_end, rng_t && rng) +{ + // move to last position + auto && [last_it, rng_last_it] = last_iterators(it_begin, it_end, rng); + auto const rng_it_begin = std::ranges::begin(rng); + + // pre-decrement + auto it = last_it; + auto rng_it = rng_last_it; + for (; it != it_begin && rng_it != rng_it_begin; --rng_it) + { + expect_iter_equal(it, rng_it); + --it; + } + + expect_iter_equal(it_begin, rng_it_begin); +} + +template +inline void move_backward_post_test(it_begin_t && it_begin, it_sentinel_t && it_end, rng_t && rng) +{ + // move to last position + auto && [last_it, rng_last_it] = last_iterators(it_begin, it_end, rng); + auto const rng_it_begin = 
std::ranges::begin(rng); + + // post-decrement + auto it = last_it; + auto rng_it = rng_last_it; + for (; it != it_begin && rng_it != rng_it_begin; --rng_it) + { + expect_iter_equal(it--, rng_it); + } + + expect_iter_equal(it_begin, rng_it_begin); +} + +TYPED_TEST_P(iterator_fixture, move_backward_pre) +{ + if constexpr (std::derived_from) + { + move_backward_pre_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); + + if constexpr (TestFixture::const_iterable) + move_backward_pre_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + } +} + +TYPED_TEST_P(iterator_fixture, move_backward_post) +{ + if constexpr (std::derived_from) + { + move_backward_post_test(std::ranges::begin(this->test_range), + std::ranges::end(this->test_range), + this->expected_range); + + if constexpr (TestFixture::const_iterable) + move_backward_post_test(std::ranges::cbegin(this->test_range), + std::ranges::cend(this->test_range), + this->expected_range); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// Random Access Iterator +// --------------------------------------------------------------------------------------------------------------------- + +template +inline void jump_forward_test(it_begin_t && it_begin, rng_t && rng) +{ + size_t sz = std::ranges::distance(rng); + auto rng_it_begin = std::ranges::begin(rng); + + // Forward + for (size_t n = 0; n < sz; ++n) + { + auto it = it_begin; + expect_iter_equal(it += n, rng_it_begin + n); + expect_iter_equal(it, rng_it_begin + n); + } + + // Forward copy + for (size_t n = 0; n < sz; ++n) + { + expect_iter_equal(it_begin + n, rng_it_begin + n); + expect_iter_equal(it_begin, rng_it_begin); + } + + // Forward copy friend + for (size_t n = 0; n < sz; ++n) + { + expect_iter_equal(n + it_begin, rng_it_begin + n); + expect_iter_equal(it_begin, rng_it_begin); 
+ } +} + +TYPED_TEST_P(iterator_fixture, jump_forward) +{ + if constexpr (std::derived_from) + { + jump_forward_test(std::ranges::begin(this->test_range), this->expected_range); + + if constexpr (TestFixture::const_iterable) + jump_forward_test(std::ranges::cbegin(this->test_range), std::as_const(this->expected_range)); + } +} + +template +inline void jump_backward_test(it_begin_t && it_begin, rng_t && rng) +{ + size_t sz = std::ranges::distance(rng); + auto rng_it_begin = std::ranges::begin(rng); + + auto pre_end_it = it_begin + (sz - 1); + auto pre_end_rng_it = rng_it_begin + (sz - 1); + + // Backward + for (size_t n = 0; n < sz; ++n) + { + auto it = pre_end_it; + expect_iter_equal(it -= n, pre_end_rng_it - n); + expect_iter_equal(it, pre_end_rng_it - n); + } + + // Backward copy + for (size_t n = 0; n < sz; ++n) + { + expect_iter_equal(pre_end_it - n, pre_end_rng_it - n); + expect_iter_equal(pre_end_it, pre_end_rng_it); + } + + // Backward copy it + (-n) + for (size_t n = 0; n < sz; ++n) + { + expect_iter_equal(pre_end_it + (-1 * n), pre_end_rng_it - n); + expect_iter_equal(pre_end_it, pre_end_rng_it); + } + + // Backward copy friend through (-n) + it + for (size_t n = 0; n < sz; ++n) + { + expect_iter_equal((-1 * n) + pre_end_it, pre_end_rng_it - n); + expect_iter_equal(pre_end_it, pre_end_rng_it); + } +} + +TYPED_TEST_P(iterator_fixture, jump_backward) +{ + if constexpr (std::derived_from) + { + jump_backward_test(std::ranges::begin(this->test_range), this->expected_range); + + if constexpr (TestFixture::const_iterable) + jump_backward_test(std::ranges::cbegin(this->test_range), std::as_const(this->expected_range)); + } +} + +template +inline void jump_random_test(it_begin_t && it_begin, rng_t && rng) +{ + size_t sz = std::ranges::distance(rng); + + for (size_t n = 0; n < sz; ++n) + expect_iter_value_equal(it_begin[n], rng[n]); +} + +TYPED_TEST_P(iterator_fixture, jump_random) +{ + if constexpr (std::derived_from) + { + 
jump_random_test(std::ranges::begin(this->test_range), this->expected_range); + + if constexpr (TestFixture::const_iterable) + jump_random_test(std::ranges::cbegin(this->test_range), std::as_const(this->expected_range)); + } +} + +template +inline void difference_test(iterator_t && it_begin, iterator_t && it_end, rng_t && rng) +{ + using difference_t = std::iter_difference_t; + difference_t size = std::ranges::distance(rng); + + for (difference_t n = 0; n <= size; ++n) + { + EXPECT_EQ(n, (it_begin + n) - it_begin); + EXPECT_EQ(-n, it_begin - (it_begin + n)); + } + + for (difference_t n = 0; n <= size; ++n) + { + EXPECT_EQ(n, it_end - (it_end - n)); + EXPECT_EQ(-n, (it_end - n) - it_end); + } +} + +TYPED_TEST_P(iterator_fixture, difference_common) +{ + static constexpr bool is_random_access = + std::derived_from; + + if constexpr (is_random_access) + { + auto it = std::ranges::begin(this->test_range); + auto sentinel = std::ranges::next(it, std::ranges::end(this->test_range)); + difference_test(it, sentinel, this->expected_range); + } + + if constexpr (is_random_access && TestFixture::const_iterable) + { + auto const_it = std::ranges::cbegin(this->test_range); + auto const_sentinel = std::ranges::next(const_it, std::ranges::cend(this->test_range)); + difference_test(const_it, const_sentinel, std::as_const(this->expected_range)); + } +} + +TYPED_TEST_P(iterator_fixture, difference_sentinel) +{ + if constexpr (std::derived_from) + { + using difference_t = std::ranges::range_difference_ttest_range)>; + + auto && begin = std::ranges::begin(this->test_range); + auto && end = std::ranges::end(this->test_range); + difference_t size = std::ranges::distance(this->expected_range); + + EXPECT_EQ(size, end - begin); + EXPECT_EQ(-size, begin - end); + + if constexpr (TestFixture::const_iterable) + { + auto && cbegin = std::ranges::cbegin(this->test_range); + auto && cend = std::ranges::cend(this->test_range); + + EXPECT_EQ(size, cend - cbegin); + EXPECT_EQ(-size, cbegin - cend); 
+ + EXPECT_EQ(size, end - cbegin); + EXPECT_EQ(-size, cbegin - end); + + EXPECT_EQ(size, cend - begin); + EXPECT_EQ(-size, begin - cend); + } + } +} + +TYPED_TEST_P(iterator_fixture, compare_less) +{ + if constexpr (std::derived_from) + { + EXPECT_FALSE(std::ranges::begin(this->test_range) < std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) < std::ranges::next(std::ranges::begin(this->test_range))); + } + + if constexpr (std::derived_from + && TestFixture::const_iterable) + { + EXPECT_FALSE(std::ranges::cbegin(this->test_range) < std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) < std::ranges::next(std::ranges::cbegin(this->test_range))); + + // mix + EXPECT_FALSE(std::ranges::begin(this->test_range) < std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) < std::ranges::next(std::ranges::cbegin(this->test_range))); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) < std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) < std::ranges::next(std::ranges::begin(this->test_range))); + } +} + +TYPED_TEST_P(iterator_fixture, compare_greater) +{ + if constexpr (std::derived_from) + { + EXPECT_FALSE(std::ranges::begin(this->test_range) > std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) > std::ranges::next(std::ranges::begin(this->test_range))); + } + + if constexpr (std::derived_from + && TestFixture::const_iterable) + { + EXPECT_FALSE(std::ranges::cbegin(this->test_range) > std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) > std::ranges::next(std::ranges::cbegin(this->test_range))); + + // mix + EXPECT_FALSE(std::ranges::begin(this->test_range) > std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) > std::ranges::next(std::ranges::cbegin(this->test_range))); + 
EXPECT_FALSE(std::ranges::cbegin(this->test_range) > std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) > std::ranges::next(std::ranges::begin(this->test_range))); + } +} + +TYPED_TEST_P(iterator_fixture, compare_leq) +{ + if constexpr (std::derived_from) + { + EXPECT_TRUE(std::ranges::begin(this->test_range) <= std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) <= std::ranges::next(std::ranges::begin(this->test_range))); + } + + if constexpr (std::derived_from + && TestFixture::const_iterable) + { + EXPECT_TRUE(std::ranges::cbegin(this->test_range) <= std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) <= std::ranges::next(std::ranges::cbegin(this->test_range))); + + // mix + EXPECT_TRUE(std::ranges::begin(this->test_range) <= std::ranges::cbegin(this->test_range)); + EXPECT_TRUE(std::ranges::begin(this->test_range) <= std::ranges::next(std::ranges::cbegin(this->test_range))); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) <= std::ranges::begin(this->test_range)); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) <= std::ranges::next(std::ranges::begin(this->test_range))); + } +} + +TYPED_TEST_P(iterator_fixture, compare_geq) +{ + if constexpr (std::derived_from) + { + EXPECT_TRUE(std::ranges::begin(this->test_range) >= std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) >= std::ranges::next(std::ranges::begin(this->test_range))); + } + + if constexpr (std::derived_from + && TestFixture::const_iterable) + { + EXPECT_TRUE(std::ranges::cbegin(this->test_range) >= std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) >= std::ranges::next(std::ranges::cbegin(this->test_range))); + + // mix + EXPECT_TRUE(std::ranges::begin(this->test_range) >= std::ranges::cbegin(this->test_range)); + EXPECT_FALSE(std::ranges::begin(this->test_range) >= 
std::ranges::next(std::ranges::cbegin(this->test_range))); + EXPECT_TRUE(std::ranges::cbegin(this->test_range) >= std::ranges::begin(this->test_range)); + EXPECT_FALSE(std::ranges::cbegin(this->test_range) >= std::ranges::next(std::ranges::begin(this->test_range))); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// Contiguous Iterator +// --------------------------------------------------------------------------------------------------------------------- + +template +inline void address_difference_test(iterator_t it_begin, iterator_t it_end) +{ + // contiguous_iterator only requires to_address of the iterator_t, but not sentinel_t. + using difference_t = std::iter_difference_t; + + difference_t const size = it_end - it_begin; + + for (difference_t i = 0u; i <= size; ++i) + { + iterator_t it = it_begin + i; + + if (it != it_end) + { + // https://eel.is/c++draft/iterator.concept.contiguous#2.1 + // to_address(a) == addressof(*a) + EXPECT_EQ(std::to_address(it), std::addressof(*it)); + } + + // https://eel.is/c++draft/iterator.concept.contiguous#2.2 + // to_address(b) == to_address(a) + D(b - a) + // to_address(c) == to_address(a) + D(c - a) + EXPECT_EQ(std::to_address(it), std::to_address(it_begin) + static_cast(it - it_begin)); + EXPECT_EQ(std::to_address(it), std::to_address(it_end) - static_cast(it_end - it)); + } +} + +TYPED_TEST_P(iterator_fixture, address_difference) +{ + static constexpr bool is_contiguous = + std::derived_from; + + if constexpr (is_contiguous) + { + auto it = std::ranges::begin(this->test_range); + auto sentinel_it = std::ranges::next(it, std::ranges::end(this->test_range)); + address_difference_test(it, sentinel_it); + } + + if constexpr (is_contiguous && TestFixture::const_iterable) + { + auto it = std::ranges::cbegin(this->test_range); + auto sentinel_it = std::ranges::next(it, std::ranges::cend(this->test_range)); + address_difference_test(it, 
sentinel_it); + } +} + +REGISTER_TYPED_TEST_SUITE_P(iterator_fixture, + concept_check, + const_non_const_compatibility, + dereference, + compare, + move_forward_pre, + move_forward_pre_const, + move_forward_post, + move_forward_post_const, + move_backward_pre, + move_backward_post, + jump_forward, + jump_backward, + jump_random, + difference_common, + difference_sentinel, + compare_less, + compare_greater, + compare_leq, + compare_geq, + address_difference); +// GCOVR_EXCL_END diff --git a/test/performance/ibf/CMakeLists.txt b/test/performance/ibf/CMakeLists.txt index 5d12a74c..69378a60 100644 --- a/test/performance/ibf/CMakeLists.txt +++ b/test/performance/ibf/CMakeLists.txt @@ -2,5 +2,5 @@ # SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik # SPDX-License-Identifier: BSD-3-Clause -hibf_benchmark (binning_bitvector_benchmark.cpp) +hibf_benchmark (bit_vector_benchmark.cpp) hibf_benchmark (interleaved_bloom_filter_benchmark.cpp) diff --git a/test/performance/ibf/binning_bitvector_benchmark.cpp b/test/performance/ibf/binning_bitvector_benchmark.cpp deleted file mode 100644 index 9d499918..00000000 --- a/test/performance/ibf/binning_bitvector_benchmark.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin -// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik -// SPDX-License-Identifier: BSD-3-Clause - -#include // for State, DoNotOptimize, Benchmark, BENCHMARK_DEFINE_F, BENCHMARK_... 
- -#include // for all_of, copy, __all_of_fn -#include // for uint64_t -#include // for size_t -#include // for identity -#include // for allocator, vector - -#include // for binning_bitvector - -#include // for operator- - -using bitvector_t = seqan::hibf::binning_bitvector; - -static void arguments(benchmark::internal::Benchmark * b) -{ - // 1,024 bits (1 KiB) - b->Args({1024, 1024}); // No bit set - b->Args({1024, 0}); // First bit set -#if 0 - b->Args({1024, 64}); // First bit in second word set - b->Args({1024, 512}); // First bit set halfway - - // 8,192 bits (8 KiB) - b->Args({8192, 8192}); // No bit set - b->Args({8192, 0}); // First bit set - b->Args({8192, 64}); // First bit in second word set - b->Args({8192, 4096}); // First bit set halfway - - // 1,048,576 bits (1 MiB) - b->Args({1LL << 20, 1LL << 20}); // No bit set - b->Args({1LL << 20, 0}); // First bit set - b->Args({1LL << 20, 64}); // First bit in second word set - b->Args({1LL << 20, 1LL << 19}); // First bit set halfway -#endif -} - -class all_zero : public benchmark::Fixture -{ -public: - void SetUp(benchmark::State const & state) - { - size_t const size_in_bits = state.range(0); - size_t const first_set_bit = state.range(1); - - bitvector = bitvector_t(size_in_bits); - if (first_set_bit < size_in_bits) - bitvector[first_set_bit] = true; - } - - void TearDown(benchmark::State const &) - { - bitvector.raw_data().clear(); - } - - bitvector_t const & get_bitvector() noexcept - { - return bitvector; - } - -private: - bitvector_t bitvector{}; -}; - -BENCHMARK_DEFINE_F(all_zero, std_all_of)(benchmark::State & state) -{ - bitvector_t const & bitvector = get_bitvector(); - - for (auto _ : state) - { - bool result = std::all_of(bitvector.begin(), - bitvector.end(), - [](bool const value) - { - return !value; - }); - benchmark::DoNotOptimize(result); - } -} - -BENCHMARK_DEFINE_F(all_zero, std_ranges_all_of)(benchmark::State & state) -{ - bitvector_t const & bitvector = get_bitvector(); - - for (auto _ : 
state) - { - bool result = std::ranges::all_of(bitvector, - [](bool const value) - { - return !value; - }); - benchmark::DoNotOptimize(result); - } -} - -bool no_early_termination(bitvector_t const & bitvector) noexcept -{ - uint64_t const * const ptr = bitvector.raw_data().data(); - size_t const number_of_words{(bitvector.size() + 63u) >> 6}; - bool result{false}; - - for (size_t i{}; i < number_of_words; ++i) - result |= ptr[i]; - - return !result; -} - -BENCHMARK_DEFINE_F(all_zero, ptr_no_early_termination)(benchmark::State & state) -{ - bitvector_t const & bitvector = get_bitvector(); - - for (auto _ : state) - { - bool result = no_early_termination(bitvector); - benchmark::DoNotOptimize(result); - } -} - -bool with_early_termination(bitvector_t const & bitvector) noexcept -{ - uint64_t const * const ptr = bitvector.raw_data().data(); - size_t const number_of_words{(bitvector.size() + 63u) >> 6}; - bool result{false}; - - for (size_t i{}; !result && i < number_of_words; ++i) - result |= ptr[i]; - - return !result; -} - -BENCHMARK_DEFINE_F(all_zero, ptr_with_early_termination)(benchmark::State & state) -{ - bitvector_t const & bitvector = get_bitvector(); - - for (auto _ : state) - { - bool result = with_early_termination(bitvector); - benchmark::DoNotOptimize(result); - } -} - -BENCHMARK_REGISTER_F(all_zero, std_all_of)->Apply(arguments); -BENCHMARK_REGISTER_F(all_zero, std_ranges_all_of)->Apply(arguments); -BENCHMARK_REGISTER_F(all_zero, ptr_no_early_termination)->Apply(arguments); -BENCHMARK_REGISTER_F(all_zero, ptr_with_early_termination)->Apply(arguments); - -BENCHMARK_MAIN(); diff --git a/test/performance/ibf/bit_vector_benchmark.cpp b/test/performance/ibf/bit_vector_benchmark.cpp new file mode 100644 index 00000000..76a05ac8 --- /dev/null +++ b/test/performance/ibf/bit_vector_benchmark.cpp @@ -0,0 +1,134 @@ +// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für 
molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +// Authored by: Rene Rahn + +#include + +#include + +#include + +seqan::hibf::bit_vector generate_bit_vector(size_t const size, size_t const seed) +{ + std::mt19937_64 engine{seed}; + std::uniform_int_distribution dist{0u, 1u}; + + auto gen = [&dist, &engine]() + { + return dist(engine); + }; + seqan::hibf::bit_vector vec(size); + std::ranges::generate(vec, gen); + + return vec; +} + +auto generate_bit_vector_pair(size_t const size) +{ + seqan::hibf::bit_vector random_bit_vector_first = generate_bit_vector(size, 0u); + seqan::hibf::bit_vector random_bit_vector_second = generate_bit_vector(size, size); + + return std::pair{std::move(random_bit_vector_first), std::move(random_bit_vector_second)}; +} + +template +void random_bit_vector(benchmark::State & state, operation_t operation) +{ + auto [test_vector_lhs, test_vector_rhs] = generate_bit_vector_pair(state.range(0)); + + using result_t = std::invoke_result_t; + + for (auto _ : state) + { + result_t result = operation(test_vector_lhs, test_vector_rhs); + benchmark::DoNotOptimize(result); + } +} + +template +void one_set_bit_vector(benchmark::State & state, operation_t operation) +{ + seqan::hibf::bit_vector vec(state.range(0)); + vec.back() = true; + + for (auto _ : state) + { + bool result = operation(vec); + benchmark::DoNotOptimize(result); + } +} + +// ---------------------------------------------------------------------------- +// Benchmark operations +// ---------------------------------------------------------------------------- + +inline auto binary_and = [](bv_t & lhs, bv_t & rhs) constexpr -> bv_t & +{ + return lhs &= rhs; +}; + +inline auto binary_or = [](bv_t & lhs, bv_t & rhs) constexpr -> bv_t & +{ + return lhs |= rhs; +}; + +inline auto binary_xor = [](bv_t & lhs, bv_t & rhs) constexpr -> bv_t & +{ + return lhs ^= rhs; +}; + +inline auto binary_not = [](bv_t & lhs, [[maybe_unused]] auto const &... 
args) constexpr -> bv_t +{ + return ~lhs; +}; + +inline auto binary_flip = [](bv_t & lhs, [[maybe_unused]] auto const &... args) constexpr -> bv_t & +{ + return lhs.flip(); +}; + +inline auto none_fn = [](auto & lhs) constexpr -> bool +{ + return lhs.none(); +}; + +inline auto all_fn = [](auto & lhs) constexpr -> bool +{ + return lhs.all(); +}; + +inline auto any_fn = [](auto & lhs) constexpr -> bool +{ + return lhs.any(); +}; + +#if 0 +static constexpr int32_t min_range = 262'144; +static constexpr int32_t max_range = 1'048'576; +#else +static constexpr int32_t min_range = 1'024; +static constexpr int32_t max_range = 2'048; +#endif +static constexpr int32_t range_multiplier = 2; + +// ---------------------------------------------------------------------------- +// Benchmark +// ---------------------------------------------------------------------------- + +BENCHMARK_CAPTURE(random_bit_vector, and, binary_and)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(random_bit_vector, or, binary_or)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(random_bit_vector, xor, binary_xor)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(random_bit_vector, not, binary_not)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(random_bit_vector, flip, binary_flip)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); + +BENCHMARK_CAPTURE(one_set_bit_vector, none, none_fn)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(one_set_bit_vector, all, all_fn)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); +BENCHMARK_CAPTURE(one_set_bit_vector, any, any_fn)->RangeMultiplier(range_multiplier)->Range(min_range, max_range); + +// ---------------------------------------------------------------------------- +// Run benchmark +// 
---------------------------------------------------------------------------- + +BENCHMARK_MAIN(); diff --git a/test/performance/ibf/interleaved_bloom_filter_benchmark.cpp b/test/performance/ibf/interleaved_bloom_filter_benchmark.cpp index fd08d3b0..60c38826 100644 --- a/test/performance/ibf/interleaved_bloom_filter_benchmark.cpp +++ b/test/performance/ibf/interleaved_bloom_filter_benchmark.cpp @@ -33,6 +33,10 @@ static void arguments(benchmark::internal::Benchmark * b) b->Args({128, 1LL << 16, 2, 1LL << 17}); b->Args({192, 1LL << 16, 2, 1LL << 17}); b->Args({256, 1LL << 15, 2, 1LL << 17}); + // b->Args({320, 1LL << 15, 2, 1LL << 17}); + // b->Args({384, 1LL << 14, 2, 1LL << 17}); + // b->Args({448, 1LL << 14, 2, 1LL << 17}); + // b->Args({512, 1LL << 13, 2, 1LL << 17}); b->Args({1024, 1LL << 10, 2, 1LL << 17}); } #else diff --git a/test/unit/hibf/CMakeLists.txt b/test/unit/hibf/CMakeLists.txt index cac2b2ba..8100e5a2 100644 --- a/test/unit/hibf/CMakeLists.txt +++ b/test/unit/hibf/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectories () +hibf_test (bit_vector_test.cpp) hibf_test (config_test.cpp) hibf_test (hierarchical_interleaved_bloom_filter_test.cpp) hibf_test (interleaved_bloom_filter_test.cpp) diff --git a/test/unit/hibf/bit_vector_test.cpp b/test/unit/hibf/bit_vector_test.cpp new file mode 100644 index 00000000..17728802 --- /dev/null +++ b/test/unit/hibf/bit_vector_test.cpp @@ -0,0 +1,788 @@ +// SPDX-FileCopyrightText: 2006-2023, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +// Authored by: Rene Rahn + +#include + +#include +#include + +#include +#include + +// ---------------------------------------------------------------------------- +// associated types +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, value_type) +{ + EXPECT_TRUE((std::same_as)); +} + +TEST(bit_vector_test, size_type) 
+{ + EXPECT_TRUE((std::same_as)); +} + +TEST(bit_vector_test, difference_type) +{ + EXPECT_TRUE((std::integral)); +} + +TEST(bit_vector_test, reference) +{ + EXPECT_TRUE((std::convertible_to)); +} + +TEST(bit_vector_test, const_reference) +{ + EXPECT_TRUE((std::convertible_to)); +} + +// ---------------------------------------------------------------------------- +// Construction and assignment +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, construct_with_count) +{ + { + seqan::hibf::bit_vector test_vector(1000); + EXPECT_EQ(test_vector.size(), 1000u); + + seqan::hibf::bit_vector test_vector2{test_vector}; + EXPECT_EQ(test_vector2.size(), 1000u); + + seqan::hibf::bit_vector test_vector3{std::move(test_vector2)}; + EXPECT_EQ(test_vector3.size(), 1000u); + + seqan::hibf::bit_vector test_vector4{}; + test_vector4 = test_vector3; + EXPECT_EQ(test_vector4.size(), 1000u); + + seqan::hibf::bit_vector test_vector5{}; + test_vector5 = std::move(test_vector3); + EXPECT_EQ(test_vector5.size(), 1000u); + } + + { + seqan::hibf::bit_vector test_vector(64); + + EXPECT_EQ(test_vector.size(), 64u); + } + + { + seqan::hibf::bit_vector test_vector(512); + + EXPECT_EQ(test_vector.size(), 512u); + } + + { + seqan::hibf::bit_vector test_vector(1); + + EXPECT_EQ(test_vector.size(), 1u); + } +} + +TEST(bit_vector_test, construct_with_count_and_allocator) +{ + seqan::hibf::bit_vector test_vector(1000, true); + + EXPECT_EQ(test_vector.size(), 1000u); + + std::for_each(test_vector.begin(), + test_vector.end(), + [](bool const bit) + { + EXPECT_TRUE(bit); + }); +} + +TEST(bit_vector_test, construct_from_initialiser_list) +{ + { // From the concrete values. 
+ seqan::hibf::bit_vector test_vector{true, false, true, false, false, true, true}; + auto it = test_vector.begin(); + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(*it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, true); + } + + { // From integers convertible to bool. + seqan::hibf::bit_vector test_vector{1, 0, 1, 0, 0, 1, 1}; + auto it = test_vector.begin(); + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(*it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, true); + } +} + +TEST(bit_vector_test, assign_with_count) +{ + { + seqan::hibf::bit_vector test_vector{}; + test_vector.assign(1000, false); + + EXPECT_EQ(test_vector.size(), 1000u); + } + + { + seqan::hibf::bit_vector test_vector{}; + test_vector.assign(64, true); + + EXPECT_EQ(test_vector.size(), 64u); + } + + { + seqan::hibf::bit_vector test_vector{}; + test_vector.assign(0, false); + + EXPECT_EQ(test_vector.size(), 0u); + } + + { + seqan::hibf::bit_vector test_vector{}; + test_vector.assign(1, true); + + EXPECT_EQ(test_vector.size(), 1u); + } +} + +TEST(bit_vector_test, assign_from_initialiser_list) +{ + { // From the concrete values. + seqan::hibf::bit_vector test_vector{}; + test_vector.assign({true, false, true, false, false, true, true}); + + auto it = test_vector.begin(); + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(*it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, true); + } + + { // From integers convertible to bool. 
+ seqan::hibf::bit_vector test_vector{}; + test_vector.assign({1, 0, 1, 0, 0, 1, 1}); + + auto it = test_vector.begin(); + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(*it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, false); + EXPECT_EQ(*++it, true); + EXPECT_EQ(*++it, true); + } + + { + seqan::hibf::bit_vector test_vector{}; + test_vector.assign(std::initializer_list{}); + + EXPECT_EQ(test_vector.size(), 0u); + } +} + +// ---------------------------------------------------------------------------- +// iterators +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, begin) +{ + { + seqan::hibf::bit_vector test_vector(1000, true); + + auto it = test_vector.begin(); + EXPECT_TRUE(*it); + } + + { + seqan::hibf::bit_vector test_vector(64, true); + + auto it = test_vector.begin(); + EXPECT_TRUE(*it); + } +} + +TEST(bit_vector_test, cbegin) +{ + { + seqan::hibf::bit_vector test_vector(1000, true); + auto cit = std::as_const(test_vector).begin(); + EXPECT_TRUE(*cit); + } + + { + seqan::hibf::bit_vector test_vector(64, true); + auto cit = std::ranges::cbegin(std::as_const(test_vector)); + EXPECT_TRUE(*cit); + } +} + +TEST(bit_vector_test, end) +{ + { + seqan::hibf::bit_vector test_vector(1000, true); + + EXPECT_TRUE(test_vector.begin() != test_vector.end()); + } + + { + seqan::hibf::bit_vector test_vector(64, true); + EXPECT_TRUE(test_vector.begin() != test_vector.end()); + } + + { // empty vector: + seqan::hibf::bit_vector test_vector{}; + EXPECT_TRUE(test_vector.begin() == test_vector.end()); + } +} + +TEST(bit_vector_test, cend) +{ + { + seqan::hibf::bit_vector test_vector(1000, true); + + EXPECT_TRUE(std::as_const(test_vector).begin() != std::as_const(test_vector).end()); + } + + { + seqan::hibf::bit_vector test_vector(64, true); + EXPECT_TRUE(std::as_const(test_vector).begin() != std::ranges::cend(std::as_const(test_vector))); + } + + { // empty vector: + 
seqan::hibf::bit_vector test_vector{}; + EXPECT_TRUE(std::as_const(test_vector).begin() == std::as_const(test_vector).end()); + } +} + +TEST(bit_vector_test, iterate) +{ + seqan::hibf::bit_vector test_vector(70, true); + auto it = test_vector.begin(); + for (size_t i = 0; i < test_vector.size(); ++i) + { + EXPECT_EQ(*it, true); + ++it; + } + EXPECT_TRUE(it == test_vector.end()); +} + +// ---------------------------------------------------------------------------- +// Element access +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, subscript_operator) +{ + seqan::hibf::bit_vector test_vector{true, false, true, false, false, true, true}; + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(test_vector[0], true); + EXPECT_EQ(test_vector[1], false); + EXPECT_EQ(test_vector[2], true); + EXPECT_EQ(std::as_const(test_vector)[3], false); + EXPECT_EQ(std::as_const(test_vector)[4], false); + EXPECT_EQ(std::as_const(test_vector)[5], true); + EXPECT_EQ(std::as_const(test_vector)[6], true); +} + +TEST(bit_vector_test, back) +{ + seqan::hibf::bit_vector test_vector{true, false, true, false, false, true, true}; + EXPECT_EQ(test_vector.size(), 7u); + EXPECT_EQ(test_vector.back(), true); + EXPECT_EQ(std::as_const(test_vector).back(), true); +} + +TEST(bit_vector_test, all) +{ + { // empty vector + seqan::hibf::bit_vector test_vector{}; + EXPECT_TRUE(test_vector.all()); + } + + { + seqan::hibf::bit_vector test_vector(250, true); + EXPECT_TRUE(test_vector.all()); + + test_vector[249] = false; + EXPECT_FALSE(test_vector.all()); + + test_vector[249] = true; + test_vector[0] = false; + EXPECT_FALSE(test_vector.all()); + } +} + +TEST(bit_vector_test, any) +{ + { // empty vector + seqan::hibf::bit_vector test_vector{}; + EXPECT_FALSE(test_vector.any()); + } + + { + seqan::hibf::bit_vector test_vector(250, false); + EXPECT_FALSE(test_vector.any()); + + test_vector[249] = true; + EXPECT_TRUE(test_vector.any()); + + test_vector[249] = 
false; + test_vector[0] = true; + EXPECT_TRUE(test_vector.any()); + } +} + +TEST(bit_vector_test, none) +{ + { // empty vector + seqan::hibf::bit_vector test_vector{}; + EXPECT_TRUE(test_vector.none()); + } + + { + seqan::hibf::bit_vector test_vector(250, false); + EXPECT_TRUE(test_vector.none()); + + test_vector[249] = true; + EXPECT_FALSE(test_vector.none()); + + test_vector[249] = false; + test_vector[0] = true; + EXPECT_FALSE(test_vector.none()); + } +} + +// ---------------------------------------------------------------------------- +// Modifiers +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, resize) +{ + seqan::hibf::bit_vector test_vector{}; + + EXPECT_EQ(test_vector.size(), 0u); + test_vector.resize(64); + EXPECT_EQ(test_vector.size(), 64u); + EXPECT_TRUE(test_vector.none()); + + test_vector.resize(128, true); + EXPECT_EQ(test_vector.size(), 128u); + EXPECT_TRUE(test_vector.any()); + + test_vector.resize(1, true); + EXPECT_EQ(test_vector.size(), 1u); + EXPECT_TRUE(test_vector.none()); + + test_vector.resize(0, true); + EXPECT_EQ(test_vector.size(), 0u); + EXPECT_TRUE(test_vector.none()); +} + +TEST(bit_vector_test, push_back) +{ + seqan::hibf::bit_vector test_vector{}; + + EXPECT_EQ(test_vector.size(), 0u); + test_vector.push_back(true); + EXPECT_EQ(test_vector.size(), 1u); + EXPECT_TRUE(test_vector.back()); + + test_vector.resize(128, true); + test_vector.push_back(false); + EXPECT_EQ(test_vector.size(), 129u); + EXPECT_FALSE(test_vector.back()); +} + +TEST(bit_vector_test, swap) +{ + seqan::hibf::bit_vector test_vector_left{}; + seqan::hibf::bit_vector test_vector_right(250, true); + + test_vector_left.swap(test_vector_right); + + EXPECT_EQ(test_vector_left.size(), 250u); + EXPECT_EQ(test_vector_right.size(), 0u); + EXPECT_TRUE(test_vector_left.all()); + + test_vector_right.resize(78); + test_vector_left.swap(test_vector_right); + EXPECT_EQ(test_vector_left.size(), 78u); + 
EXPECT_EQ(test_vector_right.size(), 250u); + EXPECT_TRUE(test_vector_left.none()); + EXPECT_TRUE(test_vector_right.all()); +} + +TEST(bit_vector_test, operator_binary_and) +{ + seqan::hibf::bit_vector test_vector(250, false); + seqan::hibf::bit_vector test_vector_all(250, true); + + test_vector &= test_vector_all; + + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_EQ(test_vector_all.size(), 250u); + + EXPECT_TRUE(test_vector.none()); + EXPECT_TRUE(test_vector_all.all()); + + test_vector[0] = true; + test_vector[10] = true; + test_vector[63] = true; + test_vector[64] = true; + test_vector[127] = true; + test_vector[128] = true; + test_vector[200] = true; + test_vector[249] = true; + + test_vector = test_vector & test_vector_all; + EXPECT_FALSE(test_vector.none()); + EXPECT_TRUE(test_vector_all.all()); + + EXPECT_TRUE(test_vector[0]); + EXPECT_FALSE(test_vector[1]); + EXPECT_FALSE(test_vector[9]); + EXPECT_TRUE(test_vector[10]); + EXPECT_FALSE(test_vector[11]); + EXPECT_FALSE(test_vector[62]); + EXPECT_TRUE(test_vector[63]); + EXPECT_TRUE(test_vector[64]); + EXPECT_FALSE(test_vector[65]); + EXPECT_FALSE(test_vector[126]); + EXPECT_TRUE(test_vector[127]); + EXPECT_TRUE(test_vector[128]); + EXPECT_FALSE(test_vector[129]); + EXPECT_FALSE(test_vector[199]); + EXPECT_TRUE(test_vector[200]); + EXPECT_FALSE(test_vector[201]); + EXPECT_FALSE(test_vector[248]); + EXPECT_TRUE(test_vector[249]); +} + +TEST(bit_vector_test, operator_binary_or) +{ + seqan::hibf::bit_vector test_vector(250, false); + seqan::hibf::bit_vector test_vector_all(250, true); + + test_vector |= test_vector_all; + + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_EQ(test_vector_all.size(), 250u); + + EXPECT_TRUE(test_vector.all()); + EXPECT_TRUE(test_vector_all.all()); + + test_vector[0] = false; + test_vector[10] = false; + test_vector[63] = false; + test_vector[64] = false; + test_vector[127] = false; + test_vector[128] = false; + test_vector[200] = false; + test_vector[249] = false; + + test_vector_all[0] 
= false; + test_vector_all[10] = false; + test_vector_all[127] = false; + test_vector_all[128] = false; + test_vector_all[249] = false; + + test_vector = test_vector | test_vector_all; + EXPECT_FALSE(test_vector.all()); + EXPECT_FALSE(test_vector_all.all()); + + EXPECT_FALSE(test_vector[0]); + EXPECT_TRUE(test_vector[1]); + EXPECT_TRUE(test_vector[9]); + EXPECT_FALSE(test_vector[10]); + EXPECT_TRUE(test_vector[11]); + EXPECT_TRUE(test_vector[62]); + EXPECT_TRUE(test_vector[63]); + EXPECT_TRUE(test_vector[64]); + EXPECT_TRUE(test_vector[65]); + EXPECT_TRUE(test_vector[126]); + EXPECT_FALSE(test_vector[127]); + EXPECT_FALSE(test_vector[128]); + EXPECT_TRUE(test_vector[129]); + EXPECT_TRUE(test_vector[199]); + EXPECT_TRUE(test_vector[200]); + EXPECT_TRUE(test_vector[201]); + EXPECT_TRUE(test_vector[248]); + EXPECT_FALSE(test_vector[249]); +} + +TEST(bit_vector_test, operator_binary_xor) +{ + seqan::hibf::bit_vector test_vector(250, false); + seqan::hibf::bit_vector test_vector_all(250, true); + + test_vector ^= test_vector_all; + + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_EQ(test_vector_all.size(), 250u); + + EXPECT_TRUE(test_vector.all()); + EXPECT_TRUE(test_vector_all.all()); + + test_vector[0] = false; + test_vector[10] = false; + test_vector[63] = false; + test_vector[64] = false; + test_vector[127] = false; + test_vector[128] = false; + test_vector[200] = false; + test_vector[249] = false; + + test_vector_all[0] = false; + test_vector_all[10] = false; + test_vector_all[127] = false; + test_vector_all[128] = false; + test_vector_all[249] = false; + + test_vector = test_vector ^ test_vector_all; + EXPECT_FALSE(test_vector.all()); + EXPECT_FALSE(test_vector_all.all()); + + EXPECT_FALSE(test_vector[0]); + EXPECT_FALSE(test_vector[1]); + EXPECT_FALSE(test_vector[9]); + EXPECT_FALSE(test_vector[10]); + EXPECT_FALSE(test_vector[11]); + EXPECT_FALSE(test_vector[62]); + EXPECT_TRUE(test_vector[63]); + EXPECT_TRUE(test_vector[64]); + EXPECT_FALSE(test_vector[65]); + 
EXPECT_FALSE(test_vector[126]); + EXPECT_FALSE(test_vector[127]); + EXPECT_FALSE(test_vector[128]); + EXPECT_FALSE(test_vector[129]); + EXPECT_FALSE(test_vector[199]); + EXPECT_TRUE(test_vector[200]); + EXPECT_FALSE(test_vector[201]); + EXPECT_FALSE(test_vector[248]); + EXPECT_FALSE(test_vector[249]); +} + +TEST(bit_vector_test, operator_binary_not) +{ + seqan::hibf::bit_vector test_vector(250, false); + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_TRUE(test_vector.none()); + + seqan::hibf::bit_vector expected_vector = ~test_vector; + EXPECT_EQ(expected_vector.size(), 250u); + EXPECT_TRUE(test_vector.none()); + EXPECT_TRUE(expected_vector.all()); + + test_vector[0] = true; + test_vector[10] = true; + test_vector[63] = true; + test_vector[64] = true; + test_vector[127] = true; + test_vector[128] = true; + test_vector[200] = true; + test_vector[249] = true; + + expected_vector = ~test_vector; + EXPECT_FALSE(expected_vector.all()); + + EXPECT_FALSE(expected_vector[0]); + EXPECT_TRUE(expected_vector[1]); + EXPECT_TRUE(expected_vector[9]); + EXPECT_FALSE(expected_vector[10]); + EXPECT_TRUE(expected_vector[11]); + EXPECT_TRUE(expected_vector[62]); + EXPECT_FALSE(expected_vector[63]); + EXPECT_FALSE(expected_vector[64]); + EXPECT_TRUE(expected_vector[65]); + EXPECT_TRUE(expected_vector[126]); + EXPECT_FALSE(expected_vector[127]); + EXPECT_FALSE(expected_vector[128]); + EXPECT_TRUE(expected_vector[129]); + EXPECT_TRUE(expected_vector[199]); + EXPECT_FALSE(expected_vector[200]); + EXPECT_TRUE(expected_vector[201]); + EXPECT_TRUE(expected_vector[248]); + EXPECT_FALSE(expected_vector[249]); +} + +TEST(bit_vector_test, flip) +{ + seqan::hibf::bit_vector test_vector(250, false); + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_TRUE(test_vector.none()); + + test_vector.flip(); + EXPECT_TRUE(test_vector.all()); + + test_vector[0] = false; + test_vector[10] = false; + test_vector[63] = false; + test_vector[64] = false; + test_vector[127] = false; + test_vector[128] = false; + 
test_vector[200] = false; + test_vector[249] = false; + + test_vector.flip(); + + EXPECT_TRUE(test_vector[0]); + EXPECT_FALSE(test_vector[1]); + EXPECT_FALSE(test_vector[9]); + EXPECT_TRUE(test_vector[10]); + EXPECT_FALSE(test_vector[11]); + EXPECT_FALSE(test_vector[62]); + EXPECT_TRUE(test_vector[63]); + EXPECT_TRUE(test_vector[64]); + EXPECT_FALSE(test_vector[65]); + EXPECT_FALSE(test_vector[126]); + EXPECT_TRUE(test_vector[127]); + EXPECT_TRUE(test_vector[128]); + EXPECT_FALSE(test_vector[129]); + EXPECT_FALSE(test_vector[199]); + EXPECT_TRUE(test_vector[200]); + EXPECT_FALSE(test_vector[201]); + EXPECT_FALSE(test_vector[248]); + EXPECT_TRUE(test_vector[249]); +} + +TEST(bit_vector_test, flip_single_bit) +{ + seqan::hibf::bit_vector test_vector(250, false); + EXPECT_EQ(test_vector.size(), 250u); + EXPECT_TRUE(test_vector.none()); + + test_vector.flip(0); + test_vector.flip(10); + test_vector.flip(63); + test_vector.flip(64); + test_vector.flip(127); + test_vector.flip(128); + test_vector.flip(200); + test_vector.flip(249); + + EXPECT_TRUE(test_vector[0]); + EXPECT_FALSE(test_vector[1]); + EXPECT_FALSE(test_vector[9]); + EXPECT_TRUE(test_vector[10]); + EXPECT_FALSE(test_vector[11]); + EXPECT_FALSE(test_vector[62]); + EXPECT_TRUE(test_vector[63]); + EXPECT_TRUE(test_vector[64]); + EXPECT_FALSE(test_vector[65]); + EXPECT_FALSE(test_vector[126]); + EXPECT_TRUE(test_vector[127]); + EXPECT_TRUE(test_vector[128]); + EXPECT_FALSE(test_vector[129]); + EXPECT_FALSE(test_vector[199]); + EXPECT_TRUE(test_vector[200]); + EXPECT_FALSE(test_vector[201]); + EXPECT_FALSE(test_vector[248]); + EXPECT_TRUE(test_vector[249]); + + test_vector.flip(0); + test_vector.flip(10); + test_vector.flip(63); + test_vector.flip(64); + + EXPECT_FALSE(test_vector[0]); + EXPECT_FALSE(test_vector[10]); + EXPECT_FALSE(test_vector[63]); + EXPECT_FALSE(test_vector[64]); + + EXPECT_THROW(test_vector.flip(250), std::out_of_range); +} + +// 
---------------------------------------------------------------------------- +// capacity +// ---------------------------------------------------------------------------- + +TEST(bit_vector_test, size) +{ + { + seqan::hibf::bit_vector test_vector(1000); + EXPECT_EQ(test_vector.size(), 1000u); + } + + { + seqan::hibf::bit_vector test_vector(64); + EXPECT_EQ(test_vector.size(), 64u); + } + + { + seqan::hibf::bit_vector test_vector(1); + EXPECT_EQ(test_vector.size(), 1u); + } + + { + seqan::hibf::bit_vector test_vector{}; + EXPECT_EQ(test_vector.size(), 0u); + } +} + +TEST(bit_vector_test, empty) +{ + { + seqan::hibf::bit_vector test_vector{}; + EXPECT_TRUE(test_vector.empty()); + } + + { + seqan::hibf::bit_vector test_vector(1); + EXPECT_FALSE(test_vector.empty()); + } + + { + seqan::hibf::bit_vector test_vector(1000); + EXPECT_FALSE(test_vector.empty()); + } +} + +// ---------------------------------------------------------------------------- +// Iterator test +// ---------------------------------------------------------------------------- + +using bit_vector_iterator = typename seqan::hibf::bit_vector::iterator; + +template <> +struct iterator_fixture : public ::testing::Test +{ + using iterator_tag = std::random_access_iterator_tag; + + static constexpr bool const_iterable = true; + + seqan::hibf::bit_vector test_range = seqan::hibf::bit_vector(100, true); + std::vector expected_range = std::vector(100, true); +}; + +INSTANTIATE_TYPED_TEST_SUITE_P(bit_vector_iterator_test, iterator_fixture, ::testing::Types, ); + +TEST(bit_vector_test, output_iterator) +{ + EXPECT_TRUE((std::output_iterator)); + EXPECT_FALSE((std::output_iterator)); + + seqan::hibf::bit_vector test_vector(100, true); + for (auto it = test_vector.begin(); it != test_vector.end(); it += 2) + *it = false; + + for (auto it = test_vector.begin(); it != test_vector.end(); ++it) + EXPECT_EQ(*it, (it - test_vector.begin()) % 2); +} diff --git a/test/unit/hibf/interleaved_bloom_filter_test.cpp 
b/test/unit/hibf/interleaved_bloom_filter_test.cpp index 4b267e62..d0588347 100644 --- a/test/unit/hibf/interleaved_bloom_filter_test.cpp +++ b/test/unit/hibf/interleaved_bloom_filter_test.cpp @@ -2,7 +2,7 @@ // SPDX-FileCopyrightText: 2016-2023, Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: BSD-3-Clause -#include // for Message, TestPartResult, AssertionResult, EXPECT_EQ, TYPED_TEST +#include // for Message, TestPartResult, AssertionResult, EXPECT_EQ, TEST #include // for for_each #include // for array @@ -16,28 +16,9 @@ #include // for bin_index, interleaved_bloom_filter, bin_count, bin_size, hash_... #include // for expect_range_eq, EXPECT_RANGE_EQ -#include // for size_in_mega_bytes +using TypeParam = seqan::hibf::interleaved_bloom_filter; -template -struct interleaved_bloom_filter_test : public ::testing::Test -{ - static ibf_type make_ibf(seqan::hibf::bin_count bins, seqan::hibf::bin_size bits) - { - return ibf_type{seqan::hibf::interleaved_bloom_filter{bins, bits}}; - } - - static ibf_type - make_ibf(seqan::hibf::bin_count bins, seqan::hibf::bin_size bits, seqan::hibf::hash_function_count funs) - { - return ibf_type{seqan::hibf::interleaved_bloom_filter{bins, bits, funs}}; - } -}; - -using ibf_types = ::testing::Types; - -TYPED_TEST_SUITE(interleaved_bloom_filter_test, ibf_types, ); - -TYPED_TEST(interleaved_bloom_filter_test, construction) +TEST(ibf_test, construction) { EXPECT_TRUE(std::is_default_constructible_v); EXPECT_TRUE(std::is_copy_constructible_v); @@ -47,59 +28,88 @@ TYPED_TEST(interleaved_bloom_filter_test, construction) EXPECT_TRUE(std::is_destructible_v); // num hash functions defaults to two - TypeParam ibf1{TestFixture::make_ibf(seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u})}; - TypeParam ibf2{TestFixture::make_ibf(seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u})}; + TypeParam ibf1{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}}; + TypeParam 
ibf2{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; EXPECT_TRUE(ibf1 == ibf2); // bin_size parameter is too small - EXPECT_THROW((TestFixture::make_ibf(seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{0u})), std::logic_error); + EXPECT_THROW((TypeParam{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{0u}}), std::logic_error); // not enough bins - EXPECT_THROW((TestFixture::make_ibf(seqan::hibf::bin_count{0u}, seqan::hibf::bin_size{32u})), std::logic_error); + EXPECT_THROW((TypeParam{seqan::hibf::bin_count{0u}, seqan::hibf::bin_size{32u}}), std::logic_error); // not enough hash functions - EXPECT_THROW((TestFixture::make_ibf(seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{32u}, - seqan::hibf::hash_function_count{0u})), - std::logic_error); + EXPECT_THROW( + (TypeParam{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{32u}, seqan::hibf::hash_function_count{0u}}), + std::logic_error); // too many hash functions - EXPECT_THROW((TestFixture::make_ibf(seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{32u}, - seqan::hibf::hash_function_count{6u})), - std::logic_error); + EXPECT_THROW( + (TypeParam{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{32u}, seqan::hibf::hash_function_count{6u}}), + std::logic_error); +} + +TEST(ibf_test, construction_from_config) +{ + std::vector> hashes{{1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u}, {0u, 2u, 3u, 4u, 5u}}; + size_t const number_of_ub{hashes.size()}; + + seqan::hibf::config ibf_config{.input_fn = + [&](size_t const num, seqan::hibf::insert_iterator it) + { + for (auto const hash : hashes[num]) + it = hash; + }, + .number_of_user_bins = number_of_ub}; + + TypeParam ibf{ibf_config}; + + auto agent = ibf.membership_agent(); + + std::vector query{1, 2, 3, 4, 5}; + + // value 2 is in both user bins + std::vector expected_v2(number_of_ub); + expected_v2[0] = 1; + expected_v2[1] = 1; + // value 8 is only in user bin 0 + std::vector expected_v8(number_of_ub); + expected_v8[0] 
= 1; + // value 0 is only in user bin 1 + std::vector expected_v0(number_of_ub); + expected_v0[1] = 1; + + EXPECT_RANGE_EQ(agent.bulk_contains(2), expected_v2); + EXPECT_RANGE_EQ(agent.bulk_contains(8), expected_v8); + EXPECT_RANGE_EQ(agent.bulk_contains(0), expected_v0); } -TYPED_TEST(interleaved_bloom_filter_test, member_getter) +TEST(ibf_test, member_getter) { - TypeParam t1{TestFixture::make_ibf(seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u})}; - EXPECT_EQ(t1.bin_count(), 64u); - EXPECT_EQ(t1.bin_size(), 1024u); - EXPECT_EQ(t1.bit_size(), 65'536ull); - EXPECT_EQ(t1.hash_function_count(), 2u); - - TypeParam t2{TestFixture::make_ibf(seqan::hibf::bin_count{73u}, - seqan::hibf::bin_size{1019u}, - seqan::hibf::hash_function_count{3u})}; - EXPECT_EQ(t2.bin_count(), 73u); - EXPECT_EQ(t2.bin_size(), 1019u); - EXPECT_EQ(t2.bit_size(), 130'432ull); - EXPECT_EQ(t2.hash_function_count(), 3u); + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}}; + EXPECT_EQ(ibf.bin_count(), 64u); + EXPECT_EQ(ibf.bin_size(), 1024u); + EXPECT_EQ(ibf.bit_size(), 65'536ull); + EXPECT_EQ(ibf.hash_function_count(), 2u); + + ibf = TypeParam{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1019u}, seqan::hibf::hash_function_count{3u}}; + EXPECT_EQ(ibf.bin_count(), 73u); + EXPECT_EQ(ibf.bin_size(), 1019u); + EXPECT_EQ(ibf.bit_size(), 130'432ull); + EXPECT_EQ(ibf.hash_function_count(), 3u); } -TYPED_TEST(interleaved_bloom_filter_test, bulk_contains) +TEST(ibf_test, bulk_contains) { - TypeParam ibf{TestFixture::make_ibf(seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u})}; + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}}; std::vector expected(64); // empty bitvector is expected since we did not insert anything auto agent = ibf.membership_agent(); - for (size_t hash : std::views::iota(0, 64)) // test correct resize for each bin individually + for (size_t hash : std::views::iota(0, 64)) { auto & res = agent.bulk_contains(hash); 
EXPECT_RANGE_EQ(res, expected); } // Test iterator interface - for (size_t hash : std::views::iota(0, 64)) // test correct resize for each bin individually + for (size_t hash : std::views::iota(0, 64)) { auto & res = agent.bulk_contains(hash); size_t i = 0; @@ -111,7 +121,7 @@ TYPED_TEST(interleaved_bloom_filter_test, bulk_contains) } // Test operator[] interface - for (size_t hash : std::views::iota(0, 64)) // test correct resize for each bin individually + for (size_t hash : std::views::iota(0, 64)) { auto & res = agent.bulk_contains(hash); EXPECT_EQ(expected.size(), res.size()); @@ -122,57 +132,17 @@ TYPED_TEST(interleaved_bloom_filter_test, bulk_contains) } } -TEST(interleaved_bloom_filter_single_test, construction_from_config) +TEST(ibf_test, emplace) { - std::vector> hashes{{1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u}, {0u, 2u, 3u, 4u, 5u}}; - size_t const number_of_ub{hashes.size()}; - - seqan::hibf::config ibf_config{.input_fn = - [&](size_t const num, seqan::hibf::insert_iterator it) - { - for (auto const hash : hashes[num]) - it = hash; - }, - .number_of_user_bins = number_of_ub}; - - seqan::hibf::interleaved_bloom_filter ibf{ibf_config}; - - { - auto agent = ibf.membership_agent(); - - std::vector query{1, 2, 3, 4, 5}; - - // value 2 is in both user bins - std::vector expected_v2(number_of_ub); - expected_v2[0] = 1; - expected_v2[1] = 1; - // value 8 is only in user bin 0 - std::vector expected_v8(number_of_ub); - expected_v8[0] = 1; - // value 0 is only in user bin 1 - std::vector expected_v0(number_of_ub); - expected_v0[1] = 1; - - EXPECT_RANGE_EQ(agent.bulk_contains(2), expected_v2); - EXPECT_RANGE_EQ(agent.bulk_contains(8), expected_v8); - EXPECT_RANGE_EQ(agent.bulk_contains(0), expected_v0); - } -} - -TYPED_TEST(interleaved_bloom_filter_test, emplace) -{ - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. 
- seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::views::iota(0, 64)) for (size_t hash : std::views::iota(0, 64)) ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - // 2. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; - auto agent = ibf2.membership_agent(); + // 2. Test for correctness + auto agent = ibf.membership_agent(); std::vector expected(64, 1); // every hash value should be set for every bin for (size_t hash : std::views::iota(0, 64)) // test correct resize for each bin individually { @@ -181,12 +151,10 @@ TYPED_TEST(interleaved_bloom_filter_test, emplace) } } -TYPED_TEST(interleaved_bloom_filter_test, clear) +TEST(ibf_test, clear) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::views::iota(0, 64)) for (size_t hash : std::views::iota(0, 64)) @@ -195,9 +163,8 @@ TYPED_TEST(interleaved_bloom_filter_test, clear) // 2. Clear a bin ibf.clear(seqan::hibf::bin_index{17u}); - // 3. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; - auto agent = ibf2.membership_agent(); + // 3. Test for correctness + auto agent = ibf.membership_agent(); std::vector expected(64, 1); // every hash value should be set for every bin... 
expected[17] = 0; // ...except bin 17 for (size_t hash : std::views::iota(0, 64)) @@ -207,12 +174,10 @@ TYPED_TEST(interleaved_bloom_filter_test, clear) } } -TYPED_TEST(interleaved_bloom_filter_test, clear_range) +TEST(ibf_test, clear_range) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{64u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::views::iota(0, 64)) for (size_t hash : std::views::iota(0, 64)) @@ -224,9 +189,8 @@ TYPED_TEST(interleaved_bloom_filter_test, clear_range) seqan::hibf::bin_index{45u}}; ibf.clear(bin_range); - // 3. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; - auto agent = ibf2.membership_agent(); + // 3. Test for correctness + auto agent = ibf.membership_agent(); std::vector expected(64, 1); // every hash value should be set for every bin... expected[8] = 0; // ...except bin 8 expected[17] = 0; // ...except bin 17 @@ -238,21 +202,18 @@ TYPED_TEST(interleaved_bloom_filter_test, clear_range) } } -TYPED_TEST(interleaved_bloom_filter_test, counting) +TEST(ibf_test, counting) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{128u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::views::iota(0, 128)) for (size_t hash : std::views::iota(0, 128)) ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - // 2. 
Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; + // 2. Test for correctness seqan::hibf::counting_vector counting(128, 0); - auto agent = ibf2.membership_agent(); + auto agent = ibf.membership_agent(); for (size_t hash : std::views::iota(0, 128)) // test correct resize for each bin individually { counting += agent.bulk_contains(hash); @@ -265,7 +226,7 @@ TYPED_TEST(interleaved_bloom_filter_test, counting) counting += counting; EXPECT_EQ(counting, expected2); - // minus binning_bitvector + // minus bit_vector for (size_t hash : std::views::iota(0, 128)) // test correct resize for each bin individually { counting -= agent.bulk_contains(hash); @@ -277,21 +238,18 @@ TYPED_TEST(interleaved_bloom_filter_test, counting) EXPECT_EQ(counting, std::vector(128, 42)); } -TYPED_TEST(interleaved_bloom_filter_test, counting_agent) +TEST(ibf_test, counting_agent) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{128u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::views::iota(0, 128)) for (size_t hash : std::views::iota(0, 128)) ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - // 2. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_count - TypeParam ibf2{ibf}; - auto agent = ibf2.counting_agent(); - auto agent2 = ibf2.template counting_agent(); + // 2. 
Test for correctness + auto agent = ibf.counting_agent(); + auto agent2 = ibf.template counting_agent(); std::vector expected(128, 128); EXPECT_RANGE_EQ(agent.bulk_count(std::views::iota(0u, 128u)), expected); @@ -299,21 +257,18 @@ TYPED_TEST(interleaved_bloom_filter_test, counting_agent) } // Check special case where there is only one `1` in the bitvector. -TYPED_TEST(interleaved_bloom_filter_test, counting_no_ub) +TEST(ibf_test, counting_no_ub) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{128u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::array{63, 127}) for (size_t hash : std::views::iota(0, 128)) ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - // 2. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; + // 2. Test for correctness seqan::hibf::counting_vector counting(128, 0); - auto agent = ibf2.membership_agent(); + auto agent = ibf.membership_agent(); for (size_t hash : std::views::iota(0, 128)) // test correct resize for each bin individually { counting += agent.bulk_contains(hash); @@ -332,21 +287,18 @@ TYPED_TEST(interleaved_bloom_filter_test, counting_no_ub) } // Check special case where there is only one `1` in the bitvector. -TYPED_TEST(interleaved_bloom_filter_test, counting_agent_no_ub) +TEST(ibf_test, counting_agent_no_ub) { - // 1. Test uncompressed interleaved_bloom_filter directly because the compressed one is not mutable. - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{128u}, - seqan::hibf::bin_size{1024u}, - seqan::hibf::hash_function_count{2u}}; + // 1. 
Construct and emplace + TypeParam ibf{seqan::hibf::bin_count{128u}, seqan::hibf::bin_size{1024u}, seqan::hibf::hash_function_count{2u}}; for (size_t bin_idx : std::array{63, 127}) for (size_t hash : std::views::iota(0, 128)) ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - // 2. Construct either the uncompressed or compressed interleaved_bloom_filter and test set with bulk_contains - TypeParam ibf2{ibf}; - auto agent = ibf2.counting_agent(); - auto agent2 = ibf2.template counting_agent(); + // 2. Test for correctness + auto agent = ibf.counting_agent(); + auto agent2 = ibf.template counting_agent(); std::vector expected(128, 0); expected[63] = 128; @@ -355,24 +307,27 @@ TYPED_TEST(interleaved_bloom_filter_test, counting_agent_no_ub) EXPECT_RANGE_EQ(agent2.bulk_count(std::views::iota(0u, 128u)), expected); } -TYPED_TEST(interleaved_bloom_filter_test, increase_bin_number_to) +TEST(ibf_test, increase_bin_number_to) { - seqan::hibf::interleaved_bloom_filter ibf1{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u}}; - seqan::hibf::interleaved_bloom_filter ibf2{ibf1}; + TypeParam ibf{seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u}}; + size_t const original_bitsize{ibf.bit_size()}; // 1. Throw if trying to reduce number of bins. - EXPECT_THROW(ibf1.increase_bin_number_to(seqan::hibf::bin_count{62u}), std::invalid_argument); + EXPECT_THROW(ibf.increase_bin_number_to(seqan::hibf::bin_count{62u}), std::invalid_argument); + EXPECT_EQ(ibf.bin_count(), 73u); + EXPECT_EQ(ibf.bit_size(), original_bitsize); // 2. No change in bin_words implies no change in size. - ibf2.increase_bin_number_to({seqan::hibf::bin_count{127u}}); - EXPECT_EQ(ibf1.bit_size(), ibf2.bit_size()); - EXPECT_EQ(ibf2.bin_count(), 127u); + ibf.increase_bin_number_to({seqan::hibf::bin_count{127u}}); + EXPECT_EQ(ibf.bit_size(), original_bitsize); + EXPECT_EQ(ibf.bin_count(), 127u); // 3. If resizing takes place, the inserted values must still be valid. 
auto hashes = std::views::iota(0, 64); for (size_t current_bin : std::views::iota(0, 64)) // test correct resize for each bin individually { - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}}; + TypeParam ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{8u}}; + EXPECT_EQ(ibf.bit_size(), 512u); std::ranges::for_each(hashes, [&ibf, &current_bin](auto const h) { @@ -382,12 +337,11 @@ TYPED_TEST(interleaved_bloom_filter_test, increase_bin_number_to) ibf.increase_bin_number_to(seqan::hibf::bin_count{73u}); EXPECT_EQ(ibf.bin_count(), 73u); - EXPECT_GE(ibf.bit_size(), 1024u); + EXPECT_EQ(ibf.bit_size(), 1024u); std::vector expected(73, 0); expected[current_bin] = 1; // none of the bins except current_bin stores the hash values. - TypeParam tibf{ibf}; // test output on compressed and uncompressed - auto agent = tibf.membership_agent(); + auto agent = ibf.membership_agent(); for (size_t const h : hashes) { auto & res = agent.bulk_contains(h); @@ -395,37 +349,3 @@ TYPED_TEST(interleaved_bloom_filter_test, increase_bin_number_to) } } } - -TYPED_TEST(interleaved_bloom_filter_test, data_access) -{ - seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{1024u}, seqan::hibf::bin_size{1024u}}; - - EXPECT_LE(sdsl::size_in_mega_bytes(ibf.raw_data()), 1.0f); -} - -// MIGRATION_TODO -// TYPED_TEST(interleaved_bloom_filter_test, serialisation) -// { -// TypeParam ibf{TestFixture::make_ibf(seqan::hibf::bin_count{73u}, seqan::hibf::bin_size{1024u})}; -// seqan::hibf::test::do_serialisation(ibf); -// } - -// TEST(interleaved_bloom_filter_test, decompression) -// { -// seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{64u}, seqan::hibf::bin_size{1024u}}; - -// // Only use every other bin.
-// auto take_odd = [](auto number) -// { -// return number & 1; -// }; -// for (size_t bin_idx : std::views::iota(0, 64) | std::views::filter(take_odd)) -// for (size_t hash : std::views::iota(0, 64)) -// ibf.emplace(hash, seqan::hibf::bin_index{bin_idx}); - -// seqan::hibf::interleaved_bloom_filter ibf_compressed{ibf}; - -// seqan::hibf::interleaved_bloom_filter ibf_decompressed{ibf_compressed}; - -// EXPECT_TRUE(ibf == ibf_decompressed); -// } diff --git a/test/unit/hibf/print_test.cpp b/test/unit/hibf/print_test.cpp index 7b69b391..5a70343d 100644 --- a/test/unit/hibf/print_test.cpp +++ b/test/unit/hibf/print_test.cpp @@ -10,13 +10,13 @@ #include // for range_value_t #include // for allocator, vector -#include // for counting_vector, binning_bitvector +#include // for counting_vector, bit_vector #include // for print template using print_test = ::testing::Test; -using test_types = ::testing::Types, seqan::hibf::counting_vector, seqan::hibf::counting_vector, @@ -42,7 +42,7 @@ TYPED_TEST(print_test, to_stdout) { testing::internal::CaptureStdout(); testing::internal::CaptureStderr(); - if constexpr (std::same_as) + if constexpr (std::same_as) { TypeParam vector(5u); vector[0] = vector[2] = vector[4] = true; @@ -70,7 +70,7 @@ TYPED_TEST(print_test, to_stderr) { testing::internal::CaptureStdout(); testing::internal::CaptureStderr(); - if constexpr (std::same_as) + if constexpr (std::same_as) { TypeParam vector(5u); vector[0] = vector[2] = vector[4] = true;