Skip to content

Commit

Permalink
[FEATURE] Add data_store::validate
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Sep 10, 2024
1 parent 2e0456e commit 4dd58ff
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 7 deletions.
4 changes: 4 additions & 0 deletions include/hibf/layout/data_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ struct data_store
std::vector<size_t> positions = [this]()
{
std::vector<size_t> ps;
if (this->kmer_counts == nullptr)
return ps; // GCOVR_EXCL_LINE
ps.resize(this->kmer_counts->size());
std::iota(ps.begin(), ps.end(), 0);
return ps;
Expand All @@ -90,6 +92,8 @@ struct data_store
//!\brief Tracks the time the algorithm spends on rearranging user bins (merged bins).
concurrent_timer rearrangement_timer{};
//!\}

void validate() const;
};

} // namespace seqan::hibf::layout
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set (HIBF_SOURCE_FILES
layout/compute_fpr_correction.cpp
layout/compute_layout.cpp
layout/compute_relaxed_fpr_correction.cpp
layout/data_store.cpp
sketch/compute_sketches.cpp
layout/graph.cpp
layout/hierarchical_binning.cpp
Expand Down
3 changes: 1 addition & 2 deletions src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ layout compute_layout(config const & config,

layout resulting_layout{};

data_store store{.false_positive_rate = config.maximum_fpr,
.hibf_layout = &resulting_layout,
data_store store{.hibf_layout = &resulting_layout,
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches),
.positions = std::move(positions)};
Expand Down
29 changes: 29 additions & 0 deletions src/layout/data_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <stdexcept> // for invalid_argument

#include <hibf/layout/data_store.hpp> // for data_store

namespace seqan::hibf::layout
{

void data_store::validate() const
{
if (hibf_layout == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::hibf_layout must not be nullptr."};

if (kmer_counts == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::kmer_counts must not be nullptr."};

if (sketches != nullptr && kmer_counts->size() != sketches->size())
throw std::invalid_argument{
"[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."};

if (fpr_correction.empty())
throw std::invalid_argument{"[HIBF ERROR] data_store::fpr_correction must not be empty."};

if (relaxed_fpr_correction <= 0.0 || relaxed_fpr_correction > 1.0)
throw std::invalid_argument{"[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."};
}

} // namespace seqan::hibf::layout
6 changes: 3 additions & 3 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ namespace seqan::hibf::layout
size_t hierarchical_binning::execute()
{
assert(data != nullptr);
assert(data->kmer_counts != nullptr);
assert(data->positions.size() <= data->kmer_counts->size());
data->validate();

static constexpr size_t max_size_t{std::numeric_limits<size_t>::max()};

Expand All @@ -40,7 +39,8 @@ size_t hierarchical_binning::execute()

if (!config.disable_estimate_union && !config.disable_rearrangement)
{
assert(data->sketches != nullptr);
if (data->sketches == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::sketches must not be nullptr."};
data->rearrangement_timer.start();
sketch::toolbox::rearrange_bins(*data->sketches,
*data->kmer_counts,
Expand Down
1 change: 1 addition & 0 deletions src/layout/simple_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ namespace seqan::hibf::layout
size_t simple_binning::execute()
{
assert(data != nullptr);
data->validate();
assert(num_technical_bins > 0u);
assert(num_user_bins > 0u);

Expand Down
56 changes: 56 additions & 0 deletions test/include/hibf/test/expect_throw_msg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides EXPECT_THROW_MSG.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <gtest/gtest.h>

#include <hibf/platform.hpp>

#ifdef EXPECT_THROW_MSG
# warning "EXPECT_THROW_MSG is already defined."
#else
/*!\brief Expects that `statement` throws `expected_exception` and that its what() equals `expected_message`.
 * \param statement          The statement to execute.
 * \param expected_exception The exception type that `statement` is expected to throw.
 * \param expected_message   The text that the exception's what() is expected to return.
 * \details
 * A non-fatal googletest failure is reported if
 *   * `statement` throws nothing,
 *   * `statement` throws `expected_exception`, but what() differs from `expected_message`,
 *   * `statement` throws a different exception derived from std::exception, or
 *   * `statement` throws something that is not derived from std::exception.
 *
 * The expansion is wrapped in `do { ... } while (false)` such that the macro, together with the trailing
 * semicolon at the call site, forms exactly one statement. This makes it safe to use in an unbraced if/else.
 */
# define EXPECT_THROW_MSG(statement, expected_exception, expected_message)                                            \
    do                                                                                                                 \
    {                                                                                                                  \
        try                                                                                                            \
        {                                                                                                              \
            statement;                                                                                                 \
            std::string const message = "Expected: " #statement " throws an exception of type " #expected_exception   \
                                        ".\n  Actual: it throws nothing.";                                             \
            GTEST_NONFATAL_FAILURE_(message.data());                                                                   \
        }                                                                                                              \
        catch (expected_exception const & exception)                                                                   \
        {                                                                                                              \
            if (auto result = ::testing::internal::EqHelper::Compare("Expected",                                       \
                                                                     "Actual",                                         \
                                                                     std::string_view{expected_message},               \
                                                                     std::string_view{exception.what()});              \
                !result)                                                                                               \
            {                                                                                                          \
                std::string message = #statement " throws the correct exception, but the description is incorrect.\n"; \
                message += result.failure_message();                                                                   \
                GTEST_NONFATAL_FAILURE_(message.data());                                                               \
            }                                                                                                          \
        }                                                                                                              \
        catch (std::exception const & exception)                                                                       \
        {                                                                                                              \
            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n  "; \
            message += "Actual: it throws ";                                                                           \
            message += ::testing::internal::GetTypeName(typeid(exception));                                            \
            message += " with description \"";                                                                         \
            message += exception.what();                                                                               \
            message += "\".";                                                                                          \
            GTEST_NONFATAL_FAILURE_(message.data());                                                                   \
        }                                                                                                              \
        catch (...)                                                                                                    \
        {                                                                                                              \
            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n  "; \
            message += "Actual: it throws an unknown exception.";                                                      \
            GTEST_NONFATAL_FAILURE_(message.data());                                                                   \
        }                                                                                                              \
    }                                                                                                                  \
    while (false)
#endif
1 change: 1 addition & 0 deletions test/unit/hibf/layout/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ hibf_test (layout_test.cpp)
hibf_test (fpr_correction_test.cpp)
hibf_test (graph_test.cpp)
hibf_test (compute_layout_test.cpp)
hibf_test (data_store_test.cpp)
74 changes: 74 additions & 0 deletions test/unit/hibf/layout/data_store_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <gtest/gtest.h>

#include <hibf/layout/data_store.hpp>
#include <hibf/test/expect_throw_msg.hpp>

// Checks every precondition of data_store::validate, in the order in which validate evaluates them.
// Each scope fixes exactly the violation of the previous scope, such that the next check is reached.
TEST(data_store_test, validate)
{
    seqan::hibf::layout::layout layout{};
    std::vector<size_t> kmer_counts(3);
    std::vector<seqan::hibf::sketch::hyperloglog> sketches(3);

    // hibf_layout must not be nullptr
    {
        seqan::hibf::layout::data_store store{};
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::hibf_layout must not be nullptr.");
    }

    // kmer_counts must not be nullptr
    {
        seqan::hibf::layout::data_store store{.hibf_layout = &layout};
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::kmer_counts must not be nullptr.");
    }

    // kmer_counts and sketches must have the same size
    {
        std::vector<seqan::hibf::sketch::hyperloglog> wrong_sketches(2);

        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
                                              .kmer_counts = &kmer_counts,
                                              .sketches = &wrong_sketches};
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size.");
    }

    // fpr_correction must not be empty
    {
        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
                                              .kmer_counts = &kmer_counts,
                                              .sketches = &sketches};

        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::fpr_correction must not be empty.");
    }

    // relaxed_fpr_correction must be in (0.0,1.0]
    {
        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
                                              .kmer_counts = &kmer_counts,
                                              .sketches = &sketches,
                                              .fpr_correction = {1.0, 2.0, 3.0}};

        // The value a data_store starts with when relaxed_fpr_correction is not set is rejected.
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");

        // The lower bound is exclusive.
        store.relaxed_fpr_correction = 0.0;
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");

        // Negative values are rejected.
        store.relaxed_fpr_correction = -0.5;
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");

        // The smallest representable value greater than the upper bound is rejected.
        store.relaxed_fpr_correction = std::nextafter(1.0, 2.0);
        EXPECT_THROW_MSG(store.validate(),
                         std::invalid_argument,
                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");

        // The upper bound is inclusive.
        store.relaxed_fpr_correction = 1.0;
        EXPECT_NO_THROW(store.validate());

        store.relaxed_fpr_correction = 0.5;
        EXPECT_NO_THROW(store.validate());
    }
}
6 changes: 4 additions & 2 deletions test/unit/hibf/layout/simple_binning_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ TEST(simple_binning_test, small_example)

seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout,
.kmer_counts = &kmer_counts,
.fpr_correction = std::vector<double>(65, 1.0)};
.fpr_correction = std::vector<double>(65, 1.0),
.relaxed_fpr_correction = 1.0};

seqan::hibf::layout::simple_binning algo{data, 9};
size_t max_bin = algo.execute();
Expand All @@ -41,7 +42,8 @@ TEST(simple_binning_test, uniform_distribution)

seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout,
.kmer_counts = &kmer_counts,
.fpr_correction = std::vector<double>(65, 1.0)};
.fpr_correction = std::vector<double>(65, 1.0),
.relaxed_fpr_correction = 1.0};

seqan::hibf::layout::simple_binning algo{data, 4u};
size_t max_bin = algo.execute();
Expand Down

0 comments on commit 4dd58ff

Please sign in to comment.