diff --git a/include/hibf/layout/data_store.hpp b/include/hibf/layout/data_store.hpp
index 07ac4846..f38b20af 100644
--- a/include/hibf/layout/data_store.hpp
+++ b/include/hibf/layout/data_store.hpp
@@ -65,6 +65,8 @@ struct data_store
     std::vector positions = [this]()
     {
         std::vector ps;
+        if (this->kmer_counts == nullptr)
+            return ps; // GCOVR_EXCL_LINE
         ps.resize(this->kmer_counts->size());
         std::iota(ps.begin(), ps.end(), 0);
         return ps;
@@ -90,6 +92,13 @@ struct data_store
     //!\brief Tracks the time the algorithm spends on rearranging user bins (merged bins).
     concurrent_timer rearrangement_timer{};
     //!\}
+
+    /*!\brief Checks that the members are in a usable state.
+     * \throws std::invalid_argument if `hibf_layout` or `kmer_counts` is a nullptr, if `sketches` is set but
+     *         differs in size from `kmer_counts`, if `fpr_correction` is empty, or if `relaxed_fpr_correction`
+     *         is not in (0.0,1.0].
+     */
+    void validate() const;
 };
 
 } // namespace seqan::hibf::layout
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5b64369c..9d240b38 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,6 +11,7 @@ set (HIBF_SOURCE_FILES
     layout/compute_fpr_correction.cpp
     layout/compute_layout.cpp
     layout/compute_relaxed_fpr_correction.cpp
+    layout/data_store.cpp
     sketch/compute_sketches.cpp
     layout/graph.cpp
     layout/hierarchical_binning.cpp
diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp
index 9b5c70d9..af656a76 100644
--- a/src/layout/compute_layout.cpp
+++ b/src/layout/compute_layout.cpp
@@ -37,8 +37,7 @@ layout compute_layout(config const & config,
 
     layout resulting_layout{};
 
-    data_store store{.false_positive_rate = config.maximum_fpr,
-                     .hibf_layout = &resulting_layout,
+    data_store store{.hibf_layout = &resulting_layout,
                      .kmer_counts = std::addressof(kmer_counts),
                      .sketches = std::addressof(sketches),
                      .positions = std::move(positions)};
diff --git a/src/layout/data_store.cpp b/src/layout/data_store.cpp
new file mode 100644
index 00000000..f556ff95
--- /dev/null
+++ b/src/layout/data_store.cpp
@@ -0,0 +1,34 @@
+// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
+// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <stdexcept> // for invalid_argument
+
+#include <hibf/layout/data_store.hpp> // for data_store
+
+namespace seqan::hibf::layout
+{
+
+// Called at the start of hierarchical_binning::execute() and simple_binning::execute().
+void data_store::validate() const
+{
+    if (hibf_layout == nullptr)
+        throw std::invalid_argument{"[HIBF ERROR] data_store::hibf_layout must not be nullptr."};
+
+    if (kmer_counts == nullptr)
+        throw std::invalid_argument{"[HIBF ERROR] data_store::kmer_counts must not be nullptr."};
+
+    // A nullptr for sketches is accepted here; only the size is checked when sketches are set.
+    if (sketches != nullptr && kmer_counts->size() != sketches->size())
+        throw std::invalid_argument{
+            "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."};
+
+    if (fpr_correction.empty())
+        throw std::invalid_argument{"[HIBF ERROR] data_store::fpr_correction must not be empty."};
+
+    // Negated in-range test: NaN compares false, so `x <= 0.0 || x > 1.0` would let it pass.
+    if (!(relaxed_fpr_correction > 0.0 && relaxed_fpr_correction <= 1.0))
+        throw std::invalid_argument{"[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."};
+}
+
+} // namespace seqan::hibf::layout
diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp
index 5fef875d..c5ebc455 100644
--- a/src/layout/hierarchical_binning.cpp
+++ b/src/layout/hierarchical_binning.cpp
@@ -29,8 +29,8 @@ namespace seqan::hibf::layout
 size_t hierarchical_binning::execute()
 {
     assert(data != nullptr);
-    assert(data->kmer_counts != nullptr);
+    data->validate();
     assert(data->positions.size() <= data->kmer_counts->size());
 
     static constexpr size_t max_size_t{std::numeric_limits<size_t>::max()};
 
@@ -40,7 +40,8 @@ size_t hierarchical_binning::execute()
 
     if (!config.disable_estimate_union && !config.disable_rearrangement)
     {
-        assert(data->sketches != nullptr);
+        if (data->sketches == nullptr)
+            throw std::invalid_argument{"[HIBF ERROR] data_store::sketches must not be nullptr."};
         data->rearrangement_timer.start();
         sketch::toolbox::rearrange_bins(*data->sketches,
                                         *data->kmer_counts,
diff --git a/src/layout/simple_binning.cpp b/src/layout/simple_binning.cpp
index d1b9cb5e..2fae7337 100644
--- a/src/layout/simple_binning.cpp
+++ b/src/layout/simple_binning.cpp
@@ -19,6 +19,7 @@ namespace seqan::hibf::layout
 size_t simple_binning::execute()
 {
     assert(data != nullptr);
+    data->validate();
     assert(num_technical_bins > 0u);
     assert(num_user_bins > 0u);
 
diff --git a/test/include/hibf/test/expect_throw_msg.hpp b/test/include/hibf/test/expect_throw_msg.hpp
new file mode 100644
index 00000000..688b917d
--- /dev/null
+++ b/test/include/hibf/test/expect_throw_msg.hpp
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
+// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*!\file
+ * \brief Provides EXPECT_THROW_MSG.
+ * \author Enrico Seiler
+ */
+
+#pragma once
+
+#include
+
+#include
+
+//!\brief Expects that `statement` throws `expected_exception` whose what() equals `expected_message`.
+//!       Records a non-fatal test failure if nothing, another exception, or another message is thrown.
+#ifdef EXPECT_THROW_MSG
+#    warning "EXPECT_THROW_MSG is already defined."
+#else
+#    define EXPECT_THROW_MSG(statement, expected_exception, expected_message) \
+        try \
+        { \
+            statement; \
+            std::string const message = "Expected: " #statement " throws an exception of type " #expected_exception \
+                                        ".\n Actual: it throws nothing."; \
+            GTEST_NONFATAL_FAILURE_(message.data()); \
+        } \
+        catch (expected_exception const & exception) \
+        { \
+            if (auto result = ::testing::internal::EqHelper::Compare("Expected", \
+                                                                     "Actual", \
+                                                                     std::string_view{expected_message}, \
+                                                                     std::string_view{exception.what()}); \
+                !result) \
+            { \
+                std::string message = #statement " throws the correct exception, but the description is incorrect.\n"; \
+                message += result.failure_message(); \
+                GTEST_NONFATAL_FAILURE_(message.data()); \
+            } \
+        } \
+        catch (std::exception const & exception) \
+        { \
+            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
+            message += "Actual: it throws "; \
+            message += ::testing::internal::GetTypeName(typeid(exception)); \
+            message += " with description \""; \
+            message += exception.what(); \
+            message += "\"."; \
+            GTEST_NONFATAL_FAILURE_(message.data()); \
+        } \
+        catch (...) \
+        { \
+            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
+            message += "Actual: it throws an unknown exception."; \
+            GTEST_NONFATAL_FAILURE_(message.data()); \
+        }
+#endif
diff --git a/test/unit/hibf/layout/CMakeLists.txt b/test/unit/hibf/layout/CMakeLists.txt
index 452ce8e1..95b3fc94 100644
--- a/test/unit/hibf/layout/CMakeLists.txt
+++ b/test/unit/hibf/layout/CMakeLists.txt
@@ -8,3 +8,4 @@ hibf_test (layout_test.cpp)
 hibf_test (fpr_correction_test.cpp)
 hibf_test (graph_test.cpp)
 hibf_test (compute_layout_test.cpp)
+hibf_test (data_store_test.cpp)
diff --git a/test/unit/hibf/layout/data_store_test.cpp b/test/unit/hibf/layout/data_store_test.cpp
new file mode 100644
index 00000000..cc18ecbb
--- /dev/null
+++ b/test/unit/hibf/layout/data_store_test.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
+// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include
+
+#include
+#include
+
+TEST(data_store_test, validate)
+{
+    seqan::hibf::layout::layout layout{};
+    std::vector kmer_counts(3);
+    std::vector sketches(3);
+
+    // hibf_layout must not be nullptr
+    {
+        seqan::hibf::layout::data_store store{};
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::hibf_layout must not be nullptr.");
+    }
+
+    // kmer_counts must not be nullptr
+    {
+        seqan::hibf::layout::data_store store{.hibf_layout = &layout};
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::kmer_counts must not be nullptr.");
+    }
+
+    // kmer_counts and sketches must have the same size
+    {
+        std::vector wrong_sketches(2);
+
+        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
+                                              .kmer_counts = &kmer_counts,
+                                              .sketches = &wrong_sketches};
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size.");
+    }
+
+    // fpr_correction must not be empty
+    {
+        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
+                                              .kmer_counts = &kmer_counts,
+                                              .sketches = &sketches};
+
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::fpr_correction must not be empty.");
+    }
+
+    // relaxed_fpr_correction must be in (0.0,1.0]
+    {
+        seqan::hibf::layout::data_store store{.hibf_layout = &layout,
+                                              .kmer_counts = &kmer_counts,
+                                              .sketches = &sketches,
+                                              .fpr_correction = {1.0, 2.0, 3.0}};
+
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");
+
+        store.relaxed_fpr_correction = std::nextafter(1.0, 2.0);
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");
+
+        store.relaxed_fpr_correction = std::nan("");
+        EXPECT_THROW_MSG(store.validate(),
+                         std::invalid_argument,
+                         "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0].");
+
+        store.relaxed_fpr_correction = 0.5;
+        EXPECT_NO_THROW(store.validate());
+    }
+}
diff --git a/test/unit/hibf/layout/simple_binning_test.cpp b/test/unit/hibf/layout/simple_binning_test.cpp
index eb97278e..edf7a9d7 100644
--- a/test/unit/hibf/layout/simple_binning_test.cpp
+++ b/test/unit/hibf/layout/simple_binning_test.cpp
@@ -20,7 +20,8 @@ TEST(simple_binning_test, small_example)
 
     seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout,
                                          .kmer_counts = &kmer_counts,
-                                         .fpr_correction = std::vector(65, 1.0)};
+                                         .fpr_correction = std::vector(65, 1.0),
+                                         .relaxed_fpr_correction = 1.0};
     seqan::hibf::layout::simple_binning algo{data, 9};
     size_t max_bin = algo.execute();
 
@@ -41,7 +42,8 @@ TEST(simple_binning_test, uniform_distribution)
 
     seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout,
                                          .kmer_counts = &kmer_counts,
-                                         .fpr_correction = std::vector(65, 1.0)};
+                                         .fpr_correction = std::vector(65, 1.0),
+                                         .relaxed_fpr_correction = 1.0};
     seqan::hibf::layout::simple_binning algo{data, 4u};
     size_t max_bin = algo.execute();
 