Skip to content

Commit

Permalink
Merge pull request #3300 from eseiler/fix/sam_file_output
Browse files Browse the repository at this point in the history
[FIX] sam_file_output
  • Loading branch information
eseiler authored Nov 7, 2024
2 parents 4d03890 + 5093b36 commit 77db997
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 35 deletions.
43 changes: 13 additions & 30 deletions include/seqan3/io/sam_file/header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,17 @@ class sam_file_header
/*!\name Constructors, destructor and assignment
* \{
*/
sam_file_header() = default; //!< Defaulted.
sam_file_header(sam_file_header const &) = delete; //!< Deleted. Holds a unique_ptr.
sam_file_header & operator=(sam_file_header const &) = delete; //!< Deleted. Holds a unique_ptr.
sam_file_header(sam_file_header &&) = default; //!< Defaulted.
sam_file_header & operator=(sam_file_header &&) = default; //!< Defaulted.
~sam_file_header() = default; //!< Defaulted.

/*!\brief Construct from a range of reference ids which redirects the `ref_ids_ptr` member (non-owning).
* \param[in] ref_ids The range over reference ids to redirect the pointer at.
sam_file_header() = default; //!< Defaulted.
sam_file_header(sam_file_header const &) = default; //!< Defaulted.
sam_file_header & operator=(sam_file_header const &) = default; //!< Defaulted.
sam_file_header(sam_file_header &&) = default; //!< Defaulted.
sam_file_header & operator=(sam_file_header &&) = default; //!< Defaulted.
~sam_file_header() = default; //!< Defaulted.

/*!\brief Construct from a range of reference ids.
* \param[in] ref_ids The range over reference ids.
*/
sam_file_header(ref_ids_type & ref_ids) : ref_ids_ptr{&ref_ids, ref_ids_deleter_noop}
{}

/*!\brief Construct from a rvalue range of reference ids which is moved into the `ref_ids_ptr` (owning).
* \param[in] ref_ids The range over reference ids to own.
*/
sam_file_header(ref_ids_type && ref_ids) :
ref_ids_ptr{new ref_ids_type{std::move(ref_ids)}, ref_ids_deleter_default}
sam_file_header(ref_ids_type ref_ids) : reference_ids{std::move(ref_ids)}
{}
//!\}

Expand All @@ -83,22 +76,12 @@ class sam_file_header
std::vector<std::string> comments; //!< The list of comments.

private:
//!\brief The type of the internal ref_ids pointer. Allows dynamically setting ownership management.
using ref_ids_ptr_t = std::unique_ptr<ref_ids_type, std::function<void(ref_ids_type *)>>;
//!\brief Stream deleter that does nothing (no ownership assumed).
static void ref_ids_deleter_noop(ref_ids_type *)
{}
//!\brief Stream deleter with default behaviour (ownership assumed).
static void ref_ids_deleter_default(ref_ids_type * ptr)
{
delete ptr;
}
//!\brief The key's type of ref_dict.
using key_type = std::conditional_t<std::ranges::contiguous_range<std::ranges::range_reference_t<ref_ids_type>>,
std::span<range_innermost_value_t<ref_ids_type> const>,
type_reduce_t<std::ranges::range_reference_t<ref_ids_type>>>;
//!\brief The pointer to reference ids information (non-owning if reference information is given).
ref_ids_ptr_t ref_ids_ptr{new ref_ids_type{}, ref_ids_deleter_default};
//!\brief The reference ids.
ref_ids_type reference_ids{};

//!\brief Custom hash function since std::hash is not defined for all range types (e.g. std::span<char>).
struct key_hasher
Expand Down Expand Up @@ -140,7 +123,7 @@ class sam_file_header
*/
ref_ids_type & ref_ids()
{
return *ref_ids_ptr;
return reference_ids;
}

/*!\brief The reference information. (used by the SAM/BAM format)
Expand Down
5 changes: 4 additions & 1 deletion include/seqan3/io/sam_file/output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ class sam_file_output
//!\brief The destructor will write the header if it has not been written before.
~sam_file_output()
{
if (header_has_been_written)
// !primary_stream indicates moved-from object
// unique_ptr holds a nullptr after being moved from
// See https://eel.is/c++draft/unique.ptr#single.ctor-18
if (header_has_been_written || !primary_stream)
return;

assert(!format.valueless_by_exception());
Expand Down
26 changes: 26 additions & 0 deletions test/unit/io/sam_file/format_bam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,32 @@ struct sam_file_read<seqan3::format_bam> : public sam_file_data
'\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00',
'\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x62', '\x48',
'\x48', '\x31', '\x41', '\x45', '\x33', '\x30', '\x00'};

std::vector<std::string> issue3299_output{
{'\x42', '\x41', '\x4d', '\x01', '\x35', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e',
'\x3a', '\x31', '\x2e', '\x36', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x68', '\x65',
'\x6c', '\x6c', '\x6f', '\x09', '\x4c', '\x4e', '\x3a', '\x31', '\x30', '\x30', '\x30', '\x0a', '\x40', '\x53',
'\x51', '\x09', '\x53', '\x4e', '\x3a', '\x77', '\x6f', '\x72', '\x6c', '\x64', '\x09', '\x4c', '\x4e', '\x3a',
'\x32', '\x30', '\x30', '\x30', '\x0a', '\x02', '\x00', '\x00', '\x00', '\x06', '\x00', '\x00', '\x00', '\x68',
'\x65', '\x6c', '\x6c', '\x6f', '\x00', '\xe8', '\x03', '\x00', '\x00', '\x06', '\x00', '\x00', '\x00', '\x77',
'\x6f', '\x72', '\x6c', '\x64', '\x00', '\xd0', '\x07', '\x00', '\x00'},
{'\x42', '\x41', '\x4d', '\x01', '\x3b', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e',
'\x3a', '\x31', '\x2e', '\x36', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x68', '\x65',
'\x6c', '\x6c', '\x6f', '\x66', '\x6f', '\x6f', '\x09', '\x4c', '\x4e', '\x3a', '\x31', '\x30', '\x30', '\x31',
'\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x77', '\x6f', '\x72', '\x6c', '\x64', '\x66',
'\x6f', '\x6f', '\x09', '\x4c', '\x4e', '\x3a', '\x32', '\x30', '\x30', '\x31', '\x0a', '\x02', '\x00', '\x00',
'\x00', '\x09', '\x00', '\x00', '\x00', '\x68', '\x65', '\x6c', '\x6c', '\x6f', '\x66', '\x6f', '\x6f', '\x00',
'\xe9', '\x03', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x77', '\x6f', '\x72', '\x6c', '\x64', '\x66',
'\x6f', '\x6f', '\x00', '\xd1', '\x07', '\x00', '\x00'},
{'\x42', '\x41', '\x4d', '\x01', '\x41', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
'\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a',
'\x68', '\x65', '\x6c', '\x6c', '\x6f', '\x66', '\x6f', '\x6f', '\x66', '\x6f', '\x6f', '\x09', '\x4c',
'\x4e', '\x3a', '\x31', '\x30', '\x30', '\x32', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e',
'\x3a', '\x77', '\x6f', '\x72', '\x6c', '\x64', '\x66', '\x6f', '\x6f', '\x66', '\x6f', '\x6f', '\x09',
'\x4c', '\x4e', '\x3a', '\x32', '\x30', '\x30', '\x32', '\x0a', '\x02', '\x00', '\x00', '\x00', '\x0c',
'\x00', '\x00', '\x00', '\x68', '\x65', '\x6c', '\x6c', '\x6f', '\x66', '\x6f', '\x6f', '\x66', '\x6f',
'\x6f', '\x00', '\xea', '\x03', '\x00', '\x00', '\x0c', '\x00', '\x00', '\x00', '\x77', '\x6f', '\x72',
'\x6c', '\x64', '\x66', '\x6f', '\x6f', '\x66', '\x6f', '\x6f', '\x00', '\xd2', '\x07', '\x00', '\x00'}};
};

// ---------------------------------------------------------------------------------------------------------------------
Expand Down
20 changes: 17 additions & 3 deletions test/unit/io/sam_file/format_sam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,20 @@ read1 41 * 1 61 1S1M1D1M1I * 0 0 ACGT !##$
std::string wrong_hexadecimal_tag{
R"(@SQ SN:ref LN:150
read1 41 ref 1 61 1S1M1D1M1I = 10 300 ACGT !##$ bH:H:1AE30
)"};

std::vector<std::string> issue3299_output{
R"(@HD VN:1.6
@SQ SN:hello LN:1000
@SQ SN:world LN:2000
)",
R"(@HD VN:1.6
@SQ SN:hellofoo LN:1001
@SQ SN:worldfoo LN:2001
)",
R"(@HD VN:1.6
@SQ SN:hellofoofoo LN:1002
@SQ SN:worldfoofoo LN:2002
)"};
};

Expand Down Expand Up @@ -384,9 +398,9 @@ TEST_F(sam_format, write_different_header)

write_header();
ostream.flush();
EXPECT_EQ(
ostream.str(),
"@HD\tVN:1.6\tSO:coordinate\tSS:query\tGO:reference\n@SQ\tSN:ref\tLN:34\n*\t0\tref\t1\t0\t*\t*\t0\t0\t*\t*\n");
EXPECT_EQ(ostream.str(),
"@HD\tVN:1.6\tSO:coordinate\tSS:query\tGO:reference\n@SQ\tSN:ref\tLN:34\n*\t0\tref\t1\t0\t*\t*"
"\t0\t0\t*\t*\n");
}

TEST_F(sam_format, issue2195)
Expand Down
55 changes: 54 additions & 1 deletion test/unit/io/sam_file/sam_file_format_test_template.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,58 @@ TYPED_TEST_P(sam_file_write, format_errors)
seqan3::format_error);
}

TYPED_TEST_P(sam_file_write, issue3299)
{
using sam_file_output_t = seqan3::sam_file_output<typename seqan3::sam_file_output<>::selected_field_ids,
seqan3::type_list<TypeParam>,
std::vector<std::string>>;
std::vector<std::string> seq_names{"hello", "world"};
std::vector<size_t> seq_lengths{1000, 2000};

// Issue: Moved-from sam_file_output would try to write header on destruction
{
sam_file_output_t fout1{std::ostringstream{}, seq_names, seq_lengths, TypeParam{}};
sam_file_output_t fout2{std::move(fout1)};
}

// Issue: Header does not own ref_ids: ref_ids outlives sam_file_output
{
std::vector<sam_file_output_t> alignment_streams;
auto seq_names_copy = seq_names;
alignment_streams.emplace_back(std::ostringstream{}, seq_names_copy, seq_lengths, TypeParam{});
// Destructor calls:
// 1) seq_names_copy
// 2) alignment_streams, starting with the one element it holds
}

// Issue: Header does not own ref_ids: ref_ids may change
size_t const iterations{this->issue3299_output.size()};
// Order of destruction of vector elements differs between GCC and Clang
std::vector<std::ostringstream> outputs(iterations);
{
std::vector<sam_file_output_t> alignment_streams;
for (size_t i = 0; i < iterations; ++i)
{
alignment_streams.emplace_back(outputs[i], seq_names, seq_lengths, TypeParam{});

std::ranges::for_each(seq_names,
[](std::string & str)
{
str += "foo";
});
std::ranges::for_each(seq_lengths,
[](size_t & len)
{
++len;
});
}
}
for (size_t i = 0; i < iterations; ++i)
{
EXPECT_EQ(outputs[i].str(), this->issue3299_output[i]) << "Iteration: " << i;
}
}

REGISTER_TYPED_TEST_SUITE_P(sam_file_read,
input_concept,
header_sucess,
Expand All @@ -729,4 +781,5 @@ REGISTER_TYPED_TEST_SUITE_P(sam_file_write,
with_header,
cigar_vector,
special_cases,
format_errors);
format_errors,
issue3299);

0 comments on commit 77db997

Please sign in to comment.