From e746fcb5a5f0a47805a088750b2787d077610a31 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Sun, 12 Nov 2023 16:54:22 +0100 Subject: [PATCH] [FIX] genbank/embl parsing --- include/seqan3/io/sequence_file/format_embl.hpp | 8 +++----- include/seqan3/io/sequence_file/format_genbank.hpp | 9 +++++---- .../sequence_file/sequence_file_format_genbank_test.cpp | 3 +-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/seqan3/io/sequence_file/format_embl.hpp b/include/seqan3/io/sequence_file/format_embl.hpp index 79c46bad71..5c74aeaad2 100644 --- a/include/seqan3/io/sequence_file/format_embl.hpp +++ b/include/seqan3/io/sequence_file/format_embl.hpp @@ -197,12 +197,10 @@ class format_embl } else { - detail::consume(stream_view | detail::take_until(is_end)); + detail::consume(stream_view | detail::take_until_or_throw(is_end)); // consume until "//" } - //Jump over // and cntrl - ++stream_it; - ++stream_it; - ++stream_it; + + std::ranges::advance(stream_it, 3u, std::ranges::end(stream_view)); // Skip `//` and potentially '\n' } //!\copydoc sequence_file_output_format::write_sequence_record diff --git a/include/seqan3/io/sequence_file/format_genbank.hpp b/include/seqan3/io/sequence_file/format_genbank.hpp index de25a225b7..6ad6465a27 100644 --- a/include/seqan3/io/sequence_file/format_genbank.hpp +++ b/include/seqan3/io/sequence_file/format_genbank.hpp @@ -164,8 +164,8 @@ class format_genbank { constexpr auto is_legal_alph = char_is_valid_for; std::ranges::copy( - stream_view | std::views::filter(!(is_space || is_digit)) - | detail::take_until_or_throw_and_consume(is_end) // consume "//" + stream_view | std::views::filter(!(is_space || is_digit)) // ignore whitespace and numbers + | detail::take_until_or_throw(is_end) // until // | std::views::transform( [is_legal_alph](char const c) // enforce legal alphabet { @@ -183,9 +183,10 @@ class format_genbank } else { - detail::consume(stream_view | detail::take_until_or_throw_and_consume(is_end)); // consume until "//" - ++stream_it; // consume "/n" + detail::consume(stream_view | detail::take_until_or_throw(is_end)); // consume until "//" } + + std::ranges::advance(stream_it, 3u, std::ranges::end(stream_view)); // Skip `//` and potentially '\n' } //!\copydoc sequence_file_output_format::write_sequence_record diff --git a/test/unit/io/sequence_file/sequence_file_format_genbank_test.cpp b/test/unit/io/sequence_file/sequence_file_format_genbank_test.cpp index 18af6e72b1..4eb1c318ee 100644 --- a/test/unit/io/sequence_file/sequence_file_format_genbank_test.cpp +++ b/test/unit/io/sequence_file/sequence_file_format_genbank_test.cpp @@ -50,8 +50,7 @@ DEFINITION Homo sapiens mRNA for prepro cortistatin like peptide, complete ACCESSION ID3 ORIGIN 1 ACGTTTA -// -)"}; +//)"}; std::string illegal_alphabet_character_input{ R"(LOCUS ID1