From c5631745f33546f96f83998890a39605caa53dd6 Mon Sep 17 00:00:00 2001 From: Andre Mueller Date: Thu, 29 Feb 2024 12:37:00 +0100 Subject: [PATCH] better handling of inconsistent assembly_summary file headers --- src/building.cpp | 10 ++++++++-- src/taxonomy_io.cpp | 6 ++++-- src/version.h | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/building.cpp b/src/building.cpp index af0c388..0226fe2 100644 --- a/src/building.cpp +++ b/src/building.cpp @@ -367,10 +367,16 @@ void add_targets_to_database(database& db, } try { - const auto fileId = extract_accession_string( + const auto fileAccession = extract_accession_string( filename, sequence_id_type::acc_ver); - const taxon_id fileTaxId = find_taxon_id(sequ2taxid, fileId); + const taxon_id fileTaxId = find_taxon_id(sequ2taxid, fileAccession); + + if(infoLvl == info_level::verbose) { + std::lock_guard lock(outputMtx); + cout << " accession '" << fileAccession + << "' -> taxid " << fileTaxId << endl; + } sequence_reader reader{filename}; diff --git a/src/taxonomy_io.cpp b/src/taxonomy_io.cpp index 593ad59..1e3b927 100644 --- a/src/taxonomy_io.cpp +++ b/src/taxonomy_io.cpp @@ -237,9 +237,11 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile, string header; getline(is, header); std::istringstream hs(header); - //get rid of comment chars - hs >> header; while(hs >> header) { + // handle comment chars + if (header.size() == 1 && header[0] == '#') { + hs >> header; + } if(header == "taxid") { taxcol = col; } diff --git a/src/version.h b/src/version.h index b808180..69427d3 100644 --- a/src/version.h +++ b/src/version.h @@ -24,11 +24,11 @@ #define MC_VERSION_H_ -#define MC_VERSION 20230309 +#define MC_VERSION 20240229 #define MC_DB_VERSION 20200820 -#define MC_VERSION_STRING "2.3.1" +#define MC_VERSION_STRING "2.3.2" #endif