diff --git a/MANUAL b/MANUAL index 1cb6a7db..1659dfff 100644 --- a/MANUAL +++ b/MANUAL @@ -1423,6 +1423,12 @@ must also be specified. This is because the ID tag is required by the SAM Spec. Specify --rg multiple times to set multiple fields. See the SAM Spec for details about what fields are legal. + --sam-omit-prim-seq + +When printing primary alignments, Bowtie 2 by default will write out +the SEQ and QUAL strings. Specifying this option causes Bowtie 2 to +print an asterisk in those fields instead. + --omit-sec-seq When printing secondary alignments, Bowtie 2 by default will write out diff --git a/MANUAL.markdown b/MANUAL.markdown index 828d9e27..b0596418 100644 --- a/MANUAL.markdown +++ b/MANUAL.markdown @@ -1822,6 +1822,17 @@ must also be specified. This is because the `ID` tag is required by the [SAM Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the [SAM Spec][SAM] for details about what fields are legal. + + + + --sam-omit-prim-seq + + + +When printing primary alignments, Bowtie 2 by default will write out +the `SEQ` and `QUAL` strings. Specifying this option causes Bowtie 2 to +print an asterisk in those fields instead. + @@ -2860,6 +2871,7 @@ for more details and variations on this process. [`--np`]: #bowtie2-options-np [`--offrate`]: #bowtie2-options-o [`--omit-sec-seq`]: #bowtie2-options-omit-sec-seq +[`--sam-omit-prim-seq`]: #bowtie2-options-sam-omit-prim-seq [`--packed`]: #bowtie2-build-options-p [`--phred33`]: #bowtie2-options-phred33-quals [`--phred64`]: #bowtie2-options-phred64-quals diff --git a/NEWS b/NEWS index e4b7b786..f2b4bf7f 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,12 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu Version Release History ======================= +## dev + +### bowtie2 ### + * Added option --sam-omit-prim-seq, which causes Bowtie 2 to set SEQ and QUAL + fields to "*" for primary alignments. 
+ ## Version 2.5.2 - Oct 13, 2023 ## ### bowtie2 ### diff --git a/aln_sink.cpp b/aln_sink.cpp index 4256bf86..62e74574 100644 --- a/aln_sink.cpp +++ b/aln_sink.cpp @@ -2054,7 +2054,8 @@ void AlnSinkSam::appendMate( o.append("0\t"); } // SEQ - if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + if(( flags.isPrimary() && samc_.omitPrimarySeqQual() ) || + (!flags.isPrimary() && samc_.omitSecondarySeqQual())) { o.append('*'); } else { // Print the read @@ -2070,7 +2071,8 @@ void AlnSinkSam::appendMate( } o.append('\t'); // QUAL - if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) { + if(( flags.isPrimary() && samc_.omitPrimarySeqQual() ) || + (!flags.isPrimary() && samc_.omitSecondarySeqQual())) { o.append('*'); } else { // Print the quals diff --git a/bt2_search.cpp b/bt2_search.cpp index 60fb797e..da3e9e99 100644 --- a/bt2_search.cpp +++ b/bt2_search.cpp @@ -131,6 +131,7 @@ static bool hadoopOut; // print Hadoop status and summary messages static bool fullRef; static bool samTruncQname; // whether to truncate QNAME to 255 chars static bool samAppendComment; // append FASTA/FASTQ comment to SAM record +static bool samOmitPrimSeqQual; // omit SEQ/QUAL for primary alignments? static bool samOmitSecSeqQual; // omit SEQ/QUAL for 2ndary alignments? static bool samNoUnal; // don't print records for unaligned reads static bool samNoHead; // don't print any header lines in SAM output @@ -344,6 +345,7 @@ static void resetOptions() { fullRef = false; // print entire reference name instead of just up to 1st space samTruncQname = true; // whether to truncate QNAME to 255 chars samAppendComment = false; // append FASTA/Q comment to SAM record + samOmitPrimSeqQual = false; // omit SEQ/QUAL for primary alignments? samOmitSecSeqQual = false; // omit SEQ/QUAL for 2ndary alignments? 
samNoUnal = false; // omit SAM records for unaligned reads samNoHead = false; // don't print any header lines in SAM output @@ -541,6 +543,10 @@ static struct option long_options[] = { {(char*)"sam-no-qname-trunc", no_argument, 0, ARG_SAM_NO_QNAME_TRUNC}, {(char*)"sam-omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ}, {(char*)"omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ}, + {(char*)"sam-omit-prim-seq", no_argument, 0, ARG_SAM_OMIT_PRIM_SEQ}, + {(char*)"omit-prim-seq", no_argument, 0, ARG_SAM_OMIT_PRIM_SEQ}, + {(char*)"sam-have-prim-seq", no_argument, 0, ARG_SAM_HAVE_PRIM_SEQ}, + {(char*)"have-prim-seq", no_argument, 0, ARG_SAM_HAVE_PRIM_SEQ}, {(char*)"sam-no-head", no_argument, 0, ARG_SAM_NOHEAD}, {(char*)"sam-nohead", no_argument, 0, ARG_SAM_NOHEAD}, {(char*)"sam-noHD", no_argument, 0, ARG_SAM_NOHEAD}, @@ -871,6 +877,8 @@ static void printUsage(ostream& out) { << " --rg add (\"lab:value\") to @RG line of SAM header." << endl << " Note: @RG line only printed when --rg-id is set." << endl << " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." << endl + << " --sam-omit-prim-seq" << endl + << " put '*' in SEQ and QUAL fields for primary alignments." << endl << " --sam-no-qname-trunc" << endl << " Suppress standard behavior of truncating readname at first whitespace " << endl << " at the expense of generating non-standard SAM." 
<< endl @@ -1292,6 +1300,8 @@ static void parseOption(int next_option, const char *arg) { case ARG_SAM_NO_QNAME_TRUNC: samTruncQname = false; break; case ARG_SAM_APPEND_COMMENT: samAppendComment = true; break; case ARG_SAM_OMIT_SEC_SEQ: samOmitSecSeqQual = true; break; + case ARG_SAM_OMIT_PRIM_SEQ: samOmitPrimSeqQual = true; break; + case ARG_SAM_HAVE_PRIM_SEQ: samOmitPrimSeqQual = false; break; case ARG_SAM_NO_UNAL: samNoUnal = true; break; case ARG_SAM_NOHEAD: samNoHead = true; break; case ARG_SAM_NOSQ: samNoSQ = true; break; @@ -4940,6 +4950,7 @@ static void driver( reflens, // reference sequence lengths samTruncQname, // whether to truncate QNAME to 255 chars samAppendComment, // append FASTA/FASTQ comment to SAM record + samOmitPrimSeqQual, // omit SEQ/QUAL for primary alignments? samOmitSecSeqQual, // omit SEQ/QUAL for 2ndary alignments? samNoUnal, // omit unaligned-read records? string("bowtie2"), // program id diff --git a/doc/manual.html b/doc/manual.html index 498ebb99..10ed4786 100644 --- a/doc/manual.html +++ b/doc/manual.html @@ -2089,6 +2089,16 @@

SAM options

+ +
--sam-omit-prim-seq
+ + +

When printing primary alignments, Bowtie 2 by default will write +out the SEQ and QUAL strings. Specifying this +option causes Bowtie 2 to print an asterisk in those fields instead.

+ + +
--omit-sec-seq
diff --git a/doc/website/manual.ssi b/doc/website/manual.ssi index 4bf1ed48..1857d6b6 100644 --- a/doc/website/manual.ssi +++ b/doc/website/manual.ssi @@ -2058,6 +2058,16 @@ about what fields are legal.

+ +
--sam-omit-prim-seq 
+ + +

When printing primary alignments, Bowtie 2 by default will write +out the SEQ and QUAL strings. Specifying this +option causes Bowtie 2 to print an asterisk in those fields instead.

+ + +
--omit-sec-seq
diff --git a/opts.h b/opts.h index 6a7edeae..567aac0c 100644 --- a/opts.h +++ b/opts.h @@ -162,6 +162,8 @@ enum { ARG_ALIGN_PAIRED_READS, // --align-paired-reads ARG_SRA_ACC, // --sra-acc ARG_SAM_APPEND_COMMENT, // --sam-append-comment + ARG_SAM_OMIT_PRIM_SEQ, // --sam-omit-prim-seq + ARG_SAM_HAVE_PRIM_SEQ, // --sam-have-prim-seq }; #endif diff --git a/sam.h b/sam.h index 7745909e..7148fd71 100644 --- a/sam.h +++ b/sam.h @@ -63,6 +63,7 @@ class SamConfig { const LenList& reflens, // reference sequence lengths bool truncQname, // truncate read name to 255? bool appendComment, // append FASTA/Q comment to sam record + bool omitprim, // omit primary SEQ/QUAL bool omitsec, // omit secondary SEQ/QUAL bool noUnal, // omit unaligned reads const std::string& pg_id, // id @@ -108,6 +109,7 @@ class SamConfig { bool print_zt) : truncQname_(truncQname), appendComment_(appendComment), + omitprim_(omitprim), omitsec_(omitsec), noUnal_(noUnal), pg_id_(pg_id), @@ -386,6 +388,17 @@ class SamConfig { } + /** + * Return true iff SEQ and QUAL of primary alignments should be set + * to '*' instead of being printed in full. + * + * This ignores the SAM spec's recommendations; it is done to reduce + * the file size. + */ + bool omitPrimarySeqQual() const { + return omitprim_; + } + /** * Return true iff we should try to obey the SAM spec's recommendations * that: @@ -409,6 +422,7 @@ class SamConfig { bool truncQname_; // truncate QNAME to 255 chars? bool appendComment_;// Append FASTA/Q comment to SAM record + bool omitprim_; // omit primary SEQ/QUAL bool omitsec_; // omit secondary bool noUnal_; // omit unaligned reads