-
Notifications
You must be signed in to change notification settings - Fork 421
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* diamond fix test parameter name: comp-based-stat needs to be comp_based_stats; also remove param name attribute where possible * add options missing from command line - salltitles and sallseqid were in the inputs but unused in the CLI - fix --unal (reports unaligned queries in fmt 6) - add --al and modify --un for reporting (un)aligned query sequences
- Loading branch information
1 parent
6ecdbaa
commit 828c844
Showing
6 changed files
with
84 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
<tool id="bg_diamond" name="Diamond" version="@[email protected]" profile="19.01"> | ||
<tool id="bg_diamond" name="Diamond" version="@[email protected]" profile="19.01"> | ||
<description>alignment tool for short sequences against a protein database</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
|
@@ -62,8 +62,23 @@ | |
--query-cover '$query_cover' | ||
--subject-cover '$subject_cover' | ||
--block-size '$sens_cond.block_size' | ||
#if str($unal) == '1': | ||
--unal 1 --un '$unalqueries' | ||
#if $output_unal | ||
#if "--un" in $output_unal | ||
--un '$unalqueries' | ||
#if $query.ext.startswith("fasta"): | ||
--unfmt fasta | ||
#else | ||
--unfmt fastq | ||
#end if | ||
#end if | ||
#if "--al" in $output_unal | ||
--al '$alqueries' | ||
#if $query.ext.startswith("fasta"): | ||
--alfmt fasta | ||
#else | ||
--alfmt fastq | ||
#end if | ||
#end if | ||
#end if | ||
#if $tax_cond.tax_select == 'file': | ||
--taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'` | ||
|
@@ -79,7 +94,7 @@ | |
<option value="blastx">Align DNA query sequences (blastx)</option> | ||
</param> | ||
<when value="blastx"> | ||
<param name="query_gencode" argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help=""> | ||
<param argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help=""> | ||
<option value="1">The Standard Code</option> | ||
<option value="2">The Vertebrate Mitochondrial Code</option> | ||
<option value="3">The Yeast Mitochondrial Code</option> | ||
|
@@ -100,7 +115,7 @@ | |
<option value="25">Candidate Division SR1 and Gracilibacteria Code</option> | ||
<option value="26">Pachysolen tannophilus Nuclear Code</option> | ||
</param> | ||
<param argument="--min-orf" name="min_orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> | ||
<param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> | ||
|
||
<param name="query_strand" argument="--strand" type="select" label="query strands to search" help=""> | ||
<option value="both" selected="True">Both</option> | ||
|
@@ -113,21 +128,21 @@ | |
<option value="no" selected="true">no</option> | ||
</param> | ||
<when value="yes"> | ||
<param argument="--range-culling" name="range_culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> | ||
<param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> | ||
<param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." /> | ||
</when> | ||
<when value="no"/> | ||
</conditional> | ||
|
||
<param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | ||
<param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | ||
<option value="0">Disable</option> | ||
<option value="1" selected="True">Default mode (Hauser, 2016)</option> | ||
</param> | ||
</when> | ||
<when value="blastp"> | ||
<param name="no_self_hits" argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/> | ||
<param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/> | ||
|
||
<param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | ||
<param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> | ||
<option value="0">Disable</option> | ||
<option value="1" selected="True">Default mode (Hauser, 2016)</option> | ||
<option value="2">Compositional matrix adjust conditioned on sequence properties, simplified (Yu, 2005)</option> | ||
|
@@ -234,18 +249,24 @@ | |
</conditional> | ||
<expand macro="hit_filter_macro" /> | ||
<param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" /> | ||
<param name="query_cover" argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" /> | ||
<param name="subject_cover" argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" /> | ||
<param argument="--unal" type="boolean" truevalue="1" falsevalue="0" checked="false" label="report unaligned queries" help=""/> | ||
<param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" /> | ||
<param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" /> | ||
<!-- Optional multi-select: each chosen value ("--un" and/or "--al") requests a separate file of unaligned or aligned query sequences. --> | ||
<param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help=""> | ||
<option value="--un">Output unaligned queries (--un)</option> | ||
<option value="--al">Output aligned queries (--al)</option> | ||
</param> | ||
</inputs> | ||
<outputs> | ||
<expand macro="output_macro" /> | ||
<data format="fasta" name="unalqueries" label="${tool.name} on ${on_string} (unaligned queries)"> | ||
<filter>unal == "1"</filter> | ||
<data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries"> | ||
<filter>output_unal and "--un" in output_unal</filter> | ||
</data> | ||
<!-- Aligned-query output: only created when "--al" is selected in output_unal, matching the command section, which writes '$alqueries' under the same condition. --> | ||
<data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries"> | ||
<filter>output_unal and "--al" in output_unal</filter> | ||
</data> | ||
</outputs> | ||
<tests> | ||
<test> | ||
<test expect_num_outputs="3"> | ||
<conditional name="method_cond"> | ||
<param name="method_select" value="blastp" /> | ||
</conditional> | ||
|
@@ -256,13 +277,15 @@ | |
</conditional> | ||
<conditional name="output"> | ||
<param name="outfmt" value="6"/> | ||
<param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp,sskingdoms,skingdoms,sphylums"/> | ||
<!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 --> | ||
<param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/> | ||
<param name="unal" value="true"/> | ||
</conditional> | ||
<conditional name="sens_cond"> | ||
<param name="sensitivity" value=""/> | ||
</conditional> | ||
<param name="matrix" value="BLOSUM62"/> | ||
<param name="comp-based-stat" value="1"/> | ||
<param name="comp_based_stats" value="1"/> | ||
<param name="masking" value="1"/> | ||
<conditional name="hit_filter"> | ||
<param name="hit_filter_select" value="max"/> | ||
|
@@ -277,9 +300,20 @@ | |
<conditional name="sens_cond"> | ||
<param name="block_size" value="2"/> | ||
</conditional> | ||
<param name="output_unal" value="--al,--un"/> | ||
<output name="unalqueries"> | ||
<assert_contents> | ||
<has_line line=">shuffled sequence that should go to unaligned"/> | ||
</assert_contents> | ||
</output> | ||
<output name="alqueries"> | ||
<assert_contents> | ||
<has_line line=">sequence more text"/> | ||
</assert_contents> | ||
</output> | ||
<output name="blast_tabular" file="diamond_results.tabular"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<conditional name="method_cond"> | ||
<param name="method_select" value="blastp" /> | ||
</conditional> | ||
|
@@ -300,7 +334,7 @@ | |
<param name="sensitivity" value=""/> | ||
</conditional> | ||
<param name="matrix" value="BLOSUM62"/> | ||
<param name="comp-based-stat" value="1"/> | ||
<param name="comp_based_stats" value="1"/> | ||
<param name="masking" value="1"/> | ||
<conditional name="hit_filter"> | ||
<param name="hit_filter_select" value="max"/> | ||
|
@@ -317,7 +351,7 @@ | |
</conditional> | ||
<output name="blast_tabular" file="diamond_results.wtax.tabular"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<conditional name="method_cond"> | ||
<param name="method_select" value="blastx" /> | ||
<conditional name="frameshift_cond"> | ||
|
@@ -336,7 +370,7 @@ | |
<param name="sensitivity" value=""/> | ||
</conditional> | ||
<param name="matrix" value="BLOSUM62"/> | ||
<param name="comp-based-stat" value="1"/> | ||
<param name="comp_based_stats" value="1"/> | ||
<param name="masking" value="1"/> | ||
<conditional name="hit_filter"> | ||
<param name="hit_filter_select" value="top"/> | ||
|
@@ -353,7 +387,7 @@ | |
</conditional> | ||
<output name="blast_tabular" file="diamond_results.pairwise"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<conditional name="method_cond"> | ||
<param name="method_select" value="blastp" /> | ||
</conditional> | ||
|
@@ -364,10 +398,12 @@ | |
</conditional> | ||
<conditional name="output"> | ||
<param name="outfmt" value="100"/> | ||
<param name="salltitles" value="false"/> | ||
<param name="sallseqid" value="false"/> | ||
</conditional> | ||
<output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<conditional name="method_cond"> | ||
<param name="method_select" value="blastx" /> | ||
<conditional name="frameshift_cond"> | ||
|
@@ -386,7 +422,7 @@ | |
<param name="sensitivity" value=""/> | ||
</conditional> | ||
<param name="matrix" value="BLOSUM62"/> | ||
<param name="comp-based-stat" value="1"/> | ||
<param name="comp_based_stats" value="1"/> | ||
<param name="masking" value="1"/> | ||
<conditional name="hit_filter"> | ||
<param name="hit_filter_select" value="top"/> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
<tool id="bg_diamond_view" name="Diamond view" version="@VERSION@" profile="19.01"> | ||
<tool id="bg_diamond_view" name="Diamond view" version="@[email protected]" profile="19.01"> | ||
<description>generate formatted output from DAA files</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
|
@@ -29,7 +29,7 @@ | |
<expand macro="output_macro" /> | ||
</outputs> | ||
<tests> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="daa" ftype="daa" value="diamond_results.daa" /> | ||
<conditional name="output"> | ||
<param name="outfmt" value="5"/> | ||
|
@@ -40,15 +40,15 @@ | |
</conditional> | ||
<output name="blast_tabular" file="diamond_results.xml"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="daa" ftype="daa" value="diamond_results.daa" /> | ||
<conditional name="output"> | ||
<param name="outfmt" value="6"/> | ||
<param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp"/> | ||
</conditional> | ||
<output name="blast_tabular" file="diamond_view_results.tabular"/> | ||
</test> | ||
<test> | ||
<test expect_num_outputs="1"> | ||
<param name="daa" ftype="daa" value="diamond_results.daa" /> | ||
<conditional name="output"> | ||
<param name="outfmt" value="101"/> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 94M1D189M 100 0 0 0 | ||
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 105M1D178M 100 0 0 0 | ||
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0 | ||
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0 | ||
shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters