Skip to content

Commit

Permalink
Merge pull request #609 from gallardoalba/Update_clustalw
Browse files Browse the repository at this point in the history
ClustalW: include additional options
  • Loading branch information
nsoranzo authored Sep 15, 2022
2 parents 471ede0 + 640e610 commit fdbbc72
Show file tree
Hide file tree
Showing 8 changed files with 1,172 additions and 17 deletions.
112 changes: 112 additions & 0 deletions tools/clustalw/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
<macros>
<!-- ClustalW release being wrapped; also used as the version of the required package. -->
<token name="@TOOL_VERSION@">2.1</token>
<!-- Wrapper revision; combined with @TOOL_VERSION@ in the tool's version attribute. -->
<token name="@VERSION_SUFFIX@">1</token>
<!-- Dependency declaration; a tool pulls it in with an expand element naming the "requirements" macro. -->
<xml name="requirements">
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">clustalw</requirement>
    </requirements>
</xml>
<!--
    Pairwise and multiple alignment options shared by the DNA and protein branches.
    Tokens supplied by the expanding element:
      ALGORITHM : name of the pairwise weight matrix argument
      MULTIPLE  : name of the multiple alignment weight matrix argument
      LABEL     : prefix used in the two weight matrix labels
      DEFAULT   : default matrix name shown in the help text
    The option children of the expanding element replace both yield placeholders.
-->
<xml name="macro_options" token_algorithm="" token_multiple="" token_label="" token_default="">
    <conditional name="algorithm_conditional">
        <param name="selector" type="select" label="Algorithm for the alignment guide tree" help="Default: slow">
            <option value="">Slow: dynamic programming (accurate)</option>
            <option value="-QUICKTREE">Fast: method of Wilbur and Lipman (approximate)</option>
        </param>
        <when value="">
            <section name="slow_pairwise_alignments" title="Slow pairwise alignments options">
                <param argument="-@ALGORITHM@" type="select" label="@LABEL@ weight matrix" help="Default: @DEFAULT@">
                    <yield/>
                </param>
                <param argument="-PWGAPOPEN" type="integer" min="0" optional="true" label="Gap opening penalty" help="Default: 10" />
                <param argument="-PWGAPEXT" type="float" min="0" optional="true" label="Gap extension penalty" help="Default: 0.1" />
            </section>
        </when>
        <when value="-QUICKTREE">
            <section name="fast_pairwise_alignments" title="Fast pairwise alignments options">
                <param argument="-KTUPLE" type="integer" min="0" optional="true" label="Word size"
                    help="Size of exactly matching fragment that is used. Increase for speed, decrease for sensitivity. Maximum value for proteins is 2, and for DNA 4. Default: 1" />
                <param argument="-TOPDIAGS" type="integer" min="0" optional="true" label="Number of best diagonals"
                    help="Number of k-tuple matches on each diagonal. Decrease for speed; increase for sensitivity. Default: 5" />
                <param argument="-WINDOW" type="integer" min="0" optional="true" label="Window length"
                    help="This is the number of diagonals around each of the best diagonals that will be used. Decrease for speed; increase for sensitivity. Default: 5" />
                <param argument="-PAIRGAP" type="integer" min="0" optional="true" label="Gap penalty"
                    help="Fast pairwise alignment gap penalty for each gap created. Default: 3" />
                <param argument="-SCORE" type="select" label="Score type" help="Score type to output. Default: percent">
                    <option value="PERCENT">Percent</option>
                    <option value="ABSOLUTE">Absolute</option>
                </param>
            </section>
        </when>
    </conditional>
    <section name="multiple_alignments" title="Multiple alignments options">
        <param argument="-@MULTIPLE@" type="select" label="@LABEL@ weight matrix" help="Default: @DEFAULT@">
            <yield/>
        </param>
        <param argument="-GAPOPEN" type="integer" min="0" optional="true" label="Gap opening penalty" help="Default: 10" />
        <param argument="-GAPEXT" type="float" min="0" optional="true" label="Gap extension penalty" help="Default: 0.2" />
        <param argument="-ENDGAPS" type="boolean" truevalue="-ENDGAPS" falsevalue="" checked="false" label="End gap separation penalty"
            help="End gap separation treats end gaps just like internal gaps for the purposes of avoiding gaps that are too close. If you turn this off, end gaps will be ignored for this purpose. This is useful when you wish to align fragments where the end gaps are not biologically meaningful" />
        <param argument="-GAPDIST" type="integer" min="0" optional="true" label="Gap separation penalty range"
            help="Gap separation distance tries to decrease the chances of gaps being too close to each other. Gaps that are less than this distance apart are penalised more than other gaps. This does not prevent close gaps; it makes them less frequent, promoting a block-like appearance of the alignment" />
        <!-- The two flags below switch a penalty OFF when present, so the labels say "Disable". -->
        <param argument="-NOPGAP" type="boolean" truevalue="-NOPGAP" falsevalue="" checked="false" label="Disable residue-specific gap penalties"
            help="Residue specific penalties are amino acid specific gap penalties that reduce or increase the gap opening penalties at each position in the alignment or sequence. As an example, positions that are rich in glycine are more likely to have an adjacent gap than positions that are rich in valine. Enable this option to turn these penalties off" />
        <param argument="-NOHGAP" type="boolean" truevalue="-NOHGAP" falsevalue="" checked="false" label="Disable hydrophilic gap penalties"
            help="Hydrophilic gap penalties are used to increase the chances of a gap within a run (5 or more residues) of hydrophilic amino acids; these are likely to be loop or random coil regions where gaps are more common. Enable this option to turn these penalties off" />
        <param argument="-MAXDIV" type="integer" min="0" max="100" optional="true" label="Delay divergent sequences"
            help="Percentage identity for delay. Delays the alignment of the most distantly related sequences until after the most closely related sequences have been aligned. The setting shows the percent identity level required to delay the addition of a sequence; sequences that are less identical than this level to any other sequences will be aligned later" />
        <param argument="-NEGATIVE" type="boolean" truevalue="-NEGATIVE" falsevalue="" checked="false" label="Negative values in matrix"
            help="Use negative values in the weight matrix (protein alignments)" />
        <param argument="-TRANSWEIGHT" type="float" min="0" max="1" optional="true" label="Transition weight"
            help="The transition weight option for aligning nucleotide sequences has been changed in version 1.7 from an on/off toggle to a weight between 0 and 1. A weight of zero means that the transitions are scored as mismatches; a weight of 1 gives transitions the full match score. For distantly related DNA sequences, the weight should be near to zero; for closely related sequences it can be useful to assign a higher score" />
    </section>
</xml>
<!-- Command-line fragment for the slow pairwise options; each penalty is written only when its optional field has a value. -->
<token name="@SLOW_PAIRWISE_ALIGNMENTS@"><![CDATA[
    #if str($type_conditional.algorithm_conditional.slow_pairwise_alignments.PWGAPOPEN)
        -PWGAPOPEN=$type_conditional.algorithm_conditional.slow_pairwise_alignments.PWGAPOPEN
    #end if
    #if str($type_conditional.algorithm_conditional.slow_pairwise_alignments.PWGAPEXT)
        -PWGAPEXT=$type_conditional.algorithm_conditional.slow_pairwise_alignments.PWGAPEXT
    #end if
]]></token>
<!--
    Command-line fragment for the fast pairwise options. The selector value is written first:
    without it none of the options below would switch the guide tree to the fast algorithm.
    NOTE(review): this changes the command of fast-algorithm runs, so expected test outputs
    for that branch may need to be regenerated.
-->
<token name="@FAST_PAIRWISE_ALIGNMENTS@"><![CDATA[
    $type_conditional.algorithm_conditional.selector
    #if str($type_conditional.algorithm_conditional.fast_pairwise_alignments.KTUPLE)
        -KTUPLE=$type_conditional.algorithm_conditional.fast_pairwise_alignments.KTUPLE
    #end if
    #if str($type_conditional.algorithm_conditional.fast_pairwise_alignments.TOPDIAGS)
        -TOPDIAGS=$type_conditional.algorithm_conditional.fast_pairwise_alignments.TOPDIAGS
    #end if
    #if str($type_conditional.algorithm_conditional.fast_pairwise_alignments.WINDOW)
        -WINDOW=$type_conditional.algorithm_conditional.fast_pairwise_alignments.WINDOW
    #end if
    #if str($type_conditional.algorithm_conditional.fast_pairwise_alignments.PAIRGAP)
        -PAIRGAP=$type_conditional.algorithm_conditional.fast_pairwise_alignments.PAIRGAP
    #end if
    -SCORE=$type_conditional.algorithm_conditional.fast_pairwise_alignments.SCORE
]]></token>
<!-- Command-line fragment for the multiple alignment section: optional numeric fields are written only when set, boolean flags expand to their true/false value. -->
<token name="@MULTIPLE_ALIGNMENTS@"><![CDATA[
    #set $multiple = $type_conditional.multiple_alignments
    #if str($multiple.GAPOPEN)
        -GAPOPEN=$multiple.GAPOPEN
    #end if
    #if str($multiple.GAPEXT)
        -GAPEXT=$multiple.GAPEXT
    #end if
    $multiple.ENDGAPS
    #if str($multiple.GAPDIST)
        -GAPDIST=$multiple.GAPDIST
    #end if
    $multiple.NOPGAP
    $multiple.NOHGAP
    #if str($multiple.MAXDIV)
        -MAXDIV=$multiple.MAXDIV
    #end if
    $multiple.NEGATIVE
    #if str($multiple.TRANSWEIGHT)
        -TRANSWEIGHT=$multiple.TRANSWEIGHT
    #end if
]]></token>

</macros>
4 changes: 0 additions & 4 deletions tools/clustalw/repository_dependencies.xml

This file was deleted.

152 changes: 139 additions & 13 deletions tools/clustalw/rgClustalw.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
<tool id="clustalw" name="ClustalW" version="2.1">
<tool id="clustalw" name="ClustalW" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
<description>multiple sequence alignment program for DNA or proteins</description>
<requirements>
<requirement type="package" version="2.1">clustalw</requirement>
</requirements>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
ln -s '$input' input.fasta &&
clustalw2 -INFILE=input.fasta -OUTFILE='$output' -OUTORDER=$out_order -TYPE=$dnarna
clustalw2 -INFILE=input.fasta -OUTFILE='$output' -OUTORDER=$out_order -TYPE=$type_conditional.dnarna
#if $outcontrol.outform == "clustal"
-OUTPUT=CLUSTAL
#if $outcontrol.out_seqnos == "ON"
Expand All @@ -21,13 +22,62 @@ clustalw2 -INFILE=input.fasta -OUTFILE='$output' -OUTORDER=$out_order -TYPE=$dna
#if $range.mode == "part"
-RANGE=${range.seq_range_start},${range.seq_range_end}
#end if
#if $type_conditional.dnarna == 'PROTEIN'
#if $type_conditional.algorithm_conditional.selector == ''
-PWMATRIX=$type_conditional.algorithm_conditional.slow_pairwise_alignments.PWMATRIX
@SLOW_PAIRWISE_ALIGNMENTS@
#else
@FAST_PAIRWISE_ALIGNMENTS@
#end if
-MATRIX=$type_conditional.multiple_alignments.MATRIX
@MULTIPLE_ALIGNMENTS@
#else
#if $type_conditional.algorithm_conditional.selector == ''
-PWDNAMATRIX=$type_conditional.algorithm_conditional.slow_pairwise_alignments.PWDNAMATRIX
@SLOW_PAIRWISE_ALIGNMENTS@
#else
@FAST_PAIRWISE_ALIGNMENTS@
#end if
-DNAMATRIX=$type_conditional.multiple_alignments.DNAMATRIX
@MULTIPLE_ALIGNMENTS@
#end if
-OUTPUTTREE=$tree_calculation.OUTPUTTREE
$tree_calculation.KIMURA
$tree_calculation.TOSSGAPS
]]></command>
<inputs>
<param name="input" type="data" format="fasta" label="FASTA file" />
<param name="dnarna" type="select" label="Data type">
<option value="DNA" selected="True">DNA nucleotide sequences</option>
<option value="PROTEIN">Protein sequences</option>
</param>
<!-- Data type switch: each branch expands the shared alignment options with its own matrix argument names and matrix choices. -->
<conditional name="type_conditional">
    <param name="dnarna" type="select" label="Data type">
        <option value="DNA" selected="True">DNA nucleotide sequences</option>
        <option value="PROTEIN">Protein sequences</option>
    </param>
    <when value="DNA">
        <expand macro="macro_options" algorithm="PWDNAMATRIX" multiple="DNAMATRIX" label="DNA" default="IUB">
            <option value="IUB" selected="true">IUB</option>
            <option value="CLUSTALW">CLUSTALW</option>
        </expand>
    </when>
    <when value="PROTEIN">
        <expand macro="macro_options" algorithm="PWMATRIX" multiple="MATRIX" label="Protein" default="GONNET">
            <option value="BLOSUM">BLOSUM</option>
            <option value="PAM">PAM</option>
            <option value="GONNET" selected="true">GONNET</option>
            <option value="ID">ID</option>
        </expand>
    </when>
</conditional>
<!-- Tree output options; the select value and the two boolean flags are appended to the command line. -->
<section name="tree_calculation" title="Tree calculation/BOOTSTRAP options">
    <param argument="-OUTPUTTREE" type="select" label="Output tree/distance format" help="Specify the output format. Default: phylip">
        <option value="PHYLIP">PHYLIP</option>
        <option value="DIST">DIST</option>
        <option value="NJ">NJ</option>
        <option value="NEXUS">NEXUS</option>
    </param>
    <param argument="-KIMURA" type="boolean" truevalue="-KIMURA" falsevalue="" checked="false" label="Use Kimura correction"
        help="For small divergence (say, less than 10%) this option makes no difference. For greater divergence, this option corrects for the fact that observed distances underestimate actual evolutionary distances." />
    <param argument="-TOSSGAPS" type="boolean" truevalue="-TOSSGAPS" falsevalue="" checked="false" label="Ignore positions with gaps" help="Default: No" />
</section>
<conditional name="outcontrol">
<param name="outform" type="select" label="Output alignment format">
<option value="clustal" selected="True">Native Clustal output format</option>
Expand Down Expand Up @@ -69,18 +119,92 @@ clustalw2 -INFILE=input.fasta -OUTFILE='$output' -OUTORDER=$out_order -TYPE=$dna
<test>
<param name="input" value="rgClustal_testin.fasta" />
<param name="outform" value="fasta" />
<param name="dnarna" value="DNA" />
<conditional name="type_conditional">
<param name="dnarna" value="DNA"/>
</conditional>
<param name="mode" value="complete" />
<param name="out_order" value="ALIGNED" />
<output name="output" file="rgClustal_testout.fasta" ftype="fasta" />
<output name="dnd" file="rgClustal_testin.dnd" ftype="nhx" />
</test>
<!-- DNA input, slow pairwise algorithm, with every multiple alignment and tree option set explicitly -->
<test expect_num_outputs="2">
    <param name="input" value="rgClustal_testin.fasta"/>
    <param name="out_order" value="ALIGNED"/>
    <section name="tree_calculation">
        <param name="OUTPUTTREE" value="PHYLIP"/>
        <param name="KIMURA" value="true"/>
        <param name="TOSSGAPS" value="true"/>
    </section>
    <conditional name="type_conditional">
        <param name="dnarna" value="DNA"/>
        <section name="multiple_alignments">
            <param name="DNAMATRIX" value="IUB"/>
            <param name="GAPOPEN" value="2"/>
            <param name="GAPEXT" value="1"/>
            <param name="ENDGAPS" value="true"/>
            <param name="GAPDIST" value="1"/>
            <param name="NOPGAP" value="true"/>
            <param name="NOHGAP" value="true"/>
            <param name="MAXDIV" value="1"/>
            <param name="NEGATIVE" value="true"/>
            <param name="TRANSWEIGHT" value="1"/>
        </section>
        <conditional name="algorithm_conditional">
            <param name="selector" value=""/>
            <section name="slow_pairwise_alignments">
                <param name="PWGAPOPEN" value="2"/>
                <param name="PWGAPEXT" value="1"/>
            </section>
        </conditional>
    </conditional>
    <output name="output" file="rgClustal_testout02.aln" ftype="clustal"/>
    <output name="dnd" file="rgClustal_testout02.dnd" ftype="nhx"/>
</test>
<!-- Protein input with the fast pairwise algorithm selected and its options set explicitly -->
<test expect_num_outputs="2">
    <param name="input" value="rgClustal_testin_prot.fasta"/>
    <param name="out_order" value="ALIGNED"/>
    <section name="tree_calculation">
        <param name="OUTPUTTREE" value="PHYLIP"/>
        <param name="KIMURA" value="false"/>
        <param name="TOSSGAPS" value="false"/>
    </section>
    <conditional name="type_conditional">
        <param name="dnarna" value="PROTEIN"/>
        <section name="multiple_alignments">
            <param name="MATRIX" value="BLOSUM"/>
            <param name="GAPOPEN" value="3"/>
            <param name="GAPEXT" value="1"/>
            <param name="ENDGAPS" value="true"/>
            <param name="GAPDIST" value="2"/>
            <param name="NOPGAP" value="true"/>
            <param name="NOHGAP" value="true"/>
            <param name="MAXDIV" value="1"/>
            <param name="NEGATIVE" value="true"/>
            <param name="TRANSWEIGHT" value="1"/>
        </section>
        <conditional name="algorithm_conditional">
            <param name="selector" value="-QUICKTREE"/>
            <section name="fast_pairwise_alignments">
                <param name="KTUPLE" value="1"/>
                <param name="TOPDIAGS" value="1"/>
                <param name="WINDOW" value="2"/>
                <param name="PAIRGAP" value="2"/>
                <param name="SCORE" value="PERCENT"/>
            </section>
        </conditional>
    </conditional>
    <output name="output" file="rgClustal_testout03.aln" ftype="clustal"/>
    <output name="dnd" file="rgClustal_testout03.dnd" ftype="nhx"/>
</test>
</tests>
<help><![CDATA[
**Note**
.. class:: infomark
This tool allows you to run a multiple sequence alignment with ClustalW_ using the default options.
**Note**
This tool allows you to run a multiple sequence alignment with ClustalW_.
You can align DNA or protein sequences in the input file which should be multiple sequences to be aligned in a FASTA file.
The alignments will appear as a clustal format file or optionally, as PHYLIP or FASTA format files in your history. If you choose FASTA as
Expand All @@ -92,11 +216,13 @@ A subsequence of the alignment can be output by setting the Output complete para
----
.. class:: infomark
**Attribution**
The first iteration of this Galaxy wrapper was written by Hans-Rudolf Hotz.
It was modified by Ross Lazarus for the rgenetics project - tests and some additional parameters were added.
It was modified by Ross Lazarus for the rgenetics project - tests and some additional parameters were added. Cristóbal Gallardo included the remaining parameters.
This wrapper is released under the LGPL_.
Expand Down
Loading

0 comments on commit fdbbc72

Please sign in to comment.