From 8069caa954485a8324e36a068021b5b3ee1c8398 Mon Sep 17 00:00:00 2001
From: Matthias Bernt <m.bernt@ufz.de>
Date: Wed, 27 Jul 2022 16:55:52 +0200
Subject: [PATCH] eggnog mapper: implement search and annotation phase

as separate tools

for rationale see README
---
 tools/eggnog_mapper/eggnog_macros.xml         | 468 +++++++++++++++++-
 tools/eggnog_mapper/eggnog_mapper/README      |  21 +
 .../eggnog_mapper/eggnog_mapper.xml           | 358 +-------------
 .../eggnog_mapper/eggnog_mapper_annotate.xml  | 155 ++++++
 .../eggnog_mapper/eggnog_mapper_search.xml    | 111 +++++
 5 files changed, 763 insertions(+), 350 deletions(-)
 create mode 100644 tools/eggnog_mapper/eggnog_mapper/README
 create mode 100644 tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml
 create mode 100644 tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml
diff --git a/tools/eggnog_mapper/eggnog_macros.xml b/tools/eggnog_mapper/eggnog_macros.xml
index ac49d0b28..8a9142865 100644
--- a/tools/eggnog_mapper/eggnog_macros.xml
+++ b/tools/eggnog_mapper/eggnog_macros.xml
@@ -3,6 +3,7 @@
    <token name="@TOOL_VERSION@">2.1.8</token>
    <token name="@VERSION_SUFFIX@">3</token>
    <token name="@EGGNOG_DB_VERSION@">5.0.2</token>
+   <token name="@PROFILE@">22.01</token>
     <!--
     # DB versionning was super confusing for eggnog-mapper 2.0.x:
     # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5)
@@ -65,6 +66,27 @@ python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file'
         </test>
     </xml>
 
+    
+    <xml name="stdout_assertion">
+        <assert_stdout>
+            <has_line line="#  emapper-@TOOL_VERSION@"/>
+            <has_line line="FINISHED"/>
+            <yield/>
+        </assert_stdout>
+    </xml>
+    <xml name="db_macro">
+        <param name="eggnog_data" type="select" label="Version of eggNOG Database">
+            <options from_data_table="eggnog_mapper_db_versioned">
+                <filter type="static_value" column="3" value="@IDX_VERSION@" />
+            </options>
+        </param>
+    </xml>
+    <token name="@DB_TOKEN@"><![CDATA[
+        --data_dir '$eggnog_data.fields.path'
+    ]]></token>
+
+    <!-- macros and tokens for search -->
+    
     <xml name="fasta_input">
         <param argument="-i" name="input" type="data" format="fasta" label="Fasta sequences to annotate"/>
         <conditional name="input_trans">
@@ -105,15 +127,316 @@ python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file'
         <param argument="--evalue" type="float" optional="true" min="0" label="Minimum query coverage" help="Report only alignments below or equal the e-value" />
         <param argument="--score" type="float" value="0.001" optional="true" min="0" label="Minimum query coverage" help="Report only alignments above or equal the score" />
     </xml>
-    <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token>
+
+    <xml name="ortho_macro">
+        <conditional name="ortho_method">
+            <param argument="-m" type="select" label="Basis for annotation">
+                <yield name="search_options"/>
+                <yield name="reuse_options"/>
+            </param>
+            <yield name="search_whens"/>
+            <yield name="reuse_whens"/>
+        </conditional>
+    </xml>
+
+    <xml name="ortho_search_macro">
+        <expand macro="ortho_macro">
+            <token name="search_options">
+                <expand macro="search_options_macro"/>
+            </token>
+            <token name="search_whens">
+                <expand macro="search_whens_macro"/>
+            </token>
+        </expand>
+    </xml>
+
+    <xml name="ortho_annotate_macro">
+        <expand macro="ortho_macro">
+            <token name="reuse_options">
+                <expand macro="reuse_options_macro"/>
+            </token>
+            <token name="reuse_whens">
+                <expand macro="reuse_whens_macro"/>
+            </token>
+        </expand>
+    </xml>
+
+    <xml name="ortho_full_macro">
+        <expand macro="ortho_macro">
+            <token name="search_options">
+                <expand macro="search_options_macro"/>
+            </token>
+            <token name="reuse_options">
+                <expand macro="reuse_options_macro"/>
+            </token>
+            <token name="search_whens">
+                <expand macro="search_whens_macro"/>
+            </token>
+            <token name="reuse_whens">
+                <expand macro="reuse_whens_macro"/>
+            </token>
+        </expand>
+    </xml>
+
+    <xml name="search_options_macro">
+        <option value="diamond">Seed orthologs computed with Diamond (diamond)</option>
+        <option value="mmseqs">Seed orthologs computed with MMseqs2 (mmseqs)</option>
+    </xml>
+
+    <xml name="reuse_options_macro">
+        <option value="no_search">Use existing seed orthologs (no_search)</option>
+        <option value="cache">Use cached annotations (cache). See also --md5</option>
+    </xml>
+
+    <xml name="search_whens_macro">
+        <when value="diamond">
+            <expand macro="fasta_input"/>
+            <conditional name="matrix_gapcosts">
+                <param argument="--matrix" type="select" label="Scoring matrix and gap costs">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62</option>
+                    <option value="BLOSUM50">BLOSUM50</option>
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <when value="BLOSUM90">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 9 --gapextend 2">Existence: 9  Extension: 2</option>
+                        <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
+                        <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
+                        <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
+                        <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
+                        <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
+                        <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="BLOSUM80">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
+                        <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
+                        <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
+                        <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
+                        <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
+                        <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="BLOSUM62">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 11 --gapextend 2">Existence: 11  Extension: 2</option>
+                        <option value="--gapopen 10 --gapextend 2">Existence: 10  Extension: 2</option>
+                        <option value="--gapopen 9 --gapextend 2">Existence: 9  Extension: 2</option>
+                        <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
+                        <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
+                        <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
+                        <option value="--gapopen 13 --gapextend 1">Existence: 13  Extension: 1</option>
+                        <option value="--gapopen 12 --gapextend 1">Existence: 12  Extension: 1</option>
+                        <option value="--gapopen 11 --gapextend 1" selected="true">Existence: 11  Extension: 1</option>
+                        <option value="--gapopen 10 --gapextend 1">Existence: 10  Extension: 1</option>
+                        <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="BLOSUM50">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
+                        <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
+                        <option value="--gapopen 11 --gapextend 3">Existence: 11  Extension: 3</option>
+                        <option value="--gapopen 10 --gapextend 3">Existence: 10  Extension: 3</option>
+                        <option value="--gapopen 9 --gapextend 3">Existence: 9  Extension: 3</option>
+                        <option value="--gapopen 16 --gapextend 2">Existence: 16  Extension: 2</option>
+                        <option value="--gapopen 15 --gapextend 2">Existence: 15  Extension: 2</option>
+                        <option value="--gapopen 14 --gapextend 2">Existence: 14  Extension: 2</option>
+                        <option value="--gapopen 13 --gapextend 2" selected="true">Existence: 13  Extension: 2</option>
+                        <option value="--gapopen 12 --gapextend 2">Existence: 12  Extension: 2</option>
+                        <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
+                        <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
+                        <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
+                        <option value="--gapopen 16 --gapextend 1">Existence: 16  Extension: 1</option>
+                        <option value="--gapopen 15 --gapextend 1">Existence: 15  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="BLOSUM45">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
+                        <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
+                        <option value="--gapopen 11 --gapextend 3">Existence: 11  Extension: 3</option>
+                        <option value="--gapopen 10 --gapextend 3">Existence: 10  Extension: 3</option>
+                        <option value="--gapopen 15 --gapextend 2" selected="true">Existence: 15  Extension: 2</option>
+                        <option value="--gapopen 14 --gapextend 2">Existence: 14  Extension: 2</option>
+                        <option value="--gapopen 13 --gapextend 2">Existence: 13  Extension: 2</option>
+                        <option value="--gapopen 12 --gapextend 2">Existence: 12  Extension: 2</option>
+                        <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
+                        <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
+                        <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
+                        <option value="--gapopen 16 --gapextend 1">Existence: 16  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="PAM250">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 15 --gapextend 3">Existence: 15  Extension: 3</option>
+                        <option value="--gapopen 14 --gapextend 3">Existence: 14  Extension: 3</option>
+                        <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
+                        <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
+                        <option value="--gapopen 17 --gapextend 2">Existence: 17  Extension: 2</option>
+                        <option value="--gapopen 16 --gapextend 2">Existence: 16  Extension: 2</option>
+                        <option value="--gapopen 15 --gapextend 2">Existence: 15  Extension: 2</option>
+                        <option value="--gapopen 14 --gapextend 2" selected="true">Existence: 14  Extension: 2</option>
+                        <option value="--gapopen 13 --gapextend 2">Existence: 13  Extension: 2</option>
+                        <option value="--gapopen 21 --gapextend 1">Existence: 21  Extension: 1</option>
+                        <option value="--gapopen 20 --gapextend 1">Existence: 20  Extension: 1</option>
+                        <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
+                        <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
+                        <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="PAM70">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 12 --gapextend 3">Existence: 12 Extension: 3</option>
+                        <option value="--gapopen 11 --gapextend 2">Existence: 11 Extension: 2</option>
+                        <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
+                        <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
+                        <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
+                        <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
+                        <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
+                        <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
+                    </param>
+                </when>
+                <when value="PAM30">
+                    <param name="gap_costs" type="select" label="Gap Costs">
+                        <option value="--gapopen 15 --gapextend 3">Existence: 15 Extension: 3</option>
+                        <option value="--gapopen 13 --gapextend 3">Existence: 13 Extension: 3</option>
+                        <option value="--gapopen 14 --gapextend 2">Existence: 14 Extension: 2</option>
+                        <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
+                        <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
+                        <option value="--gapopen 5 --gapextend 2">Existence: 5  Extension: 2</option>
+                        <option value="--gapopen 14 --gapextend 1">Existence: 14 Extension: 1</option>
+                        <option value="--gapopen 10 --gapextend 1">Existence: 10  Extension: 1</option>
+                        <option value="--gapopen 9 --gapextend 1" selected="true">Existence: 9  Extension: 1</option>
+                        <option value="--gapopen 8 --gapextend 1">Existence: 8  Extension: 1</option>
+                    </param>
+                </when>
+            </conditional>
+            <param argument="--sensmode" type="select" label="Diamond's sensitivity mode">
+                <option value="default">default</option>
+                <option value="fast">fast</option>
+                <option value="mid-sensitive">mid-sensitive</option>
+                <option value="sensitive" selected="true">sensitive</option>
+                <option value="more-sensitive">more-sensitive</option>
+                <option value="very-sensitive">very-sensitive</option>
+                <option value="ultra-sensitive">ultra-sensitive</option>
+            </param>
+            <param argument="--dmnd_iterate" type="boolean" truevalue="--dmnd_iterate yes" falsevalue="--dmnd_iterate no" checked="false"
+                label="Run diamond in iterative mode, up to the sensitivity level"/>
+            <param argument="--dmnd_ignore_warnings" type="boolean" truevalue="--dmnd_ignore_warnings" falsevalue="" checked="false"
+                label="Ignore Diamond warnings on sequence content (e.g. when a protein contains only ATGC symbols)"/>
+            <expand macro="common_search_options"/>
+        </when>
+        <when value="mmseqs">
+            <expand macro="fasta_input"/>
+            <param argument="--start_sens" type="integer" value="3" min="0" max="100" label="Starting sensitivity" />
+            <param argument="--sens_steps" type="integer" value="3" min="0" max="100" label="Number of sensitivity steps" />
+            <param argument="--final_sens" type="integer" value="7" min="0" max="100" label="Final sensititivy step" />
+            <expand macro="common_search_options"/>
+        </when>
+    </xml>
+
+    <xml name="reuse_whens_macro">
+            <when value="no_search">
+                <param argument="--annotate_hits_table" type="data" multiple="true" format="tabular" label="Seed orthologs">
+                    <validator type="expression" message="No seed orthologs">value.metadata.columns == 11</validator>
+                    <!-- would be cool to replace with this validator: 
+                        <validator type="expression" message="No seed orthologs">value.metadata.column_names == '@SEED_ORTHOLOG_COLUMNS@'.split()</validator>
+                        but this does not work (yet) in tool tests since column_names can not be set in uploads -->
+                </param>
+            </when>
+            <when value="cache">
+                <expand macro="fasta_input"/>
+                <param argument="--cache"  type="data" format="tabular" label="EggNOG Annotations with md5 hashes" help="Annotations computed with EggNOG mapper with enabled --md5 option">
+                    <validator type="expression" message="No seed orthologs">value.metadata.columns == 22</validator>
+                </param>
+                <param name="output_no_annotations" type="boolean" checked="true" label="Output sequences without annotation" help="Produce an additional FASTA file with the sequences of queries for which an existing annotation was not found using cache mode. This file can be used as input of another eggNOG-mapper run without using the cache, trying to annotate the sequences."/>
+            </when>
+    </xml>
+
+    <token name="@MERGE_ANNOTATIONS@"><![CDATA[
+        #if $ortho_method.m == "no_search"
+            cat 
+            #for aht in $ortho_method.annotate_hits_table
+                $aht
+            #end for
+            > annotate_hits_table.tsv
+            &&
+        #end if
+    ]]></token>
+
+    <token name="@ORTHO_SEARCH_TOKEN@"><![CDATA[
+        -m '$ortho_method.m'
+        #if $ortho_method.m in ['diamond', 'mmseqs', 'cache']:
+            -i '$ortho_method.input'
+            --itype '$ortho_method.input_trans.itype'
+            #if $ortho_method.input_trans.itype in ['CDS', 'genome', 'metagenome']:
+                $ortho_method.input_trans.translate
+            #end if
+            #if $ortho_method.input_trans.itype in ['genome', 'metagenome']:
+                --genepred $ortho_method.input_trans.genepred
+            #end if
+        #elif $ortho_method.m == "no_search"
+            --annotate_hits_table annotate_hits_table.tsv
+        #end if
+        
+        #if $ortho_method.m == 'cache'
+            --cache '$ortho_method.cache'
+        #end if
+        #if $ortho_method.m == 'no_search'
+            --annotate_hits_table annotate_hits_table.tsv
+        #end if
+
+        #if $ortho_method.m in ['diamond', 'mmseqs']:
+            ## Diamond option
+            #if $ortho_method.m == "diamond":
+                --matrix '$ortho_method.matrix_gapcosts.matrix'
+                $ortho_method.matrix_gapcosts.gap_costs
+                --sensmode $ortho_method.sensmode
+                $ortho_method.dmnd_iterate
+                $ortho_method.dmnd_ignore_warnings
+            #elif $ortho_method.m == "mmseqs":
+                --start_sens $ortho_method.start_sens
+                --sens_steps $ortho_method.sens_steps
+                --final_sens $ortho_method.final_sens
+            #end if
+
+            ## Common options for search filtering (applies to diamond and mmseqs only)
+            #if str($ortho_method.query_cover):
+                --query_cover $ortho_method.query_cover
+            #end if
+            #if str($ortho_method.subject_cover):
+                --subject_cover $ortho_method.subject_cover
+            #end if
+            #if str($ortho_method.pident):
+                --pident $ortho_method.pident
+            #end if
+            #if str($ortho_method.evalue):
+                --evalue $ortho_method.evalue
+            #end if
+            #if str($ortho_method.score):
+                --score $ortho_method.score
+            #end if
+        #end if
+    ]]></token>
     
-    <xml name="stdout_assertion">
-        <assert_stdout>
-            <has_line line="#  emapper-@TOOL_VERSION@"/>
-            <has_line line="FINISHED"/>
-            <yield/>
-        </assert_stdout>
+    <token name="@SEED_ORTHOLOG_COLUMNS@">query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov</token>
+
+    <xml name="ortho_search_outputs_macro">
+        <data name="seed_orthologs" format="tabular" label="${tool.name} on ${on_string}: seed_orthologs" from_work_dir="results.emapper.seed_orthologs">
+            <filter>ortho_method['m'] not in ['no_search', 'cache']</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="@SEED_ORTHOLOG_COLUMNS@"/>
+            </actions>
+        </data>
     </xml>
+
     <xml name="seed_orthologs_assertion">
         <output name="seed_orthologs" ftype="tabular">
             <assert_contents>
@@ -122,6 +445,83 @@ python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file'
             </assert_contents>
         </output>
     </xml>
+
+    <!-- macros and tokens for annotate -->
+
+    <xml name="annotation_options_macro">
+        <param argument="--seed_ortholog_evalue" type="float" value="0.001" min="0" label="Min E-value threshold">
+            <help>
+            Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
+            Queries not having a significant seed orthologs (E-value less than threshold) will not be annotated.
+            </help>
+        </param>
+        <param argument="--seed_ortholog_score" type="float" optional="true" min="0" label="Minimum bit score threshold">
+            <help>
+            Min bit score expected when searching for seed eggNOG ortholog.
+            Queries not having a significant seed orthologs will not be annotated.
+            </help>
+        </param>
+        <param argument="--tax_scope" type="integer" optional="true" label="Set taxonomic scope" help="NCBI taxonomy id" />
+        <param argument="--target_orthologs" type="select" label="target orthologs for functional transfer">
+            <option value="one2one">one2one</option>
+            <option value="many2one">many2one</option>
+            <option value="one2many">one2many</option>
+            <option value="many2many">many2many</option>
+            <option value="all" selected="true">all</option>
+        </param>
+        <param argument="--go_evidence" type="select"
+                label="Select the set of GO terms that should be used for annotation">
+            <option value="experimental">experimental = Use only terms inferred from experimental evidence</option>
+            <option value="non-electronic" selected="true">non-electronic = Use only non-electronically curated terms</option>
+            <option value="all" selected="true">All (experimental + non-electronic)</option>
+        </param>
+    </xml>
+    <token name="@ANNOTATION_TOKEN@"><![CDATA[
+    #if str($annotation_options.seed_ortholog_evalue):
+        --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue
+    #end if
+    #if str($annotation_options.seed_ortholog_score):
+        --seed_ortholog_score $annotation_options.seed_ortholog_score
+    #end if
+    #if $annotation_options.tax_scope:
+        --tax_scope=$annotation_options.tax_scope
+    #end if
+    #if $annotation_options.target_orthologs:
+        --target_orthologs=$annotation_options.target_orthologs
+    #end if
+    #if $annotation_options.go_evidence:
+        --go_evidence=$annotation_options.go_evidence
+    #end if
+    ]]></token>
+    <xml name="annotation_output_macro">
+        <data name="annotations" format="tabular" label="${tool.name} on ${on_string}: annotations" from_work_dir="results.emapper.annotations">
+            <yield/>
+            <actions>
+                <conditional name="output_options.md5">
+                    <when value="True">
+                        <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs,md5"/>
+                    </when>
+                    <when value="False">
+                        <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs"/>
+                    </when>
+                </conditional>
+            </actions>
+        </data>
+        <data name="no_annotations" format="fasta" label="${tool.name} on ${on_string}: sequences without annotation"  from_work_dir="results.emapper.no_annotations.fasta">
+            <filter>ortho_method['m'] == 'cache' and ortho_method['output_no_annotations']</filter>
+        </data>
+    </xml>
+
+    <xml name="annotation_orthologs_output_macro">
+        <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: orthologs"  from_work_dir="results.emapper.orthologs">
+            <filter>ortho_method['m'] != 'cache'</filter>
+            <filter>output_options['report_orthologs']</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="query,orth_type,species,orthologs"/>
+            </actions>
+        </data>
+    </xml>
+
     <xml name="annotations_assertion" token_columns="21" token_add_metadata_columm_names="" token_add_column_names="" token_add_column_re="">
         <output name="annotations" ftype="tabular">
             <metadata name="columns" value="@COLUMNS@" />
@@ -142,4 +542,58 @@ python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file'
             </assert_contents>
         </output>
     </xml>
+    <token name="@HELP_SEARCH_OUTPUTS@"><![CDATA[
+        **seed orthologs**
+        
+        each line in the file provides the best match of each query within the best Orthologous Group (OG)
+        reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.
+        The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.
+        If using the diamond search mode, seed orthologs are directly
+        obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.
+    ]]></token>
+    <token name="@HELP_ANNOTATION_OUTPUTS@"><![CDATA[
+        **annotations**
+
+        This file provides final annotations of each query. Tab-delimited columns in the file are:
+        
+        - ``query_name``: query sequence name
+        - ``seed_eggNOG_ortholog``: best protein match in eggNOG
+        - ``seed_ortholog_evalue``: best protein match (e-value)
+        - ``seed_ortholog_score``: best protein match (bit-score)
+        - ``predicted_taxonomic_group``
+        - ``predicted_protein_name``: Predicted protein name for query sequences
+        - ``GO_terms``: Comma delimited list of predicted Gene Ontology terms
+        - ``EC_number``
+        - ``KEGG_KO``
+        - ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways
+        - ``KEGG_Module``
+        - ``KEGG_Reaction``
+        - ``KEGG_rclass``
+        - ``BRITE``
+        - ``KEGG_TC``
+        - ``CAZy``
+        - ``BiGG_Reactions``
+        - ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence
+        - ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups
+        - ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)
+        - ``COG_functional_categories``: COG functional category inferred from best matching OG
+        - ``eggNOG_free_text_description``
+        
+        **orthologs**
+
+        This output is only created if the option ``--report_orthologs`` is checked.
+        It provides the orthologs used for the annotation. It's a tab delimited file with the following columns:
+        
+        - ``query``
+        - ``orth_type`` Type of orthologs in this row. See --target_orthologs.
+        - ``species``
+        - ``orthologs`` comma-separated list of orthologs (If an ortholog shows a "*", such ortholog was used to transfer its annotations to the query.)
+        
+        **sequences without annotation **
+
+        This output is created if cached annotations are used as input. 
+        It is a FASTA file containing all sequences that are not found in the cached annotations.
+        These sequences can then be used as input for another run of the EggNOG mapper
+        computing seed orthologs with diamond, etc.
+    ]]></token>
 </macros>
diff --git a/tools/eggnog_mapper/eggnog_mapper/README b/tools/eggnog_mapper/eggnog_mapper/README
new file mode 100644
index 000000000..fdef9b01b
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/README
@@ -0,0 +1,21 @@
+This folder contains three tools: 
+
+1. eggnogg_mapper: which runs the search and annotation phase in a single tool
+2. eggnogg_mapper_search: which implements the search phase
+3. eggnogg_mapper_annotate: which implements the annotation phase
+
+While the search phase of eggnog_mapper is very CPU intense and is efficient
+also for larger number of threads, the anotation phase is very IO intensive
+and can be very inefficient (depending on the configuration, e.g. if the
+reference data is located on a slow partition).
+
+While for most applications eggnogg_mapper will be sufficient separating the
+two phases can be more efficient: 
+
+- sending eggnogg_mapper_search to a destination using many threads
+- and eggnogg_mapper_annotate to a destination using a small numbe of threads
+
+If eggnogg_mapper_annotate is send to a single core destination
+the option `--dbmem` is activated which will copy the complete
+EggNOG annotation DB into memory which is usually much faster
+than using multiple cores (but needs approx. 37GB of RAM).
\ No newline at end of file
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
index 502422f39..0666d582e 100644
--- a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
@@ -1,4 +1,4 @@
-<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="eggnog_mapper" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>functional sequence annotation by orthology</description>
     <macros>
         <import>eggnog_macros.xml</import>
@@ -6,86 +6,15 @@
     <expand macro="requirements"/>
     <expand macro="version_command"/>
     <command detect_errors="aggressive"><![CDATA[
-        #if $ortho_method.m == "no_search"
-            cat 
-            #for aht in $ortho_method.annotate_hits_table
-                $aht
-            #end for
-            > annotate_hits_table.tsv
-            &&
-        #end if
+        @MERGE_ANNOTATIONS@
 
         emapper.py
-        --data_dir '$eggnog_data.fields.path'
-        -m '$ortho_method.m'
-
-        #if $ortho_method.m in ['diamond', 'mmseqs', 'cache']:
-            -i '$ortho_method.input'
-            --itype '$ortho_method.input_trans.itype'
-            #if $ortho_method.input_trans.itype in ['CDS', 'genome', 'metagenome']:
-                $ortho_method.input_trans.translate
-            #end if
-            #if $ortho_method.input_trans.itype in ['genome', 'metagenome']:
-                --genepred $ortho_method.input_trans.genepred
-            #end if
-        #elif $ortho_method.m == "no_search"
-            --annotate_hits_table annotate_hits_table.tsv
-        #end if
-        
-        #if $ortho_method.m == 'cache'
-            --cache '$ortho_method.cache'
-        #end if
-
-        #if $ortho_method.m in ['diamond', 'mmseqs']:
-            ## Diamond option
-            #if $ortho_method.m == "diamond":
-                --matrix '$ortho_method.matrix_gapcosts.matrix'
-                $ortho_method.matrix_gapcosts.gap_costs
-                --sensmode $ortho_method.sensmode
-                $ortho_method.dmnd_iterate
-                $ortho_method.dmnd_ignore_warnings
-            #elif $ortho_method.m == "mmseqs":
-                --start_sens $ortho_method.start_sens
-                --sens_steps $ortho_method.sens_steps
-                --final_sens $ortho_method.final_sens
-            #end if
-
-            ## Common options for search filtering (applies to diamond and mmseqs only)
-            #if str($ortho_method.query_cover):
-                --query_cover $ortho_method.query_cover
-            #end if
-            #if str($ortho_method.subject_cover):
-                --subject_cover $ortho_method.subject_cover
-            #end if
-            #if str($ortho_method.pident):
-                --pident $ortho_method.pident
-            #end if
-            #if str($ortho_method.evalue):
-                --evalue $ortho_method.evalue
-            #end if
-            #if str($ortho_method.score):
-                --score $ortho_method.score
-            #end if
-        #end if
-
+        @DB_TOKEN@
+        @ORTHO_SEARCH_TOKEN@
         #if $annotation_options.no_annot == "--no_annot"
             --no_annot
         #else
-            #if str($annotation_options.seed_ortholog_evalue):
-                --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue
-            #end if
-            #if str($annotation_options.seed_ortholog_score):
-                --seed_ortholog_score $annotation_options.seed_ortholog_score
-            #end if
-            #if $annotation_options.tax_scope:
-                --tax_scope=$annotation_options.tax_scope
-            #end if
-            #if $annotation_options.target_orthologs:
-                --target_orthologs=$annotation_options.target_orthologs
-            #end if
-            #if $annotation_options.go_evidence:
-                --go_evidence=$annotation_options.go_evidence
-            #end if
+            @ANNOTATION_TOKEN@
         #end if
         $output_options.no_file_comments
         $output_options.report_orthologs
@@ -96,219 +25,15 @@
         --temp_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
     ]]></command>
     <inputs>
-        <param name="eggnog_data" type="select" label="Version of eggNOG Database">
-            <options from_data_table="eggnog_mapper_db_versioned">
-                <filter type="static_value" column="3" value="@IDX_VERSION@" />
-            </options>
-        </param>
-
-        <conditional name="ortho_method">
-            <param argument="-m" type="select" label="Method to search seed orthologs">
-                <option value="diamond" selected="true">Diamond</option>
-                <option value="mmseqs">MMseqs2</option>
-                <option value="no_search">Skip search stage: annotate an existing seed orthologs data set</option>
-                <option value="cache">Skip search stage: annotate based on cached annotations (see also --md5)</option>
-            </param>
-            <when value="diamond">
-                <expand macro="fasta_input"/>
-                <conditional name="matrix_gapcosts">
-                    <param argument="--matrix" type="select" label="Scoring matrix and gap costs">
-                        <option value="BLOSUM90">BLOSUM90</option>
-                        <option value="BLOSUM80">BLOSUM80</option>
-                        <option value="BLOSUM62" selected="true">BLOSUM62</option>
-                        <option value="BLOSUM50">BLOSUM50</option>
-                        <option value="BLOSUM45">BLOSUM45</option>
-                        <option value="PAM250">PAM250</option>
-                        <option value="PAM70">PAM70</option>
-                        <option value="PAM30">PAM30</option>
-                    </param>
-                    <when value="BLOSUM90">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 9 --gapextend 2">Existence: 9  Extension: 2</option>
-                            <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
-                            <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
-                            <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
-                            <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
-                            <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
-                            <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="BLOSUM80">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
-                            <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
-                            <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
-                            <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
-                            <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
-                            <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="BLOSUM62">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 11 --gapextend 2">Existence: 11  Extension: 2</option>
-                            <option value="--gapopen 10 --gapextend 2">Existence: 10  Extension: 2</option>
-                            <option value="--gapopen 9 --gapextend 2">Existence: 9  Extension: 2</option>
-                            <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
-                            <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
-                            <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
-                            <option value="--gapopen 13 --gapextend 1">Existence: 13  Extension: 1</option>
-                            <option value="--gapopen 12 --gapextend 1">Existence: 12  Extension: 1</option>
-                            <option value="--gapopen 11 --gapextend 1" selected="true">Existence: 11  Extension: 1</option>
-                            <option value="--gapopen 10 --gapextend 1">Existence: 10  Extension: 1</option>
-                            <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="BLOSUM50">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
-                            <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
-                            <option value="--gapopen 11 --gapextend 3">Existence: 11  Extension: 3</option>
-                            <option value="--gapopen 10 --gapextend 3">Existence: 10  Extension: 3</option>
-                            <option value="--gapopen 9 --gapextend 3">Existence: 9  Extension: 3</option>
-                            <option value="--gapopen 16 --gapextend 2">Existence: 16  Extension: 2</option>
-                            <option value="--gapopen 15 --gapextend 2">Existence: 15  Extension: 2</option>
-                            <option value="--gapopen 14 --gapextend 2">Existence: 14  Extension: 2</option>
-                            <option value="--gapopen 13 --gapextend 2" selected="true">Existence: 13  Extension: 2</option>
-                            <option value="--gapopen 12 --gapextend 2">Existence: 12  Extension: 2</option>
-                            <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
-                            <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
-                            <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
-                            <option value="--gapopen 16 --gapextend 1">Existence: 16  Extension: 1</option>
-                            <option value="--gapopen 15 --gapextend 1">Existence: 15  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="BLOSUM45">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
-                            <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
-                            <option value="--gapopen 11 --gapextend 3">Existence: 11  Extension: 3</option>
-                            <option value="--gapopen 10 --gapextend 3">Existence: 10  Extension: 3</option>
-                            <option value="--gapopen 15 --gapextend 2" selected="true">Existence: 15  Extension: 2</option>
-                            <option value="--gapopen 14 --gapextend 2">Existence: 14  Extension: 2</option>
-                            <option value="--gapopen 13 --gapextend 2">Existence: 13  Extension: 2</option>
-                            <option value="--gapopen 12 --gapextend 2">Existence: 12  Extension: 2</option>
-                            <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
-                            <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
-                            <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
-                            <option value="--gapopen 16 --gapextend 1">Existence: 16  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="PAM250">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 15 --gapextend 3">Existence: 15  Extension: 3</option>
-                            <option value="--gapopen 14 --gapextend 3">Existence: 14  Extension: 3</option>
-                            <option value="--gapopen 13 --gapextend 3">Existence: 13  Extension: 3</option>
-                            <option value="--gapopen 12 --gapextend 3">Existence: 12  Extension: 3</option>
-                            <option value="--gapopen 17 --gapextend 2">Existence: 17  Extension: 2</option>
-                            <option value="--gapopen 16 --gapextend 2">Existence: 16  Extension: 2</option>
-                            <option value="--gapopen 15 --gapextend 2">Existence: 15  Extension: 2</option>
-                            <option value="--gapopen 14 --gapextend 2" selected="true">Existence: 14  Extension: 2</option>
-                            <option value="--gapopen 13 --gapextend 2">Existence: 13  Extension: 2</option>
-                            <option value="--gapopen 21 --gapextend 1">Existence: 21  Extension: 1</option>
-                            <option value="--gapopen 20 --gapextend 1">Existence: 20  Extension: 1</option>
-                            <option value="--gapopen 19 --gapextend 1">Existence: 19  Extension: 1</option>
-                            <option value="--gapopen 18 --gapextend 1">Existence: 18  Extension: 1</option>
-                            <option value="--gapopen 17 --gapextend 1">Existence: 17  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="PAM70">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 12 --gapextend 3">Existence: 12 Extension: 3</option>
-                            <option value="--gapopen 11 --gapextend 2">Existence: 11 Extension: 2</option>
-                            <option value="--gapopen 8 --gapextend 2">Existence: 8  Extension: 2</option>
-                            <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
-                            <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
-                            <option value="--gapopen 11 --gapextend 1">Existence: 11  Extension: 1</option>
-                            <option value="--gapopen 10 --gapextend 1" selected="true">Existence: 10  Extension: 1</option>
-                            <option value="--gapopen 9 --gapextend 1">Existence: 9  Extension: 1</option>
-                        </param>
-                    </when>
-                    <when value="PAM30">
-                        <param name="gap_costs" type="select" label="Gap Costs">
-                            <option value="--gapopen 15 --gapextend 3">Existence: 15 Extension: 3</option>
-                            <option value="--gapopen 13 --gapextend 3">Existence: 13 Extension: 3</option>
-                            <option value="--gapopen 14 --gapextend 2">Existence: 14 Extension: 2</option>
-                            <option value="--gapopen 7 --gapextend 2">Existence: 7  Extension: 2</option>
-                            <option value="--gapopen 6 --gapextend 2">Existence: 6  Extension: 2</option>
-                            <option value="--gapopen 5 --gapextend 2">Existence: 5  Extension: 2</option>
-                            <option value="--gapopen 14 --gapextend 1">Existence: 14 Extension: 1</option>
-                            <option value="--gapopen 10 --gapextend 1">Existence: 10  Extension: 1</option>
-                            <option value="--gapopen 9 --gapextend 1" selected="true">Existence: 9  Extension: 1</option>
-                            <option value="--gapopen 8 --gapextend 1">Existence: 8  Extension: 1</option>
-                        </param>
-                    </when>
-                </conditional>
-                <param argument="--sensmode" type="select" label="Diamond's sensitivity mode">
-                    <option value="default">default</option>
-                    <option value="fast">fast</option>
-                    <option value="mid-sensitive">mid-sensitive</option>
-                    <option value="sensitive" selected="true">sensitive</option>
-                    <option value="more-sensitive">more-sensitive</option>
-                    <option value="very-sensitive">very-sensitive</option>
-                    <option value="ultra-sensitive">ultra-sensitive</option>
-                </param>
-                <param argument="--dmnd_iterate" type="boolean" truevalue="--dmnd_iterate yes" falsevalue="--dmnd_iterate no" checked="false"
-                    label="Run diamond in iterative mode, up to the sensitivity level"/>
-                <param argument="--dmnd_ignore_warnings" type="boolean" truevalue="--dmnd_ignore_warnings" falsevalue="" checked="false"
-                    label="Ignore Diamond warnings on sequence content (e.g. when a protein contains only ATGC symbols)"/>
-                <expand macro="common_search_options"/>
-            </when>
-            <when value="mmseqs">
-                <expand macro="fasta_input"/>
-                <param argument="--start_sens" type="integer" value="3" min="0" max="100" label="Starting sensitivity" />
-                <param argument="--sens_steps" type="integer" value="3" min="0" max="100" label="Number of sensitivity steps" />
-                <param argument="--final_sens" type="integer" value="7" min="0" max="100" label="Final sensititivy step" />
-                <expand macro="common_search_options"/>
-            </when>
-            <when value="no_search">
-                <param argument="--annotate_hits_table" type="data" multiple="true" format="tabular" label="Seed orthologs">
-                    <validator type="expression" message="No seed orthologs">value.metadata.columns == 11</validator>
-                    <!-- would be cool to replace with this validator: 
-                        <validator type="expression" message="No seed orthologs">value.metadata.column_names == '@SEED_ORTHOLOG_COLUMNS@'.split()</validator>
-                        but this does not work (yet) in tool tests since column_names can not be set in uploads -->
-                </param>
-            </when>
-            <when value="cache">
-                <expand macro="fasta_input"/>
-                <param argument="--cache"  type="data" multiple="true" format="tabular" label="EggNOG Annotations with md5 hashes" help="Annotations computed with EggNOG mapper with enabled --md5 option">
-                    <validator type="expression" message="No seed orthologs">value.metadata.columns == 22</validator>
-                </param>
-                <param name="output_no_annotations" type="boolean" checked="true" label="Output sequences without annotation" help="Produce an additional FASTA file with the sequences of queries for which an existing annotation was not found using cache mode. This file can be used as input of another eggNOG-mapper run without using the cache, trying to annotate the sequences."/>
-            </when>
-        </conditional>
-
+        <expand macro="db_macro"/>
+        <expand macro="ortho_full_macro"/>
         <conditional name="annotation_options">
             <param argument="--no_annot" type="select" label="Annotate seed orthologs">
                 <option value="">Yes</option>
                 <option value="--no_annot">No</option>
             </param>
             <when value="">
-                <param argument="--seed_ortholog_evalue" type="float" value="0.001" min="0" label="Min E-value threshold">
-                    <help>
-                    Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
-                    Queries not having a significant seed orthologs (E-value less than threshold) will not be annotated.
-                    </help>
-                </param>
-                <param argument="--seed_ortholog_score" type="float" optional="true" min="0" label="Minimum bit score threshold">
-                    <help>
-                    Min bit score expected when searching for seed eggNOG ortholog.
-                    Queries not having a significant seed orthologs will not be annotated.
-                    </help>
-                </param>
-                <param argument="--tax_scope" type="integer" optional="true" label="Set taxonomic scope" help="NCBI taxonomy id" />
-                <param argument="--target_orthologs" type="select" label="target orthologs for functional transfer">
-                    <option value="one2one">one2one</option>
-                    <option value="many2one">many2one</option>
-                    <option value="one2many">one2many</option>
-                    <option value="many2many">many2many</option>
-                    <option value="all" selected="true">all</option>
-                </param>
-                <param argument="--go_evidence" type="select"
-                        label="Select the set of GO terms that should be used for annotation">
-                    <option value="experimental">experimental = Use only terms inferred from experimental evidence</option>
-                    <option value="non-electronic" selected="true">non-electronic = Use only non-electronically curated terms</option>
-                    <option value="all" selected="true">All (experimental + non-electronic)</option>
-                </param>
+                <expand macro="annotation_options_macro"/>
             </when>
             <when value="--no_annot"/>
         </conditional>
@@ -317,40 +42,17 @@
             <param argument="--no_file_comments" type="boolean" truevalue="--no_file_comments" falsevalue="" checked="true"
                 label="Exclude header lines and stats from output files"/>
             <param argument="--report_orthologs" type="boolean" truevalue="--report_orthologs" falsevalue="" checked="false"
-                label="Output a file with the list of orthologs for each hits"/>
+                label="Output a file with the list of orthologs for each hit"/>
             <param argument="--md5" type="boolean" truevalue="--md5" falsevalue="" checked="false"
                 label="Add md5 hash of each query to annotations"/>
         </section>
     </inputs>
     <outputs>
-        <data name="seed_orthologs" format="tabular" label="${tool.name} on ${on_string}: seed_orthologs" from_work_dir="results.emapper.seed_orthologs">
-            <filter>ortho_method['m'] not in ['no_search', 'cache']</filter>
-            <actions>
-                <action name="column_names" type="metadata" default="@SEED_ORTHOLOG_COLUMNS@"/>
-            </actions>
-        </data>
-        <data name="annotations" format="tabular" label="${tool.name} on ${on_string}: annotations" from_work_dir="results.emapper.annotations">
+        <expand macro="ortho_search_outputs_macro"/>
+        <expand macro="annotation_output_macro">
             <filter>annotation_options['no_annot'] == ''</filter>
-            <actions>
-                <conditional name="output_options.md5">
-                    <when value="True">
-                        <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs,md5"/>
-                    </when>
-                    <when value="False">
-                        <action name="column_names" type="metadata" default="query,seed_ortholog,evalue,score,max_annot_lvl,COG_category,Description,Preferred_name,GOs,EC,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMseggNOG_OGs"/>
-                    </when>
-                </conditional>
-            </actions>
-        </data>
-        <data name="annotations_orthologs" format="tabular" label="${tool.name} on ${on_string}: orthologs"  from_work_dir="results.emapper.orthologs">
-            <filter>ortho_method['m'] != 'cache' and output_options['report_orthologs']</filter>
-            <actions>
-                <action name="column_names" type="metadata" default="query,orth_type,species,orthologs"/>
-            </actions>
-        </data>
-        <data name="no_annotations" format="fasta" label="${tool.name} on ${on_string}: Sequences wo annotation"  from_work_dir="results.emapper.no_annotations.fasta">
-            <filter>ortho_method['m'] == 'cache' and output_options['output_no_annotations']</filter>
-        </data>
+        </expand>
+        <expand macro="annotation_orthologs_output_macro"/>
     </outputs>
     <tests>
         <!-- test producing only seed orthologs-->
@@ -502,40 +204,11 @@ EggNOG-mapper is also available as a public online resource:  `<http://beta-eggn
 Outputs
 -------
 
-**seed_orthologs**
-
-each line in the file provides the best match of each query within the best Orthologous Group (OG)
-reported in the [project].hmm_hits file, obtained running PHMMER against all sequences within the best OG.
-The seed ortholog is used to fetch fine-grained orthology relationships from eggNOG.
-If using the diamond search mode, seed orthologs are directly
-obtained from the best matching sequences by running DIAMOND against the whole eggNOG protein space.
+@HELP_SEARCH_OUTPUTS@
 
-**annotations**
+@HELP_ANNOTATION_OUTPUTS@
 
-This file provides final annotations of each query. Tab-delimited columns in the file are:
 
-- ``query_name``: query sequence name
-- ``seed_eggNOG_ortholog``: best protein match in eggNOG
-- ``seed_ortholog_evalue``: best protein match (e-value)
-- ``seed_ortholog_score``: best protein match (bit-score)
-- ``predicted_taxonomic_group``
-- ``predicted_protein_name``: Predicted protein name for query sequences
-- ``GO_terms``: Comma delimited list of predicted Gene Ontology terms
-- ``EC_number``
-- ``KEGG_KO``
-- ``KEGG_Pathway``: Comma delimited list of predicted KEGG pathways
-- ``KEGG_Module``
-- ``KEGG_Reaction``
-- ``KEGG_rclass``
-- ``BRITE``
-- ``KEGG_TC``
-- ``CAZy``
-- ``BiGG_Reactions``
-- ``Annotation_tax_scope``: The taxonomic scope used to annotate this query sequence
-- ``Matching_OGs``: Comma delimited list of matching eggNOG Orthologous Groups
-- ``best_OG|evalue|score``: Best matching Orthologous Groups (deprecated, use smallest from eggnog OGs)
-- ``COG_functional_categories``: COG functional category inferred from best matching OG
-- ``eggNOG_free_text_description``
 
 **Recommentation for large input data**
 
@@ -558,7 +231,6 @@ See [also](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-t
 
 Another alternative is to use cached annotations (produced in a run with --md5 enabled).
 
-
     ]]></help>
     <expand macro="citations"/>
 </tool>
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml
new file mode 100644
index 000000000..762dc999b
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml
@@ -0,0 +1,155 @@
+<tool id="eggnog_mapper_annotate" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>annotation phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        @MERGE_ANNOTATIONS@
+
+        if [[ "\${GALAXY_SLOTS:-1}" -eq "1" ]]; then
+            DBMEM="--dbmem";
+        else
+            DBMEM="";
+        fi && 
+
+        emapper.py
+        @DB_TOKEN@
+        -m no_search
+        @ORTHO_SEARCH_TOKEN@
+
+        @ANNOTATION_TOKEN@
+        $output_options.no_file_comments
+        $output_options.report_orthologs
+        $output_options.md5
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-1}"
+        \$DBMEM
+        --scratch_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
+        --temp_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/>
+        <expand macro="ortho_annotate_macro"/>
+
+        <section name="annotation_options" title="Annotation options">
+            <expand macro="annotation_options_macro"/>
+        </section>
+        
+        <section name="output_options" expanded="false" title="Output Options">
+            <param argument="--no_file_comments" type="boolean" truevalue="--no_file_comments" falsevalue="" checked="true"
+                label="Exclude header lines and stats from output files"/>
+            <param argument="--report_orthologs" type="boolean" truevalue="--report_orthologs" falsevalue="" checked="false"
+                label="Output a file with the list of orthologs for each hit"/>
+            <param argument="--md5" type="boolean" truevalue="--md5" falsevalue="" checked="false"
+                label="Add md5 hash of each query to annotations"/>
+        </section>
+    </inputs>
+    <outputs>
+        <expand macro="annotation_output_macro"/>
+        <expand macro="annotation_orthologs_output_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing annotations form seed orthologs -->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <conditional name="annotation_options">
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="false"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test using chached annotations from previous run -->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="cache"/>
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="cache" value="DIA_nlim.emapper.annotations_cached" ftype="tabular"/>
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+                <param name="md5" value="true"/>
+            </section>
+            <expand macro="annotations_assertion" columns="22" add_metadata_columm_names=",md5" add_column_names="&#009;md5" add_column_re="\t[\d\w]+"/>
+            <output name="no_annotations" ftype="fasta">
+                <assert_contents>
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+            <expand macro="stdout_assertion"/>
+        </test>
+
+        <!-- test setting tax scope-->
+        <test expect_num_outputs="2">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <param name="annotate_hits_table" value="DIA_nlim.emapper.seed_orthologs" ftype="tabular">
+                <!-- this has no effect at the moment since column_names can not be set in uploads <metadata name="column_names" value="@SEED_ORTHOLOG_COLUMNS@"/> -->
+            </param>
+            <section name="annotation_options">
+                <param name="tax_scope" value="651137" />
+            </section>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="annotations_assertion"/>
+            <expand macro="annotations_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--tax_scope=651137"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommentation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run. 
+
+For large input FASTA datasets in can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See [also](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs)
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml
new file mode 100644
index 000000000..c37e5d8c5
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml
@@ -0,0 +1,111 @@
+<tool id="eggnog_mapper_search" name="eggNOG Mapper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>search phase</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="aggressive"><![CDATA[
+        emapper.py
+        @DB_TOKEN@
+        @ORTHO_SEARCH_TOKEN@
+
+        $output_options.no_file_comments
+        $output_options.report_orthologs
+        $output_options.md5
+        --output='results'
+        --cpu "\${GALAXY_SLOTS:-4}"
+        --scratch_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
+        --temp_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
+    ]]></command>
+    <inputs>
+        <expand macro="db_macro"/>
+        <expand macro="ortho_search_macro"/>
+        <section name="output_options" expanded="false" title="Output Options">
+            <param argument="--no_file_comments" type="boolean" truevalue="--no_file_comments" falsevalue="" checked="true"
+                label="Exclude header lines and stats from output files"/>
+            <param argument="--report_orthologs" type="boolean" truevalue="--report_orthologs" falsevalue="" checked="false"
+                label="Output a file with the list of orthologs for each hits"/>
+            <param argument="--md5" type="boolean" truevalue="--md5" falsevalue="" checked="false"
+                label="Add md5 hash of each query to annotations"/>
+        </section>
+    </inputs>
+    <outputs>
+        <expand macro="ortho_search_outputs_macro"/>
+    </outputs>
+    <tests>
+        <!-- test producing only seed orthologs-->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/>
+            <conditional name="ortho_method">
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+            </conditional>
+            <section name="output_options">
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion"/>
+        </test>
+        
+        <!-- test setting a diamond option-->
+        <test expect_num_outputs="1">
+            <param name="eggnog_data" value="@EGGNOG_DB_VERSION@"/> <!-- not passed in test, but required for test to work -->
+            <conditional name="ortho_method">
+                <param name="m" value="diamond" />
+                <param name="input" value="Nmar_0135.fa" ftype="fasta"/>
+                <param name="sensmode" value="fast" />
+            </conditional>
+            <section name="output_options">
+                <param name="report_orthologs" value="true"/>
+                <param name="no_file_comments" value="true"/>
+            </section>
+            <expand macro="seed_orthologs_assertion"/>
+            <expand macro="stdout_assertion">
+                <has_text text="--sensmode fast"/>
+            </expand>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+eggnog-mapper
+=============
+Overview
+--------
+
+``eggnog-mapper`` is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments.
+Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+The use of orthology predictions for functional annotation is considered more precise than traditional homology searches,
+as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+EggNOG-mapper is also available as a public online resource:  `<http://beta-eggnogdb.embl.de/#/app/emapper>`_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommentation for large input data**
+
+EggNOG-mapper consists of two phases
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+by default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run. 
+
+For large input FASTA datasets in can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See [also](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs)
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>