diff --git a/tools/eggnog_mapper/eggnog_macros.xml b/tools/eggnog_mapper/eggnog_macros.xml
index ac49d0b28..cfed89b33 100644
--- a/tools/eggnog_mapper/eggnog_macros.xml
+++ b/tools/eggnog_mapper/eggnog_macros.xml
@@ -3,6 +3,7 @@
2.1.8
3
5.0.2
+ 22.01
+
@@ -105,41 +127,494 @@ python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file'
- query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value.metadata.columns == 11
+
+
+
+
+
+
+ value.metadata.columns == 22
+
+
+
+
+
+ annotate_hits_table.tsv
+ &&
+ #end if
+ ]]>
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+ query_name,seed_eggNOG_ortholog,seed_ortholog_evalue,seed_ortholog_score,query_start,query_end,seed_start,seed_end,pident,query_cov,seed_cov
+
+
+
+ ortho_method['m'] not in ['no_search', 'cache']
+
+
+
+
-
+
+
-
+
+
+
+
+
+
+
+
+ Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
+ Queries not having a significant seed ortholog (E-value less than threshold) will not be annotated.
+
+
+
+
+ Min bit score expected when searching for seed eggNOG ortholog.
+ Queries not having a significant seed ortholog will not be annotated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ortho_method['m'] == 'cache' and ortho_method['output_no_annotations']
+
+
+
+
+
+ ortho_method['m'] != 'cache'
+ output_options['report_orthologs']
+
+
+
+
+
+
+
-
+
+
diff --git a/tools/eggnog_mapper/eggnog_mapper/README b/tools/eggnog_mapper/eggnog_mapper/README
new file mode 100644
index 000000000..e665ac94f
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/README
@@ -0,0 +1,20 @@
+This folder contains three tools:
+
+1. eggnog_mapper: which runs the search and annotation phase in a single tool
+2. eggnog_mapper_search: which implements the search phase
+3. eggnog_mapper_annotate: which implements the annotation phase
+
+While the search phase of eggnog_mapper is very CPU intensive and remains
+efficient with a larger number of threads, the annotation phase is very IO intensive
+and can be very inefficient (depending on the configuration, e.g. if the
+reference data is located on a slow partition).
+
+While for most applications eggnog_mapper will be sufficient, separating the
+two phases can be more efficient:
+
+- sending eggnog_mapper_search to a destination using many threads
+- and eggnog_mapper_annotate to a destination using a small number of threads
+
+Admins can choose to set the environment variable ``EGGNOG_DBMEM=--dbmem``
+which will copy the complete EggNOG annotation DB into memory which is usually
+much faster than using multiple cores (but needs approx. 37GB of RAM).
\ No newline at end of file
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
index 502422f39..e06a09a6a 100644
--- a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper.xml
@@ -1,4 +1,4 @@
-
+
functional sequence annotation by orthology
eggnog_macros.xml
@@ -6,86 +6,15 @@
annotate_hits_table.tsv
- &&
- #end if
+ @MERGE_ANNOTATIONS@
emapper.py
- --data_dir '$eggnog_data.fields.path'
- -m '$ortho_method.m'
-
- #if $ortho_method.m in ['diamond', 'mmseqs', 'cache']:
- -i '$ortho_method.input'
- --itype '$ortho_method.input_trans.itype'
- #if $ortho_method.input_trans.itype in ['CDS', 'genome', 'metagenome']:
- $ortho_method.input_trans.translate
- #end if
- #if $ortho_method.input_trans.itype in ['genome', 'metagenome']:
- --genepred $ortho_method.input_trans.genepred
- #end if
- #elif $ortho_method.m == "no_search"
- --annotate_hits_table annotate_hits_table.tsv
- #end if
-
- #if $ortho_method.m == 'cache'
- --cache '$ortho_method.cache'
- #end if
-
- #if $ortho_method.m in ['diamond', 'mmseqs']:
- ## Diamond option
- #if $ortho_method.m == "diamond":
- --matrix '$ortho_method.matrix_gapcosts.matrix'
- $ortho_method.matrix_gapcosts.gap_costs
- --sensmode $ortho_method.sensmode
- $ortho_method.dmnd_iterate
- $ortho_method.dmnd_ignore_warnings
- #elif $ortho_method.m == "mmseqs":
- --start_sens $ortho_method.start_sens
- --sens_steps $ortho_method.sens_steps
- --final_sens $ortho_method.final_sens
- #end if
-
- ## Common options for search filtering (applies to diamond and mmseqs only)
- #if str($ortho_method.query_cover):
- --query_cover $ortho_method.query_cover
- #end if
- #if str($ortho_method.subject_cover):
- --subject_cover $ortho_method.subject_cover
- #end if
- #if str($ortho_method.pident):
- --pident $ortho_method.pident
- #end if
- #if str($ortho_method.evalue):
- --evalue $ortho_method.evalue
- #end if
- #if str($ortho_method.score):
- --score $ortho_method.score
- #end if
- #end if
-
+ @DB_TOKEN@
+ @ORTHO_SEARCH_TOKEN@
#if $annotation_options.no_annot == "--no_annot"
--no_annot
#else
- #if str($annotation_options.seed_ortholog_evalue):
- --seed_ortholog_evalue $annotation_options.seed_ortholog_evalue
- #end if
- #if str($annotation_options.seed_ortholog_score):
- --seed_ortholog_score $annotation_options.seed_ortholog_score
- #end if
- #if $annotation_options.tax_scope:
- --tax_scope=$annotation_options.tax_scope
- #end if
- #if $annotation_options.target_orthologs:
- --target_orthologs=$annotation_options.target_orthologs
- #end if
- #if $annotation_options.go_evidence:
- --go_evidence=$annotation_options.go_evidence
- #end if
+ @ANNOTATION_TOKEN@
#end if
$output_options.no_file_comments
$output_options.report_orthologs
@@ -96,261 +25,27 @@
--temp_dir \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- value.metadata.columns == 11
-
-
-
-
-
-
- value.metadata.columns == 22
-
-
-
-
-
+
+
-
-
- Min E-value expected when searching for seed eggNOG ortholog. Applies to phmmer/diamond searches.
- Queries not having a significant seed orthologs (E-value less than threshold) will not be annotated.
-
-
-
-
- Min bit score expected when searching for seed eggNOG ortholog.
- Queries not having a significant seed orthologs will not be annotated.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
+
-
- ortho_method['m'] not in ['no_search', 'cache']
-
-
-
-
-
+
+
annotation_options['no_annot'] == ''
-
-
-
-
-
-
-
-
-
-
-
-
- ortho_method['m'] != 'cache' and output_options['report_orthologs']
-
-
-
-
-
- ortho_method['m'] == 'cache' and output_options['output_no_annotations']
-
+
+
@@ -363,7 +58,6 @@
@@ -382,7 +76,6 @@
@@ -397,12 +90,12 @@
-
-
-
+
+
+
@@ -416,7 +109,6 @@
@@ -439,7 +131,6 @@
@@ -459,7 +150,6 @@
@@ -502,40 +192,11 @@ EggNOG-mapper is also available as a public online resource: `
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml
new file mode 100644
index 000000000..14cc122fc
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_annotate.xml
@@ -0,0 +1,141 @@
+
+ annotation phase
+
+ eggnog_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_.
+
+Outputs
+-------
+
+@HELP_ANNOTATION_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases:
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+By default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+ ]]>
+
+
diff --git a/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml
new file mode 100644
index 000000000..46729f49e
--- /dev/null
+++ b/tools/eggnog_mapper/eggnog_mapper/eggnog_mapper_search.xml
@@ -0,0 +1,101 @@
+
+ search phase
+
+ eggnog_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `_.
+
+Outputs
+-------
+
+@HELP_SEARCH_OUTPUTS@
+
+**Recommendation for large input data**
+
+EggNOG-mapper consists of two phases:
+
+1. finding seed orthologous sequences (compute intensive)
+2. expanding annotations (IO intensive)
+
+By default (i.e. if *Method to search seed orthologs* is not *Skip search stage...* and *Annotate seed orthologs* is *Yes*)
+both phases are executed within one tool run.
+
+For large input FASTA datasets it can be favourable to split this in two separate
+tool runs as follows:
+
+1. Split the FASTA (e.g. 1M seqs per data set)
+2. Run the search phase only (set *Annotate seed orthologs* to *No*) on the separate FASTA files.
+3. Run the annotation phase (set *Method to search seed orthologs* to *Skip search stage...*)
+
+See also `Setting up large annotation jobs <https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.8#Setting_up_large_annotation_jobs>`_.
+
+Another alternative is to use cached annotations (produced in a run with --md5 enabled).
+
+
+ ]]>
+
+