diff --git a/CHANGELOG.md b/CHANGELOG.md index 42e770ab..8030912e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#315](https://github.com/nf-core/taxprofiler/pull/315) Updated to nf-core pipeline template v2.9 (added by @sofstam & @jfy133) - [#319](https://github.com/nf-core/taxprofiler/pull/319) Added support for virus hit expansion in Kaiju (❤️ to @dnlrxn for requesting, added by @jfy133) - [#323](https://github.com/nf-core/taxprofiler/pull/323) Add ability to skip sequencing quality control tools (❤️ to @vinisalazar for requesting, added by @jfy133) + - [#318](https://github.com/nf-core/taxprofiler/pull/318) Added the profiler MetaPhlAn4 and removed MetaPhlAn3 (added by @LilyAnderssonLee) ### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index 14183519..3f92c12d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -64,9 +64,9 @@ > Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0 -- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) +- [MetaPhlAn](https://doi.org/10.1038/s41587-023-01688-w) - > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088 + > Blanco-Míguez, A., Beghini, F., Cumbo, F. et al. Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nat Biotechnol (2023). 
doi: 10.1038/s41587-023-01688-w - [MALT](https://doi.org/10.1038/s41559-017-0446-6) diff --git a/README.md b/README.md index 1d633dfd..9d6200b5 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ 3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/)) 4. Performs taxonomic classification and/or profiling using one or more of: - [Kraken2](https://ccb.jhu.edu/software/kraken2/) - - [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/) + - [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan/) - [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/) - [DIAMOND](https://github.com/bbuchfink/diamond) - [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) @@ -68,7 +68,7 @@ Additionally, you will need a database sheet that looks as follows: ``` tool,db_name,db_params,db_path kraken2,db2,--quick,///kraken2/testdb-kraken2.tar.gz -metaphlan3,db1,,///metaphlan3/metaphlan_database/ +metaphlan,db1,,///metaphlan/metaphlan_database/ ``` That includes directories or `.tar.gz` archives containing databases for the tools you wish to run the pipeline against. @@ -81,7 +81,7 @@ nextflow run nf-core/taxprofiler \ --input samplesheet.csv \ --databases databases.csv \ --outdir \ - --run_kraken2 --run_metaphlan3 + --run_kraken2 --run_metaphlan ``` > **Warning:** diff --git a/conf/modules.config b/conf/modules.config index 2c64c656..823ad820 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -523,20 +523,20 @@ process { ] } - withName: METAPHLAN3_METAPHLAN3 { + withName: METAPHLAN_METAPHLAN { ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.metaphlan3" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.metaphlan3" } + ext.prefix = params.perform_runmerging ? 
{ "${meta.id}_${meta.db_name}.metaphlan" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.metaphlan" } publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}/" }, + path: { "${params.outdir}/metaphlan/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] } - withName: METAPHLAN3_MERGEMETAPHLANTABLES { - ext.prefix = { "metaphlan3_${meta.id}_combined_reports" } + withName: METAPHLAN_MERGEMETAPHLANTABLES { + ext.prefix = { "metaphlan_${meta.id}_combined_reports" } publishDir = [ - path: { "${params.outdir}/metaphlan3/" }, + path: { "${params.outdir}/metaphlan/" }, mode: params.publish_dir_mode, pattern: '*.{txt}' ] diff --git a/conf/test.config b/conf/test.config index 2d7ec074..4be47467 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,7 +34,7 @@ params { run_kraken2 = true run_bracken = true run_malt = false - run_metaphlan3 = true + run_metaphlan = true run_centrifuge = true run_diamond = true run_krakenuniq = true diff --git a/conf/test_full.config b/conf/test_full.config index 4579a8ec..b37216ae 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -57,7 +57,7 @@ params { malt_save_reads = false malt_generate_megansummary = true - run_metaphlan3 = true + run_metaphlan = true run_motus = true motus_save_mgc_read_counts = true diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index 6d119887..2ab2c1ca 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -38,7 +38,7 @@ params { run_kraken2 = false run_bracken = false run_malt = false - run_metaphlan3 = false + run_metaphlan = false run_centrifuge = false run_diamond = false run_krakenuniq = true diff --git a/conf/test_motus.config b/conf/test_motus.config index 9563f34e..15833549 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -37,7 +37,7 @@ params { run_kraken2 = false run_bracken = false run_malt = false - run_metaphlan3 = false + run_metaphlan = false run_centrifuge = false 
run_diamond = false run_krakenuniq = false diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index ee3b589b..43e01929 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -33,7 +33,7 @@ params { run_kraken2 = true run_bracken = true run_malt = true - run_metaphlan3 = true + run_metaphlan = true run_centrifuge = true run_diamond = true run_krakenuniq = true diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 93b491ef..284c43ee 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -34,7 +34,7 @@ params { run_kraken2 = false run_bracken = false run_malt = false - run_metaphlan3 = false + run_metaphlan = false run_centrifuge = false run_diamond = false run_krakenuniq = false diff --git a/conf/test_nothing.config b/conf/test_nothing.config index c9b3b1da..e8fa96a8 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -33,7 +33,7 @@ params { run_kraken2 = false run_bracken = false run_malt = false - run_metaphlan3 = false + run_metaphlan = false run_centrifuge = false run_diamond = false run_krakenuniq = false diff --git a/docs/images/taxprofiler_tube.svg b/docs/images/taxprofiler_tube.svg index 6eb06cd5..a663e68d 100644 --- a/docs/images/taxprofiler_tube.svg +++ b/docs/images/taxprofiler_tube.svg @@ -3186,7 +3186,7 @@ id="tspan44956-3" style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:end;text-anchor:end;stroke-width:0.264583" x="-347.26553" - y="100.45697">MetaPhlAn3MetaPhlAn Output files -- `metaphlan3/` - - `metaphlan3__combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `metaphlan_merge_tables`) +- `metaphlan/` + - `metaphlan__combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `metaphlan_merge_tables`) - `/` - `.biom`: 
taxonomic profile in BIOM format - `.bowtie2out.txt`: BowTie2 alignment information (can be re-used for skipping alignment when re-running MetaPhlAn3 with different parameters) - `_profile.txt`: MetaPhlAn3 taxonomic profile including abundance estimates + - `.bowtie2out.txt`: Bowtie2 alignment information (can be re-used for skipping alignment when re-running MetaPhlAn with different parameters) + - `_profile.txt`: MetaPhlAn taxonomic profile including abundance estimates -The main taxonomic profiling file from MetaPhlAn3 is the `*_profile.txt` file. This provides the abundance estimates from MetaPhlAn3 however does not include raw counts by default. +The main taxonomic profiling file from MetaPhlAn is the `*_profile.txt` file. This provides the abundance estimates from MetaPhlAn, but does not include raw counts by default. ### mOTUs @@ -535,7 +535,7 @@ The following report files are used for the taxpasta step: - KrakenUniq: `_.report.txt` Taxpasta uses the `reads` column for the standardised profile. - Kraken2: `_.report.txt` Taxpasta uses the `direct_assigned_reads` column for the standardised profile. - MALT: `.txt.gz` Taxpasta uses the `count` (second) column from the output of MEGAN6's rma2info for the standardised profile. -- MetaPhlAn3: `_profile.txt` Taxpasta uses the `relative_abundance` column multiplied with a fixed number to yield an integer for the standardised profile. +- MetaPhlAn: `_profile.txt` Taxpasta uses the `relative_abundance` column multiplied by a fixed number to yield an integer for the standardised profile. - mOTUs: `.out` Taxpasta uses the `read_count` column for the standardised profile. > ⚠️ Please aware the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool. 
diff --git a/docs/usage.md b/docs/usage.md index 3a106655..b43194f4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -101,7 +101,7 @@ bracken,db1,;-r 150,///bracken/testdb-bracken.tar.gz kraken2,db2,--quick,///kraken2/testdb-kraken2.tar.gz krakenuniq,db3,,///krakenuniq/testdb-krakenuniq.tar.gz centrifuge,db1,,///centrifuge/minigut_cf.tar.gz -metaphlan3,db1,,///metaphlan3/metaphlan_database/ +metaphlan,db1,,///metaphlan/metaphlan_database/ motus,db_mOTU,,///motus/motus_database/ ganon,db1,,///ganon/test-db-ganon.tar.gz ``` @@ -130,7 +130,7 @@ The (uncompressed) database paths (`db_path`) for each tool are expected to cont - [**Kraken2**:](#kraken2-custom-database) output of `kraken2-build` command(s). - [**KrakenUniq**:](#krakenuniq-custom-database) output of `krakenuniq-build` command(s). - [**MALT**](#malt-custom-database) output of `malt-build`. -- [**MetaPhlAn3**:](#metaphlan3-custom-database) output of with `metaphlan --install` or downloaded from links on the [MetaPhlAn3 wiki](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#customizing-the-database). +- [**MetaPhlAn**:](#metaphlan-custom-database) output of `metaphlan --install` or downloaded from links on the [MetaPhlAn wiki](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#customizing-the-database). - [**mOTUs**:](#motus-custom-database) the directory `db_mOTU/` that is downloaded via `motus downloadDB`. - [**ganon**:](#ganon-custom-database) output of `ganon build` or `ganon build-custom`. @@ -298,9 +298,9 @@ MALT does not support paired-end reads alignment (unlike other tools), therefore Krona can only be run on MALT output if path to Krona taxonomy database supplied to `--krona_taxonomy_directory`. Therefore if you do not supply the a Krona directory, Krona plots will not be produced for MALT. -##### MetaPhlAn3 +##### MetaPhlAn -MetaPhlAn3 currently does not accept FASTA files as input, therefore no output will be produced for these input files. 
+MetaPhlAn4 can be used with a MetaPhlAn3 database by adding the `--mpa3` parameter to the MetaPhlAn process in the config file `conf/modules.config`. ##### mOTUs @@ -339,7 +339,7 @@ The following tools will produce multi-sample taxon tables: - **Centrifuge** (via KrakenTools' `combine_kreports.py` script) - **Kaiju** (via Kaiju's `kaiju2table` tool) - **Kraken2** (via KrakenTools' `combine_kreports.py` script) -- **MetaPhlAn3** (via MetaPhlAn's `merge_metaphlan_tables.py` script) +- **MetaPhlAn** (via MetaPhlAn's `merge_metaphlan_tables.py` script) - **mOTUs** (via the `motus merge` command) - **ganon** (via the `ganon table` command) @@ -712,11 +712,11 @@ You can then add the `/` path to your nf-core/taxprofiler database See the [MALT manual](https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf) for more information. -#### MetaPhlAn3 custom database +#### MetaPhlAn custom database -MetaPhlAn3 does not allow (easy) construction of custom databases. Therefore we recommend to use the prebuilt database of marker genes that is provided by the developers. +MetaPhlAn does not allow (easy) construction of custom databases. Therefore we recommend to use the prebuilt database of marker genes that is provided by the developers. -To do this you need to have `MetaPhlAn3` installed on your machine. +To do this you need to have `MetaPhlAn` installed on your machine. ```bash metaphlan --install --bowtie2db / @@ -731,21 +731,20 @@ You can then add the `/` path to your nf-core/taxprofiler database
Expected files in database directory -- `metaphlan3` - - `mpa_v30_CHOCOPhlAn_201901.pkl` - - `mpa_v30_CHOCOPhlAn_201901.pkl` - - `mpa_v30_CHOCOPhlAn_201901.fasta` - - `mpa_v30_CHOCOPhlAn_201901.3.bt2` - - `mpa_v30_CHOCOPhlAn_201901.4.bt2` - - `mpa_v30_CHOCOPhlAn_201901.1.bt2` - - `mpa_v30_CHOCOPhlAn_201901.2.bt2` - - `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2` - - `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2` +- `metaphlan` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.pkl` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.fna.bz2` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.1.bt2l` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.2.bt2l` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.3.bt2l` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.4.bt2l` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.rev.1.bt2l` + - `mpa_vJan21_TOY_CHOCOPhlAnSGB_202103.rev.2.bt2l` - `mpa_latest`
-More information on the MetaPhlAn3 database can be found [here](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.1#installation). +More information on the MetaPhlAn database can be found [here](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Pre-requisites). #### mOTUs custom database diff --git a/modules.json b/modules.json index a608c5ca..a1c6fed1 100644 --- a/modules.json +++ b/modules.json @@ -156,14 +156,14 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, - "metaphlan3/mergemetaphlantables": { + "metaphlan/mergemetaphlantables": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "9aa59197c0fb35c29e315bcd10c0fc9e1afc70a8", "installed_by": ["modules"] }, - "metaphlan3/metaphlan3": { + "metaphlan/metaphlan": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "31ec4470b455fe88c072151a5ea7821bfb2add38", "installed_by": ["modules"] }, "minimap2/align": { diff --git a/modules/nf-core/metaphlan3/mergemetaphlantables/main.nf b/modules/nf-core/metaphlan/mergemetaphlantables/main.nf similarity index 62% rename from modules/nf-core/metaphlan3/mergemetaphlantables/main.nf rename to modules/nf-core/metaphlan/mergemetaphlantables/main.nf index 5be6e4f1..94c70cd6 100644 --- a/modules/nf-core/metaphlan3/mergemetaphlantables/main.nf +++ b/modules/nf-core/metaphlan/mergemetaphlantables/main.nf @@ -1,17 +1,17 @@ -process METAPHLAN3_MERGEMETAPHLANTABLES { +process METAPHLAN_MERGEMETAPHLANTABLES { label 'process_single' - conda "bioconda::metaphlan=3.0.12" + conda "bioconda::metaphlan=4.0.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' : - 'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }" + 'https://depot.galaxyproject.org/singularity/metaphlan:4.0.6--pyhca03a8a_0' : + 'quay.io/biocontainers/metaphlan:4.0.6--pyhca03a8a_0' }" input: tuple val(meta), path(profiles) output: tuple val(meta), path("${prefix}.txt") , emit: txt - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -27,7 +27,7 @@ process METAPHLAN3_MERGEMETAPHLANTABLES { cat <<-END_VERSIONS > versions.yml "${task.process}": - metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}') + metaphlan: \$(metaphlan --version 2>&1 | awk '{print \$3}') END_VERSIONS """ } diff --git a/modules/nf-core/metaphlan3/mergemetaphlantables/meta.yml b/modules/nf-core/metaphlan/mergemetaphlantables/meta.yml similarity index 74% rename from modules/nf-core/metaphlan3/mergemetaphlantables/meta.yml rename to modules/nf-core/metaphlan/mergemetaphlantables/meta.yml index 365973ef..3c93964b 100644 --- a/modules/nf-core/metaphlan3/mergemetaphlantables/meta.yml +++ b/modules/nf-core/metaphlan/mergemetaphlantables/meta.yml @@ -1,5 +1,5 @@ -name: "metaphlan3_mergemetaphlantables" -description: Merges output abundance tables from MetaPhlAn3 +name: "metaphlan_mergemetaphlantables" +description: Merges output abundance tables from MetaPhlAn4 keywords: - metagenomics - classification @@ -7,11 +7,11 @@ keywords: - table - profiles tools: - - metaphlan3: + - metaphlan4: description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance homepage: https://huttenhower.sph.harvard.edu/metaphlan/ documentation: https://github.com/biobakery/MetaPhlAn - doi: "10.7554/eLife.65088" + doi: "10.1038/s41587-023-01688-w" licence: ["MIT License"] input: @@ -22,7 +22,7 @@ input: e.g. 
[ id:'test', single_end:false ] - profiles: type: file - description: List of per-sample MetaPhlAn3 taxonomic abundance tables + description: List of per-sample MetaPhlAn4 taxonomic abundance tables pattern: "*" output: @@ -36,9 +36,10 @@ output: description: File containing software versions pattern: "versions.yml" - txt: - type: txt - description: Combined MetaPhlAn3 table + type: file + description: Combined MetaPhlAn4 table pattern: "*.txt" authors: - "@jfy133" + - "@LilyAnderssonLee" diff --git a/modules/nf-core/metaphlan3/metaphlan3/main.nf b/modules/nf-core/metaphlan/metaphlan/main.nf similarity index 61% rename from modules/nf-core/metaphlan3/metaphlan3/main.nf rename to modules/nf-core/metaphlan/metaphlan/main.nf index 34f8705c..477f1f28 100644 --- a/modules/nf-core/metaphlan3/metaphlan3/main.nf +++ b/modules/nf-core/metaphlan/metaphlan/main.nf @@ -1,15 +1,15 @@ -process METAPHLAN3_METAPHLAN3 { +process METAPHLAN_METAPHLAN { tag "$meta.id" - label 'process_high' + label 'process_medium' - conda "bioconda::metaphlan=3.0.12" + conda "bioconda::metaphlan=4.0.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' : - 'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }" + 'https://depot.galaxyproject.org/singularity/metaphlan:4.0.6--pyhca03a8a_0' : + 'biocontainers/metaphlan:4.0.6--pyhca03a8a_0' }" input: tuple val(meta), path(input) - path metaphlan_db + path metaphlan_db_latest output: tuple val(meta), path("*_profile.txt") , emit: profile @@ -23,12 +23,13 @@ process METAPHLAN3_METAPHLAN3 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_type = ("$input".endsWith(".fastq.gz") || "$input".endsWith(".fq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? 
"--input_type bowtie2out" : "--input_type sam" + def input_type = "$input" =~ /.*\.(fastq|fq)/ ? "--input_type fastq" : "$input" =~ /.*\.(fasta|fna|fa)/ ? "--input_type fasta" : "$input".endsWith(".bowtie2out.txt") ? "--input_type bowtie2out" : "--input_type sam" def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input" def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" """ - BT2_DB=`find -L "${metaphlan_db}" -name "*rev.1.bt2" -exec dirname {} \\;` + BT2_DB=`find -L "${metaphlan_db_latest}" -name "*rev.1.bt2l" -exec dirname {} \\;` + BT2_DB_INDEX=`find -L ${metaphlan_db_latest} -name "*.rev.1.bt2l" | sed 's/\\.rev.1.bt2l\$//' | sed 's/.*\\///'` metaphlan \\ --nproc $task.cpus \\ @@ -37,12 +38,13 @@ process METAPHLAN3_METAPHLAN3 { $args \\ $bowtie2_out \\ --bowtie2db \$BT2_DB \\ + --index \$BT2_DB_INDEX \\ --biom ${prefix}.biom \\ --output_file ${prefix}_profile.txt cat <<-END_VERSIONS > versions.yml "${task.process}": - metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}') + metaphlan: \$(metaphlan --version 2>&1 | awk '{print \$3}') END_VERSIONS """ } diff --git a/modules/nf-core/metaphlan3/metaphlan3/meta.yml b/modules/nf-core/metaphlan/metaphlan/meta.yml similarity index 74% rename from modules/nf-core/metaphlan3/metaphlan3/meta.yml rename to modules/nf-core/metaphlan/metaphlan/meta.yml index 659d83a9..cb74bd59 100644 --- a/modules/nf-core/metaphlan3/metaphlan3/meta.yml +++ b/modules/nf-core/metaphlan/metaphlan/meta.yml @@ -1,17 +1,17 @@ -name: metaphlan3_metaphlan3 +name: metaphlan_metaphlan description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data. 
keywords: - metagenomics - classification - fastq - - bam - fasta + - sam tools: - - metaphlan3: + - metaphlan: description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance homepage: https://huttenhower.sph.harvard.edu/metaphlan/ documentation: https://github.com/biobakery/MetaPhlAn - doi: "10.7554/eLife.65088" + doi: "10.1038/s41587-023-01688-w" licence: ["MIT License"] input: @@ -22,13 +22,13 @@ input: e.g. [ id:'test', single_end:false ] - input: type: file - description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out) + description: Metaphlan can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out) pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}" - metaphlan_db: type: file description: | - Directory containing pre-downloaded and uncompressed MetaPhlAn3 database downloaded from: http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/. - Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_v31_CHOCOPhlAn_201901') in your module.conf ext.args for METAPHLAN3_METAPHLAN3! + Directory containing pre-downloaded and uncompressed MetaPhlAn database downloaded from: http://cmprod1.cibio.unitn.it/biobakery4/metaphlan_databases/. + Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103') in your module.conf ext.args for METAPHLAN_METAPHLAN! 
pattern: "*/" output: @@ -56,3 +56,4 @@ output: authors: - "@MGordon09" + - "@LilyAnderssonLee" diff --git a/nextflow.config b/nextflow.config index 1d79286e..73574f6e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,6 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -43,7 +42,6 @@ params { config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -137,8 +135,8 @@ params { run_centrifuge = false centrifuge_save_reads = false // added directly to module in profiling.nf - // metaphlan3 - run_metaphlan3 = false + // metaphlan + run_metaphlan = false // kaiju run_kaiju = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b2d2807e..b82b81cc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -474,9 +474,9 @@ "description": "Turn on generation of MEGAN summary file from MALT results", "help_text": "Turns on saving of MALT output in an additional MEGAN summary file (`.megan`) that can be loaded into the MEGAN metagenomic exploration tool.\n\nNote: this file is generated not directly from MALT but rather then MEGAN utility script `rma2info`.\n\n> Modifies tool parameter(s):\n> - rma2info: `-es`" }, - "run_metaphlan3": { + "run_metaphlan": { "type": "boolean", - "description": "Turn on profiling with MetaPhlAn3. Requires database to be present CSV file passed to --databases", + "description": "Turn on profiling with MetaPhlAn. 
Requires database to be present CSV file passed to --databases", "fa_icon": "fas fa-toggle-on" }, "run_motus": { @@ -715,12 +715,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -744,6 +746,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -758,6 +761,7 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, "hidden": true }, "hook_url": { @@ -796,6 +800,7 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index 7c680c17..1dad2e46 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -79,7 +79,7 @@ def validate_db_rows(LinkedHashMap row) { if ( !row.keySet().containsAll(expected_headers) ) error("[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. 
Column names should be: ${expected_headers.join(", ")}") // valid tools specified - def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus", "ganon", "metaphlan", "kmcp" ] + def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "metaphlan", "motus", "ganon", "kmcp" ] if ( !expected_tools.contains(row.tool) ) error("[nf-core/taxprofiler] ERROR: Invalid tool name. Please see documentation for all supported profilers. Error in: ${row}") // detect quotes in params diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 2f11100d..3eb99d92 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,7 +9,7 @@ include { KRAKEN2_STANDARD_REPORT } from '../../modules/lo include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main' -include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main' +include { METAPHLAN_METAPHLAN } from '../../modules/nf-core/metaphlan/metaphlan/main' include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main' include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_SINGLE } from '../../modules/nf-core/kaiju/kaiju2table/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main' @@ -47,7 +47,7 @@ workflow PROFILING { kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken krakenuniq: it[2]['tool'] == 'krakenuniq' malt: it[2]['tool'] == 'malt' - metaphlan3: it[2]['tool'] == 'metaphlan3' + metaphlan: it[2]['tool'] == 'metaphlan' motus: it[2]['tool'] == 'motus' ganon: it[2]['tool'] == 'ganon' unknown: true @@ -239,22 +239,18 @@ workflow PROFILING { } - if 
( params.run_metaphlan3 ) { + if ( params.run_metaphlan ) { - ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 - .filter{ - if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}." - !it[0].is_fasta - } + ch_input_for_metaphlan = ch_input_for_profiling.metaphlan .multiMap { it -> reads: [it[0] + it[2], it[1]] db: it[3] } - METAPHLAN3_METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) - ch_versions = ch_versions.mix( METAPHLAN3_METAPHLAN3.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3_METAPHLAN3.out.profile ) + METAPHLAN_METAPHLAN ( ch_input_for_metaphlan.reads, ch_input_for_metaphlan.db ) + ch_versions = ch_versions.mix( METAPHLAN_METAPHLAN.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN_METAPHLAN.out.profile ) } diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 7d6713e7..354cbdea 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -8,7 +8,7 @@ include { BRACKEN_COMBINEBRACKENOUTPUTS include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_COMBINED } from '../../modules/nf-core/kaiju/kaiju2table/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' -include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' +include { METAPHLAN_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan/mergemetaphlantables/main' include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main' include { GANON_TABLE } from '../../modules/nf-core/ganon/table/main' @@ -29,7 +29,7 @@ workflow 
STANDARDISATION_PROFILES { meta, profile -> def meta_new = [:] meta_new.id = meta.db_name - meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool + meta_new.tool = meta.tool == 'metaphlan' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool [meta_new, profile] } .groupTuple () @@ -56,7 +56,7 @@ workflow STANDARDISATION_PROFILES { bracken: it[0]['tool'] == 'bracken' centrifuge: it[0]['tool'] == 'centrifuge' kraken2: it[0]['tool'] == 'kraken2' - metaphlan3: it[0]['tool'] == 'metaphlan3' + metaphlan: it[0]['tool'] == 'metaphlan' motus: it[0]['tool'] == 'motus' ganon: it[0]['tool'] == 'ganon' unknown: true @@ -137,18 +137,18 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions ) - // MetaPhlAn3 + // MetaPhlAn - ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3 + ch_profiles_for_metaphlan = ch_input_profiles.metaphlan .map { [it[0]['db_name'], it[1]] } .groupTuple() .map { [[id:it[0]], it[1]] } - METAPHLAN3_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan3 ) - ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt ) - ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions ) + METAPHLAN_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan ) + ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN_MERGEMETAPHLANTABLES.out.txt ) + ch_versions = ch_versions.mix( METAPHLAN_MERGEMETAPHLANTABLES.out.versions ) // mOTUs