From e8124b5b820d4701a0af4c8d8708b7ef7354825b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 19 Jul 2023 08:55:04 +0200 Subject: [PATCH 1/4] Allow console disambiguation between kraken2/bracken --- CHANGELOG.md | 1 + conf/modules.config | 11 +++++++++++ docs/output.md | 6 +++--- lib/WorkflowTaxprofiler.groovy | 6 +++--- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 216b8605..3a06e0f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133) - [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133) - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133) +- [#329](https://github.com/nf-core/taxprofiler/pull/329) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMAsk for requesting, added by @jfy133) ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 1784ef23..8a290a6f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -442,6 +442,7 @@ process { } withName: KRAKEN2_KRAKEN2 { + tag = { "${meta.tool}|${meta.id}" } ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}_${meta.db_name}.bracken" : "${meta.id}_${meta.db_name}.kraken2" } : { meta.tool == "bracken" ?
"${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" : "${meta.id}_${meta.run_accession}_${meta.db_name}.kraken2" } publishDir = [ @@ -451,6 +452,16 @@ process { ] } + withName: KRAKEN2_STANDARD_REPORT { + tag = { "${meta.tool}|${meta.id}" } + ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}_${meta.db_name}.bracken" : "${meta.id}_${meta.db_name}.kraken2" } : { meta.tool == "bracken" ? "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" : "${meta.id}_${meta.run_accession}_${meta.db_name}.kraken2" } + publishDir = [ + path: { "${params.outdir}/kraken2/${meta.db_name}/" }, + mode: params.publish_dir_mode, + pattern: '*.report.txt' + ] + } + withName: BRACKEN_BRACKEN { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.bracken" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" } diff --git a/docs/output.md b/docs/output.md index 4569ddb6..5df205dd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -312,7 +312,7 @@ This directory and its FASTQ files will only be present if you supply `--save_ru -The main taxonomic profiling file from Bracken is the `*.tsv` file. This provides the basic results from Kraken2 but with the corrected abundance information. +The main taxonomic profiling file from Bracken is the `*.tsv` file. This provides the basic results from Kraken2 but with the corrected abundance information. Note that the raw Kraken2 version of the upstream step of Bracken can be found in the `kraken2/` directory with the suffix of `_.bracken.report.txt` (with a 6 column variant when `--save_minimizers` specified). ### Kraken2 @@ -326,12 +326,12 @@ The main taxonomic profiling file from Bracken is the `*.tsv` file. 
This provide - `/` - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample - - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample + - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 8 column rather than 6 if `--kraken2_save_minimizers` is specified. - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample -The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest over view of the taxonomic classification results across all samples against a single databse, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. +The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest overview of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline.
diff --git a/lib/WorkflowTaxprofiler.groovy b/lib/WorkflowTaxprofiler.groovy index 3b28e0d3..7e9b977f 100755 --- a/lib/WorkflowTaxprofiler.groovy +++ b/lib/WorkflowTaxprofiler.groovy @@ -93,7 +93,7 @@ class WorkflowTaxprofiler { params.run_bracken ? "Bracken (Lu et al. 2017)," : "", params.run_kraken2 ? "Kraken2 (Wood et al. 2019)," : "", params.run_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "", - params.run_metaphlan3 ? "MetaPhlAn3 (Beghini et al. 2021)," : "", + params.run_metaphlan ? "MetaPhlAn3 (Beghini et al. 2021)," : "", params.run_malt ? "MALT (Vågene et al. 2018) and MEGAN6 CE (Huson et al. 2016)," : "", params.run_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", params.run_centrifuge ? "Centrifuge (Kim et al. 2016)," : "", @@ -162,7 +162,7 @@ class WorkflowTaxprofiler { params.run_bracken ? "
  • Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. 10.7717/peerj-cs.104
  • " : "", params.run_kraken2 ? "
  • Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. 10.1186/s13059-019-1891-0
  • " : "", params.run_krakenuniq ? "
  • Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198. 10.1186/s13059-018-1568-0
  • " : "", - params.run_metaphlan3 ? "
  • Blanco-Míguez, A., Beghini, F., Cumbo, F., McIver, L. J., Thompson, K. N., Zolfo, M., Manghi, P., Dubois, L., Huang, K. D., Thomas, A. M., Nickols, W. A., Piccinno, G., Piperni, E., Punčochář, M., Valles-Colomer, M., Tett, A., Giordano, F., Davies, R., Wolf, J., … Segata, N. (2023). Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nature Biotechnology, 1–12. 10.1038/s41587-023-01688-w
  • " : "", + params.run_metaphlan ? "
  • Blanco-Míguez, A., Beghini, F., Cumbo, F., McIver, L. J., Thompson, K. N., Zolfo, M., Manghi, P., Dubois, L., Huang, K. D., Thomas, A. M., Nickols, W. A., Piccinno, G., Piperni, E., Punčochář, M., Valles-Colomer, M., Tett, A., Giordano, F., Davies, R., Wolf, J., … Segata, N. (2023). Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nature Biotechnology, 1–12. 10.1038/s41587-023-01688-w
  • " : "", params.run_malt ? "
  • Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528. 10.1038/s41559-017-0446-6
  • " : "", params.run_malt ? "
  • Huson, D. H., Beier, S., Flade, I., Górska, A., El-Hadidi, M., Mitra, S., Ruscheweyh, H.-J., & Tappu, R. (2016). MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data. PLoS Computational Biology, 12(6), e1004957. 10.1371/journal.pcbi.1004957
  • " : "", params.run_diamond ? "
  • Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. 10.1038/nmeth.3176
  • " : "", @@ -182,7 +182,7 @@ class WorkflowTaxprofiler { def text_extras = [ // fastp shortread qc / complexity filtering - ( params.perform_shortread_qc && params.shortread_qc_tool == "fastp" ) || ( params.text_shortreadcomplexity && params.shortread_complexityfilter_tool == "fastp" ) ? "
  • Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics , 34(17), i884–i890. 10.1093/bioinformatics/bty560
  • " : "", + ( params.perform_shortread_qc && params.shortread_qc_tool == "fastp" ) || ( params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == "fastp" ) ? "
  • Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics , 34(17), i884–i890. 10.1093/bioinformatics/bty560
  • " : "", // samtools long / short hostremoval params.perform_shortread_hostremoval || params.perform_longread_hostremoval ? "
  • Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2). 10.1093/gigascience/giab008
  • " : "", ].join(' ').trim() From e5ba578a8cfaae556375eb389db656cdbe811254 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 19 Jul 2023 08:56:25 +0200 Subject: [PATCH 2/4] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a06e0f8..69bd3c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133) - [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133) - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133) -- [#329](https://github.com/nf-core/taxprofiler/pull/329) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMAsk for requesting, added by @jfy133) +- [#330](https://github.com/nf-core/taxprofiler/pull/330) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMAsk for requesting, added by @jfy133) ### `Dependencies` From a36e608bebf6ef532717cba79b456ce3cfc993f0 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 19 Jul 2023 08:56:36 +0200 Subject: [PATCH 3/4] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69bd3c37..b185cfa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133) - [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133) - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133) -- [#330](https://github.com/nf-core/taxprofiler/pull/330) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMAsk for requesting, added by @jfy133) +- [#330](https://github.com/nf-core/taxprofiler/pull/330) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMask for requesting, added by @jfy133) ### `Dependencies` From 476f87fe514420bd864f8672447ec942112aec84 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 19 Jul 2023 09:18:53 +0200 Subject: [PATCH 4/4] =?UTF-8?q?Fix=20citation=20typo,=20harshil=20align?= =?UTF-8?q?=E2=84=A2=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/WorkflowTaxprofiler.groovy | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/WorkflowTaxprofiler.groovy b/lib/WorkflowTaxprofiler.groovy index 7e9b977f..44b60f2b 100755 --- a/lib/WorkflowTaxprofiler.groovy +++ b/lib/WorkflowTaxprofiler.groovy @@ -90,16 +90,16 @@ class WorkflowTaxprofiler { def 
text_classification = [ "Taxonomic classification or profiling was carried out with:", - params.run_bracken ? "Bracken (Lu et al. 2017)," : "", - params.run_kraken2 ? "Kraken2 (Wood et al. 2019)," : "", + params.run_bracken ? "Bracken (Lu et al. 2017)," : "", + params.run_kraken2 ? "Kraken2 (Wood et al. 2019)," : "", params.run_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "", - params.run_metaphlan ? "MetaPhlAn3 (Beghini et al. 2021)," : "", - params.run_malt ? "MALT (Vågene et al. 2018) and MEGAN6 CE (Huson et al. 2016)," : "", - params.run_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", + params.run_metaphlan ? "MetaPhlAn (Blanco-Míguez et al. 2023)," : "", + params.run_malt ? "MALT (Vågene et al. 2018) and MEGAN6 CE (Huson et al. 2016)," : "", + params.run_diamond ? "DIAMOND (Buchfink et al. 2015)," : "", params.run_centrifuge ? "Centrifuge (Kim et al. 2016)," : "", - params.run_kaiju ? "Kaiju (Menzel et al. 2016)," : "", - params.run_motus ? "mOTUs (Ruscheweyh et al. 2022)," : "", - params.run_ganon ? "ganon (Piro et al. 2020)" : "", + params.run_kaiju ? "Kaiju (Menzel et al. 2016)," : "", + params.run_motus ? "mOTUs (Ruscheweyh et al. 2022)," : "", + params.run_ganon ? "ganon (Piro et al. 2020)" : "", "." ].join(' ').trim()