nf-core · jfy133 · Jul 19, 2023 · Jul 19, 2023 · Jul 19, 2023 · Jul 19, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#304](https://github.com/nf-core/taxprofiler/pull/304) Correct mistake in kaiju2table documentation, only single rank can be supplied (♥ to @artur-matysik for reporting, fix by @jfy133)
 - [#307](https://github.com/nf-core/taxprofiler/pull/307) Fix databases being sometimes associated with the wrong tool (e.g. Kaiju) (fix by @jfy133)
 - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133)
+- [#329](https://github.com/nf-core/taxprofiler/pull/329) Added better task tagging to disambiguate Kraken2 steps run for Kraken2 vs. those run for Bracken (♥ to @MajoroMAsk for requesting, added by @jfy133)
 
 ### `Dependencies`
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -442,6 +442,7 @@ process {
     }
 
     withName: KRAKEN2_KRAKEN2 {
+        tag = { "${meta.tool}|${meta.id}" }
         ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}_${meta.db_name}.bracken" : "${meta.id}_${meta.db_name}.kraken2" } : { meta.tool == "bracken" ? "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" : "${meta.id}_${meta.run_accession}_${meta.db_name}.kraken2" }
         publishDir = [
@@ -451,6 +452,16 @@ process {
         ]
     }
 
+    withName: KRAKEN2_STANDARD_REPORT { // settings applied to the KRAKEN2_STANDARD_REPORT process
+        tag = { "${meta.tool}|${meta.id}" } // task label "<tool>|<sample id>", same format as the KRAKEN2_KRAKEN2 tag
+        ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}_${meta.db_name}.bracken" : "${meta.id}_${meta.db_name}.kraken2" } : { meta.tool == "bracken" ? "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" : "${meta.id}_${meta.run_accession}_${meta.db_name}.kraken2" } // ".bracken" suffix when meta.tool is "bracken", otherwise ".kraken2"; run_accession is part of the prefix only when run merging is off
+        publishDir = [ // publish only files matching '*.report.txt' into the per-database kraken2 output folder
+            path: { "${params.outdir}/kraken2/${meta.db_name}/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.report.txt'
+        ]
+    }
+
     withName: BRACKEN_BRACKEN {
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.bracken" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" }

diff --git a/docs/output.md b/docs/output.md
@@ -312,7 +312,7 @@ This directory and its FASTQ files will only be present if you supply `--save_ru
 
 </details>
 
-The main taxonomic profiling file from Bracken is the `*.tsv` file. This provides the basic results from Kraken2 but with the corrected abundance information.
+The main taxonomic profiling file from Bracken is the `*.tsv` file. This provides the basic results from Kraken2 but with the corrected abundance information. Note that the raw Kraken2 report from the upstream Kraken2 step of Bracken can be found in the `kraken2/` directory, named `<sample_id>_<db_name>.bracken.report.txt` (with a 6 column variant when `--kraken2_save_minimizers` is specified).
 
 ### Kraken2
 
@@ -326,12 +326,12 @@ The main taxonomic profiling file from Bracken is the `*.tsv` file. This provide
   - `<db_name>/`
     - `<sample_id>_<db_name>.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample
     - `<sample_id>_<db_name>.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample
-    - `<sample_id>_<db_name>.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample
+    - `<sample_id>_<db_name>.<kraken2/bracken>.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--kraken2_save_minimizers` specified.
     - `<sample_id>_<db_name>.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample
 
 </details>
 
-The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest over view of the taxonomic classification results across all samples against a single databse, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step.
+The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest overview of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step.
 
 You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline.
 

diff --git a/lib/WorkflowTaxprofiler.groovy b/lib/WorkflowTaxprofiler.groovy
@@ -93,7 +93,7 @@ class WorkflowTaxprofiler {
                 params.run_bracken ? "Bracken (Lu et al. 2017)," : "",
                 params.run_kraken2 ? "Kraken2 (Wood et al. 2019)," : "",
                 params.run_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "",
-                params.run_metaphlan3 ? "MetaPhlAn3 (Beghini et al. 2021)," : "",
+                params.run_metaphlan ? "MetaPhlAn3 (Beghini et al. 2021)," : "",
                 params.run_malt ? "MALT (Vågene et al. 2018) and MEGAN6 CE (Huson et al. 2016)," : "",
                 params.run_diamond ? "DIAMOND (Buchfink et al. 2015)," : "",
                 params.run_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
@@ -162,7 +162,7 @@ class WorkflowTaxprofiler {
                 params.run_bracken    ? "<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. <a href=\"https://doi.org/10.7717/peerj-cs.104\">10.7717/peerj-cs.104</a></li>" : "",
                 params.run_kraken2    ? "<li>Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257.  <a href=\"https://doi.org/10.1186/s13059-019-1891-0\">10.1186/s13059-019-1891-0</a></li>" : "",
                 params.run_krakenuniq ? "<li>Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198.  <a href=\"https://doi.org/10.1186/s13059-018-1568-0\">10.1186/s13059-018-1568-0</a></li>" : "",
-                params.run_metaphlan3 ? "<li>Blanco-Míguez, A., Beghini, F., Cumbo, F., McIver, L. J., Thompson, K. N., Zolfo, M., Manghi, P., Dubois, L., Huang, K. D., Thomas, A. M., Nickols, W. A., Piccinno, G., Piperni, E., Punčochář, M., Valles-Colomer, M., Tett, A., Giordano, F., Davies, R., Wolf, J., … Segata, N. (2023). Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nature Biotechnology, 1–12. <a href=\"https://doi.org/10.1038/s41587-023-01688-w\">10.1038/s41587-023-01688-w</a></li>" : "",
+                params.run_metaphlan  ? "<li>Blanco-Míguez, A., Beghini, F., Cumbo, F., McIver, L. J., Thompson, K. N., Zolfo, M., Manghi, P., Dubois, L., Huang, K. D., Thomas, A. M., Nickols, W. A., Piccinno, G., Piperni, E., Punčochář, M., Valles-Colomer, M., Tett, A., Giordano, F., Davies, R., Wolf, J., … Segata, N. (2023). Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nature Biotechnology, 1–12. <a href=\"https://doi.org/10.1038/s41587-023-01688-w\">10.1038/s41587-023-01688-w</a></li>" : "",
                 params.run_malt       ? "<li>Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528.  <a href=\"https://doi.org/10.1038/s41559-017-0446-6\">10.1038/s41559-017-0446-6</a></li>" : "",
                 params.run_malt       ? "<li>Huson, D. H., Beier, S., Flade, I., Górska, A., El-Hadidi, M., Mitra, S., Ruscheweyh, H.-J., & Tappu, R. (2016). MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data. PLoS Computational Biology, 12(6), e1004957. <a href=\"https://doi.org/10.1371/journal.pcbi.1004957\">10.1371/journal.pcbi.1004957</a></li>" : "",
                 params.run_diamond    ? "<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. <a href=\"https://doi.org/10.1038/nmeth.3176\">10.1038/nmeth.3176</a></li>" : "",
@@ -182,7 +182,7 @@ class WorkflowTaxprofiler {
 
             def text_extras = [
                 // fastp shortread qc / complexity filtering
-                ( params.perform_shortread_qc && params.shortread_qc_tool == "fastp" ) || ( params.text_shortreadcomplexity && params.shortread_complexityfilter_tool == "fastp" ) ? "<li>Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics , 34(17), i884–i890. <a href=\"https://doi.org/10.1093/bioinformatics/bty560\">10.1093/bioinformatics/bty560</a></li>" : "",
+                ( params.perform_shortread_qc && params.shortread_qc_tool == "fastp" ) || ( params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == "fastp" ) ? "<li>Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics , 34(17), i884–i890. <a href=\"https://doi.org/10.1093/bioinformatics/bty560\">10.1093/bioinformatics/bty560</a></li>" : "",
                 // samtools long / short hostremoval
                 params.perform_shortread_hostremoval || params.perform_longread_hostremoval ? "<li>Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2). <a href=\"https://doi.org/10.1093/gigascience/giab008\">10.1093/gigascience/giab008</a></li>" : "",
             ].join(' ').trim()