diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c1642f76b..e5e7a3d59 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. When doing so, it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 636e0714b..ada206f79 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/ampl - [ ] If necessary, also make a PR on the nf-core/ampliseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788582d92..bd42a8d09 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,7 @@ jobs: - "test_failed" - "test_multi" - "test_reftaxcustom" + - "test_qiimecustom" - "test_doubleprimers" - "test_iontorrent" - "test_novaseq" @@ -61,7 +62,7 @@ jobs: steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check out test data uses: actions/checkout@v3 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ec23bef21..9781ad7c0 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd2143..905c58e44 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black 
uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc0..acf726953 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index cf0e5ce0e..723d4a835 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,33 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## nf-core/ampliseq version 2.8.0 - 2024-01-16 + +### `Added` + +- [#666](https://github.com/nf-core/ampliseq/pull/666) - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. 
+- [#667](https://github.com/nf-core/ampliseq/pull/667),[#691](https://github.com/nf-core/ampliseq/pull/691) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification +- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` +- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` +- [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification + +### `Changed` + +- [#677](https://github.com/nf-core/ampliseq/pull/677) - Added cut_its information to SBDI export + +### `Fixed` + +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation +- [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files +- [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters +- [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages +- [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 +- [#687](https://github.com/nf-core/ampliseq/pull/687) - Correct conda package for ASV SSU filtering + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.7.1 - 2023-11-14 ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index ee03b01c8..73e92bc00 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -41,6 +41,10 @@ > Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer 
T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112. +- [Greengenes2](https://doi.org/10.1038/s41587-023-01845-1) + + > McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1 + - [PR2 - Protist Reference Ribosomal Database](https://pubmed.ncbi.nlm.nih.gov/23193267/) > Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120. @@ -61,13 +65,21 @@ > Kõljalg U, Larsson KH, Abarenkov K, Nilsson RH, Alexander IJ, Eberhardt U, Erland S, Høiland K, Kjøller R, Larsson E, Pennanen T, Sen R, Taylor AF, Tedersoo L, Vrålstad T, Ursing BM. UNITE: a database providing web-based methods for the molecular identification of ectomycorrhizal fungi. New Phytol. 2005 Jun;166(3):1063-8. doi: 10.1111/j.1469-8137.2005.01376.x. PMID: 15869663. - - [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) +- [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) + + > Leray, M., Knowlton, N., & Machida, R. J. (2022). 
MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. doi: https://doi.org/10.1002/edn3.303. + +- [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) + + > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + +- [PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes](https://pubmed.ncbi.nlm.nih.gov/25740460/) - > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + > Decelle J, Romac S, Stern RF, Bendif el M, Zingone A, Audic S, Guiry MD, Guillou L, Tessier D, Le Gall F, Gourvil P, Dos Santos AL, Probert I, Vaulot D, de Vargas C, Christen R. PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy. Mol Ecol Resour. 2015 Nov;15(6):1435-45. doi: 10.1111/1755-0998.12401. Epub 2015 Apr 6. PMID: 25740460. - - [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) +- [Zehr lab nifH database](http://doi.org/10.5281/zenodo.7996213) - > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. 
doi: http://doi.org/10.5281/zenodo.7996213 ### Phylogenetic placement diff --git a/README.md b/README.md index ec59cee55..2e04e6cc8 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,8 @@ By default, the pipeline currently performs the following: ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, you need to know whether the sequencing files at hand are expected to contain primer sequences (usually yes) and if yes, what primer sequences. In the example below, the paired end sequencing data was produced with 515f (GTGYCAGCMGCCGCGGTAA) and 806r (GGACTACNVGGGTWTCTAAT) primers of the V4 region of the 16S rRNA gene. Please note, that those sequences should not contain any sequencing adapter sequences, only the sequence that matches the biological amplicon. @@ -68,19 +65,15 @@ nextflow run nf-core/ampliseq \ --outdir ``` -:::note -Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). -::: +> [!NOTE] +> Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). -:::note -By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). 
-::: +> [!TIP] +> By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters). diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4fd6b6ea9..af96c9d1a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c8fc21e7..752a6b17a 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -181,17 +181,13 @@ supporting denoising of any amplicon and supports a variety of taxonomic databas ```{r, results='asis'} if ( !isFALSE(params$metadata) ) { - cat(paste0(" -# Data input and Metadata - -Pipeline input was saved to the [input](../input) directory. - ")) + cat("# Data input and Metadata\n\n") } else { - cat(paste0(" -# Data input + cat("# Data input\n\n") +} -Pipeline input was saved in folder [input](../input). - ")) +if ( !isFALSE(params$metadata) || !isFALSE(params$input_samplesheet) ) { + cat("Pipeline input was saved in folder [input](../input).\n\n") } if ( !isFALSE(params$input_samplesheet) ) { @@ -262,8 +258,7 @@ the denoising tool or sequences might be lost due to being labelled as PCR chime # import tsv cutadapt_summary <- read.table(file = params$cutadapt_summary, header = TRUE, sep = "\t") -cutadapt_passed_col <- as.numeric(substr( - cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) +cutadapt_passed_col <- as.numeric( gsub("%","",cutadapt_summary$cutadapt_passing_filters_percent) ) cutadapt_max_discarded <- round( 100 - min(cutadapt_passed_col), 1 ) cutadapt_avg_passed <- round(mean(cutadapt_passed_col),1) @@ -980,9 +975,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - using the database: `", params$qiime2_ref_tax_title, "`. 
- More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +# indicate reference taxonomy +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +} else { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ", + "provided by the user.\n\n", sep = "") +} # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") diff --git a/assets/slackreport.json b/assets/slackreport.json index b170caabe..6eab3738f 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/ampliseq v${version} - ${runName}", + "author_name": "nf-core/ampliseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R index 19d5e3ae6..68a2b3928 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -18,7 +18,10 @@ dbversion <- args[1] taxfile <- args[2] taxmethod <- args[3] wfversion <- args[4] -predfile <- args[5] +cut_its <- args[5] +predfile <- args[6] + +cut_its = ifelse(cut_its == 'none', '', paste(' cut_its:', cut_its, sep='')) # Read taxonomy table taxonomy <- read.delim(taxfile, sep = '\t', stringsAsFactors = FALSE) @@ -108,10 +111,10 @@ taxtable <- taxonomy %>% date_identified = 
as.character(lubridate::today()), reference_db = dbversion, annotation_algorithm = case_when( - (taxmethod == 'sintax') ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) VSEARCH:sintax', sep=' '), - (!(is.na(otu) | otu == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) addsh', sep=' '), - (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies', sep=' '), - TRUE ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy', sep=' ') + (taxmethod == 'sintax') ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) VSEARCH:sintax',cut_its, sep=''), + (!(is.na(otu) | otu == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) addsh',cut_its, sep=''), + (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies',cut_its, sep=''), + TRUE ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy',cut_its, sep='') ), identification_references = 'https://docs.biodiversitydata.se/analyse-data/molecular-tools/#taxonomy-annotation', taxon_remarks = ifelse(!(is.na(domain) | domain == ''), paste('Domain = \'',domain,'\'',sep=''),''), diff --git a/bin/taxref_reformat_phytoref.sh b/bin/taxref_reformat_phytoref.sh new file mode 100755 index 000000000..c61c081ed --- /dev/null +++ b/bin/taxref_reformat_phytoref.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cat PhytoRef_with_taxonomy.fasta | sed '/>/s/>[^|]*|/>/' | sed '/>/s/|/;/g' > assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cat PhytoRef_with_taxonomy.fasta | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh new file mode 100755 index 000000000..69c75faed --- /dev/null +++ 
b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Decompress files. +gzip -c -d *.seqs.fna.gz > greengenes2.fna +gzip -c -d *.taxonomy.md5.tsv.gz > greengenes2.tax diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh new file mode 100755 index 000000000..54171f51b --- /dev/null +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cp *.fasta assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna diff --git a/conf/base.config b/conf/base.config index c628a10c3..c16be532b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -63,4 +63,9 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withName:QIIME2_EXTRACT { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 24.h * task.attempt, 'time' ) } + } } diff --git a/conf/modules.config b/conf/modules.config index 68794ab70..7b28c7e92 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -355,8 +355,7 @@ process { ext.seed = "${params.seed}" ext.args = [ 'minBoot = 50', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ @@ -375,9 +374,9 @@ process { withName: DADA2_ADDSPECIES { ext.seed = "${params.seed}" ext.args = [ - 'allowMultiple = FALSE, n = 1e5', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + 'n = 1e5', + params.dada_addspecies_allowmultiple ? "allowMultiple = TRUE" : "allowMultiple = FALSE", + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? 
"tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ @@ -451,7 +450,7 @@ process { } withName: VSEARCH_CLUSTER { - ext.args = "--id ${params.vsearch_cluster_id} --usersort" + ext.args = "--id ${params.vsearch_cluster_id} --usersort --qmask 'none'" ext.args2 = '--cluster_smallmem' ext.args3 = '--clusters' } @@ -836,7 +835,7 @@ process { } withName: MULTIQC { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ecc..e89df3383 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -25,22 +25,6 @@ params { fmtscript = "taxref_reformat_coidb.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } - 'midori2-co1' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } - 'midori2-co1=gb250' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. 
Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } 'gtdb' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] @@ -76,6 +60,30 @@ params { fmtscript = "taxref_reformat_gtdb.sh" dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)" } + 'midori2-co1' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'midori2-co1=gb250' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. 
Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'phytoref' { + title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes" + file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] + citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." + fmtscript = "taxref_reformat_phytoref.sh" + dbversion = "unversioned" + taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] @@ -239,6 +247,22 @@ params { dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] } + 'zehr-nifh' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. 
Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } + 'zehr-nifh=2.5.0' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } } //QIIME2 taxonomic reference databases qiime_ref_databases { @@ -306,6 +330,18 @@ params { citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139" fmtscript = "taxref_reformat_qiime_greengenes85.sh" } + 'greengenes2' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } + 'greengenes2=2022.10' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. 
Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } } //Sintax taxonomic reference databases sintax_ref_databases { diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config new file mode 100644 index 000000000..dd02eb4e9 --- /dev/null +++ b/conf/test_qiimecustom.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test custom QIIME2 reference taxonomy database profile' + config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + + // Custom reference taxonomy + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.fna.gz,https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tax.gz" + + // Skip downstream analysis with QIIME2 + skip_qiime_downstream = true + skip_dada_taxonomy = true +} diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 4233d1ea0..40408bfb1 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,8 @@ params 
{ dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 - skip_qiime = true + skip_qiime_downstream = true } diff --git a/docs/output.md b/docs/output.md index f12fc41fa..c2b935373 100644 --- a/docs/output.md +++ b/docs/output.md @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d - `ASV_table.tsv`: Counts for each ASV sequence. - `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample. - `DADA2_table.rds`: DADA2 ASV table as R object. - - `DADA2_tables.tsv`: DADA2 ASV table. + - `DADA2_table.tsv`: DADA2 ASV table. - `dada2/args/`: Directory containing files with all parameters for DADA2 steps. - `dada2/log/`: Directory containing log files for DADA2 steps. - `dada2/QC/` @@ -407,7 +407,7 @@ All following analysis is based on these filtered tables. - `seven_number_summary.tsv`: Length of ASV sequences in different quantiles. - `filtered-sequences.qza`: QIIME2 fragment. - `qiime2/abundance_tables/` - - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `count_table_filter_stats.tsv`: Tab-separated table with information on how much counts were filtered for each sample. - `feature-table.biom`: Abundance table in biom format for importing into downstream analysis tools. - `feature-table.tsv`: Tab-separated abundance table for each ASV and each sample. 
@@ -423,7 +423,7 @@ Absolute abundance tables produced by the previous steps contain count data, but Output files - `qiime2/rel_abundance_tables/` - - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `rel-table-*.tsv`: Tab-separated relative abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `rel-table-ASV.tsv`: Tab-separated relative abundance table for all ASVs. - `rel-table-ASV_with-DADA2-tax.tsv`: Tab-separated table for all ASVs with DADA2 taxonomic classification, sequence and relative abundance. - `rel-table-ASV_with-QIIME2-tax.tsv`: Tab-separated table for all ASVs with QIIME2 taxonomic classification, sequence and relative abundance. diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23f..acf62a379 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,18 +221,21 @@ Pre-configured reference taxonomy databases are: | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes | | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- | | silva | + | - | + | + | 16S rRNA | -| gtdb | + | - | - | - | 16S rRNA | +| gtdb | +¹ | - | - | - | 16S rRNA | | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | -| greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes | - | - | + | (+)² | 16S rRNA | +| greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | -| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses² | +| phytoref | + | - | - | - | eukaryotic
plastid 16S rRNA | +| zehr-nifh | + | - | - | - | Nitrogenase iron protein NifH | +| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | -¹: de-replicated at 85%, only for testing purposes; ²: quality of results might vary +¹: [`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary Special features of taxonomic classification tools: @@ -241,7 +244,7 @@ Special features of taxonomic classification tools: - QIIME2's reference taxonomy databases will have regions matching the amplicon extracted with primer sequences. - DADA2, Kraken2, and QIIME2 have specific parameters to accept custom databases (but theoretically possible with all classifiers) -Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). +Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). Citations are listed in [`CITATIONS.md`](CITATIONS.md).
### Metadata diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d0..e248e4c3f 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + 
output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 5e1039115..25db3ed68 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -77,12 +77,12 @@ class WorkflowAmpliseq { } if (params.skip_dada_taxonomy && params.sbdiexport) { - if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") } } - if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { + if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. 
Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -90,8 +90,8 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. Please specify primers or do not use `--cut_dada_ref_taxonomy`.") } - if (params.qiime_ref_taxonomy && params.classifier) { - Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { + Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7f49735e4..4b7ec2afc 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -34,7 +34,7 @@ class WorkflowMain { if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { sintaxreftaxonomyExistsError(params, log) } - if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } diff --git a/modules.json b/modules.json index b85f77f5d..595c024f8 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "c7494026693ba1a7db683e1520816709db3f05a0", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "cutadapt": { @@ -27,7 +27,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "c7494026693ba1a7db683e1520816709db3f05a0", + "git_sha": 
"65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "gappa/examineassign": { @@ -78,7 +78,12 @@ }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "installed_by": ["modules"] + }, + "pigz/uncompress": { + "branch": "master", + "git_sha": "4ef7becf6a2bbc8df466885d10b4051d1f318a6a", "installed_by": ["modules"] }, "untar": { diff --git a/modules/local/filter_ssu.nf b/modules/local/filter_ssu.nf index 5b3c623c6..314a63c70 100644 --- a/modules/local/filter_ssu.nf +++ b/modules/local/filter_ssu.nf @@ -2,7 +2,7 @@ process FILTER_SSU { tag "${fasta}" label 'process_low' - conda "bioconductor::biostrings=2.58.0" + conda "bioconda::bioconductor-biostrings=2.58.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' : 'biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }" diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index bbc6218b3..946c91fa0 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -8,8 +8,7 @@ process PHYLOSEQ { 'biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }" input: - tuple val(prefix), path(tax_tsv) - path otu_tsv + tuple val(prefix), path(tax_tsv), path(otu_tsv) path sam_tsv path tree diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf index f3a61b6e5..7ff383fd8 100644 --- a/modules/local/qiime2_extract.nf +++ b/modules/local/qiime2_extract.nf @@ -1,7 +1,5 @@ process QIIME2_EXTRACT { tag "${meta.FW_primer}-${meta.RV_primer}" - label 'process_low' - label 'single_cpu' container "qiime2/core:2023.7" @@ -20,6 +18,7 @@ process QIIME2_EXTRACT { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "QIIME2 does not support Conda. 
Please use Docker / Singularity / Podman instead." } + def args = task.ext.args ?: '' """ export XDG_CONFIG_HOME="./xdgconfig" export MPLCONFIGDIR="./mplconfigdir" @@ -37,9 +36,11 @@ process QIIME2_EXTRACT { --output-path ref-taxonomy.qza #Extract sequences based on primers qiime feature-classifier extract-reads \\ + --p-n-jobs ${task.cpus} \\ --i-sequences ref-seq.qza \\ --p-f-primer ${meta.FW_primer} \\ --p-r-primer ${meta.RV_primer} \\ + $args \\ --o-reads ${meta.FW_primer}-${meta.RV_primer}-ref-seq.qza \\ --quiet diff --git a/modules/local/sbdiexportreannotate.nf b/modules/local/sbdiexportreannotate.nf index f06fae364..8ebe870ce 100644 --- a/modules/local/sbdiexportreannotate.nf +++ b/modules/local/sbdiexportreannotate.nf @@ -11,6 +11,7 @@ process SBDIEXPORTREANNOTATE { path taxonomytable val taxonomymethod val dbversion + val cut_its path predictions output: @@ -28,7 +29,7 @@ process SBDIEXPORTREANNOTATE { ampliseq_version="v$workflow.manifest.version" fi - sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $predictions + sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $cut_its $predictions cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index d886f19bb..1a288a0fb 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,8 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? 
"pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", + qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? 
"barplot=TRUE" : "", diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 000000000..f0c63f698 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index c9d014b11..7685b33cd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de7..5f15a5fde 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 8713b9216..4274ed57a 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -3,25 +3,25 @@ "content": [ { "0": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" ], "1": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" ], "2": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + 
"versions.yml:md5,3843ac526e762117eedf8825b40683df" ], "mqc_yml": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" ], "versions": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + "versions.yml:md5,3843ac526e762117eedf8825b40683df" ], "yml": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" ] } ], - "timestamp": "2023-10-11T17:10:02.930699" + "timestamp": "2023-11-03T14:43:22.157011" } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 000000000..405aa24ae --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 000000000..1787b38a9 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 67209f793..9e19a74c5 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.12.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a0..ee5507e06 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144d..b9e8f926e 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -38,4 +38,72 @@ nextflow_process { ) } } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } } diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 000000000..7834294ba --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 000000000..bc0bdb5b6 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 65d7dd0de..00cc48d27 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee51..f1aa660eb 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 000000000..c2dad217c --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + 
script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 000000000..bea6c0d37 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf new file mode 100644 index 000000000..9383c1464 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/main.nf @@ -0,0 +1,48 @@ +process PIGZ_UNCOMPRESS { + label 'process_low' + //stageInMode 'copy' // this directive can be set in case the original input should be kept + + conda "conda-forge::pigz" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + path zip + + output: + path "${uncompressed_filename}" , emit: file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + // calling pigz -f to make it follow symlinks + """ + unpigz \\ + -p $task.cpus \\ + -fk \\ + $args \\ + ${zip} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + """ + touch ${zip.dropRight(3)} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml new file mode 100644 index 000000000..574a004be --- /dev/null +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -0,0 +1,32 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "pigz_uncompress" +description: Uncompress gzip-compressed files with unpigz +keywords: + - uncompress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm."
+ homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - zip: + type: file + description: Gzipped file + pattern: "*.gz" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file: + type: file + description: Uncompressed file + pattern: "*" + +authors: + - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test new file mode 100644 index 000000000..579556586 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process PIGZ_UNCOMPRESS" + script "modules/nf-core/pigz/uncompress/main.nf" + process "PIGZ_UNCOMPRESS" + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/uncompress" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap new file mode 100644 index 000000000..038cf2d72 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "1": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ], + "file": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "versions": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ] + } + ], + "timestamp": "2023-10-18T12:37:21.987858" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml new file mode 100644
index 000000000..6719a90ad --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/uncompress: + - modules/nf-core/pigz/uncompress/** diff --git a/nextflow.config b/nextflow.config index 93e19e86a..903eda546 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,6 +87,7 @@ params { skip_dada_quality = false skip_barrnap = false skip_qiime = false + skip_qiime_downstream = false skip_fastqc = false skip_alpha_rarefaction = false skip_abundance_tables = false @@ -106,12 +107,15 @@ params { dada_ref_tax_custom = null dada_ref_tax_custom_sp = null cut_dada_ref_taxonomy = false + dada_addspecies_allowmultiple = false + dada_taxonomy_rc = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null + qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null - kraken2_confidence = 0 + kraken2_confidence = 0.0 // MultiQC options multiqc_config = null @@ -177,6 +181,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -199,16 +204,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -272,6 +277,7 @@ profiles { test_failed { includeConfig 'conf/test_failed.config' } test_full { includeConfig 'conf/test_full.config' } test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 
'conf/test_pplace.config' } test_sintax { includeConfig 'conf/test_sintax.config' } @@ -287,7 +293,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -304,6 +310,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -329,7 +338,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.7.1' + version = '2.8.0' doi = '10.5281/zenodo.1493841' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da5..938690d47 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was 
produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", + "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": "assets/schema_input.json" }, "input_fasta": { @@ -24,14 +24,14 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to ASV/OTU fasta file", - "help_text": "Path to fasta format file with sequences that will be taxonomically classified. 
The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to fasta format file with sequences that will be taxonomically classified. The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "input_folder": { "type": "string", "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to folder containing zipped FastQ files", - "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. 
If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. 
All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "FW_primer": { "type": "string", @@ -335,41 +335,44 @@ "description": "Name of supported database, and optionally also version number", "default": "silva=138", "enum": [ - "midori2-co1=gb250", - "midori2-co1", + "coidb", + "coidb=221216", + "gtdb", "gtdb=R05-RS95", "gtdb=R06-RS202", "gtdb=R07-RS207", "gtdb=R08-RS214", - "gtdb", - "coidb", - "coidb=221216", - "pr2=5.0.0", - "pr2=4.14.0", - "pr2=4.13.0", + "midori2-co1", + "midori2-co1=gb250", + "phytoref", "pr2", - "rdp=18", + "pr2=4.13.0", + "pr2=4.14.0", + "pr2=5.0.0", "rdp", + "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R07-RS207-1", - "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R07-RS207-1", + "silva", "silva=132", "silva=138", - "silva", - "unite-fungi=9.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", + "unite-alleuk", "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.3", + "unite-alleuk=9.0", + "unite-fungi", + "unite-fungi=8.2", + "unite-fungi=8.3", + 
"unite-fungi=9.0", + "zehr-nifh", + "zehr-nifh=2.5.0" ] }, "dada_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", + "help_text": "Overwrites `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", "description": "Path to a custom DADA2 reference taxonomy database" }, "dada_ref_tax_custom_sp": { @@ -387,6 +390,16 @@ "help_text": "Expected amplified sequences are extracted from the DADA2 reference taxonomy using the primer sequences, that might improve classification. This is not applied to species classification (assignSpecies) but only for lower taxonomic levels (assignTaxonomy).", "description": "If the expected amplified sequences are extracted from the DADA2 reference taxonomy database" }, + "dada_addspecies_allowmultiple": { + "type": "boolean", + "help_text": "Defines the behavior when multiple exact matches against different species are returned. By default only unambiguous identifications are returned. 
If TRUE, a concatenated string of all exactly matched species is returned.", + "description": "If multiple exact matches against different species are returned" + }, + "dada_taxonomy_rc": { + "type": "boolean", + "help_text": "The reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence.", + "description": "If the reverse-complement of each sequence will also be tested for classification" + }, "pplace_tree": { "type": "string", "description": "Newick file with reference phylogenetic tree. Requires also `--pplace_aln` and `--pplace_model`." @@ -429,9 +442,16 @@ "unite-alleuk=8.3", "unite-alleuk=8.2", "unite-alleuk", - "greengenes85" + "greengenes85", + "greengenes2", + "greengenes2=2022.10" ] }, + "qiime_ref_tax_custom": { + "type": "string", + "help_text": "Overwrites `--qiime_ref_taxonomy`. Either path to tarball (`*.tar.gz` or `*.tgz`) that contains sequence (`*.fna`) and taxonomy (`*.tax`) data, or alternatively a comma-separated pair of filepaths to sequence (`*.fna`) and taxonomy (`*.tax`) data (possibly gzipped `*.gz`).", + "description": "Path to files of a custom QIIME2 reference taxonomy database (tarball, or two comma-separated files)" + }, "classifier": { "type": "string", "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)", @@ -455,7 +475,7 @@ }, "kraken2_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. See also https://benlangmead.github.io/aws-indexes/k2.", + "help_text": "Overwrites `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. 
See also https://benlangmead.github.io/aws-indexes/k2.", "description": "Path to a custom Kraken2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "kraken2_assign_taxlevels": { @@ -651,6 +671,10 @@ "type": "boolean", "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing." }, + "skip_qiime_downstream": { + "type": "boolean", + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing." + }, "skip_taxonomy": { "type": "boolean", "description": "Skip taxonomic classification. Incompatible with `--sbdiexport`" diff --git a/subworkflows/local/parse_input.nf b/subworkflows/local/parse_input.nf index ba8aa4846..ae134ae9d 100644 --- a/subworkflows/local/parse_input.nf +++ b/subworkflows/local/parse_input.nf @@ -11,7 +11,7 @@ workflow PARSE_INPUT { //Check folders in folder when multiple_sequencing_runs folders = multiple_sequencing_runs ? "/*" : "" error_message = "\nCannot find any reads matching: \"${input}${folders}${extension}\"\n" - error_message += "Please revise the input folder (\"--input\"): \"${input}\"\n" + error_message += "Please revise the input folder (\"--input_folder\"): \"${input}\"\n" error_message += "and the input file pattern (\"--extension\"): \"${extension}\"\n" error_message += "*Please note: Path needs to be enclosed in quotes!*\n" error_message += multiple_sequencing_runs ? 
"If you do not have multiple sequencing runs, please do not use \"--multiple_sequencing_runs\"!\n" : "If you have multiple sequencing runs, please add \"--multiple_sequencing_runs\"!\n" diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index adf208b70..3b6d9dd46 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax.combine(ch_phyloseq_inasv), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7f3cb80b2..ce7bac788 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -2,6 +2,8 @@ * Training of a classifier with QIIME2 */ +include { UNTAR } from '../../modules/nf-core/untar/main' +include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -9,13 +11,82 @@ include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' workflow QIIME2_PREPTAX { take: ch_qiime_ref_taxonomy //channel, list of files + val_qiime_ref_taxonomy //val FW_primer //val RV_primer //val main: - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions = Channel.empty() + + if (params.qiime_ref_tax_custom) { + // Handle case where we have been provided a pair of filepaths. 
+ if ("${params.qiime_ref_tax_custom}".contains(",")) { + ch_qiime_ref_taxonomy.flatten() + .branch { + compressed: it.isFile() && it.getName().endsWith(".gz") + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." } + + PIGZ_UNCOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(PIGZ_UNCOMPRESS.out.versions) + + ch_qiime_db_files = PIGZ_UNCOMPRESS.out.file + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + + ch_ref_database_fna = ch_qiime_db_files.filter { + it.getName().endsWith(".fna") + } + ch_ref_database_tax = ch_qiime_db_files.filter { + it.getName().endsWith(".tax") + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + // Handle case where we have been provided a single filepath (tarball or directory). + } else { + ch_qiime_ref_taxonomy.flatten() + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." 
} + + UNTAR ( + ch_qiime_ref_tax_branched.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions) + + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + } + } else { + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions) + + ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) + } - ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) ch_ref_database .map { db -> @@ -24,10 +95,15 @@ workflow QIIME2_PREPTAX { meta.RV_primer = RV_primer [ meta, db ] } .set { ch_ref_database } + QIIME2_EXTRACT ( ch_ref_database ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions) + QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions) emit: - classifier = QIIME2_TRAIN.out.qza - versions = QIIME2_TRAIN.out.versions + classifier = QIIME2_TRAIN.out.qza + versions = ch_qiime2_preptax_versions } + diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index 26ffdc7ad..7cb5f68a6 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ 
b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, @@ -52,8 +52,8 @@ }, "multiqc": { "content": [ - "multiqc_general_stats.txt:md5,8429be0a16adf09b6634bf31b430bfac", - "multiqc_cutadapt.txt:md5,e89359b4478ef5d10620709f651f26a2" + "multiqc_general_stats.txt:md5,bb1d98b03d4cd5091acfbef93cb38fc4", + "multiqc_cutadapt.txt:md5,0ef33b6eb4d202c34fcfa51a0dffadda" ], "timestamp": "2023-05-28T21:08:54+0000" } diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index bde435d9a..c049085ce 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.7.1}}" 
+ "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 70bdf6153..989cbd593 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 10a5f78eb..913b38188 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, 
PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 89bf199bf..e6c17e7fb 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 0adfdad85..3cbdef01a 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, 
CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9b..781d3dcd9 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -57,7 +57,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index abd94f98f..c8c3a9f1a 100644 
--- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, @@ -52,8 +52,8 @@ }, "multiqc": { "content": [ - 
"multiqc_general_stats.txt:md5,9e8ff06d7285ab8748a80e639d3dd54a", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,cbe0b448f630111ee18976891354701a", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-06-20T17:24:03+0000" } diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test new file mode 100644 index 000000000..493968153 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test @@ -0,0 +1,51 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_qiimecustom" + tag "qiime2" + tag "pipeline" + + test("Custom QIIME2 Reference Taxonomy Database") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() 
}, + { assert new File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, + { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() } + ) + } + } +} diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap new file mode 100644 index 000000000..5f758fd48 --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -0,0 +1,43 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "software_versions": { + "content": [ + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a" + 
], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "multiqc": { + "content": [ + "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" + ], + "timestamp": "2023-05-28T21:18:54+0000" + } +} diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index abd2a38a2..dba78c126 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,11 +44,14 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/kraken2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 875389718..4bfd8c2d7 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap 
+++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, @@ -53,8 +53,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T21:18:54+0000" } diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 751d9a832..1b6e33c07 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, 
dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index aae3466cd..fed045a40 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, 
VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 8fc51fa05..8441f2a56 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, @@ -58,8 +58,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - 
"multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T20:55:32+0000" }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee76..3f158200d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -59,9 +59,25 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "none" } -if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { +if (params.qiime_ref_tax_custom) { + if ("${params.qiime_ref_tax_custom}".contains(",")) { + qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") + if (qiime_ref_paths.length != 2) { + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input." + } + + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) + } else { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + } + val_qiime_ref_taxonomy = "user" +} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } -} else { ch_qiime_ref_taxonomy = Channel.empty() } + val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') +} else { + ch_qiime_ref_taxonomy = Channel.empty() + val_qiime_ref_taxonomy = "none" +} if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } @@ -130,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } } -//only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( 
!(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps. +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) { + run_qiime2_taxonomy = true +} else { + run_qiime2_taxonomy = false +} + +//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -551,10 +574,11 @@ workflow AMPLISEQ { } //QIIME2 - if ( run_qiime2 ) { - if (params.qiime_ref_taxonomy && !params.classifier) { + if ( run_qiime2_taxonomy ) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer ) @@ -607,7 +631,7 @@ workflow AMPLISEQ { log.info "Use Kraken2 taxonomy classification" val_used_taxonomy = "Kraken2" ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.classifier ) { + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { log.info "Use QIIME2 taxonomy classification" val_used_taxonomy = "QIIME2" ch_tax = 
QIIME2_TAXONOMY.out.qza @@ -707,7 +731,7 @@ workflow AMPLISEQ { // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 // if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) } else { PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) @@ -722,11 +746,11 @@ workflow AMPLISEQ { if ( params.sintax_ref_taxonomy ) { SBDIEXPORT ( ch_dada2_asv, ch_sintax_tax, ch_metadata ) db_version = params.sintax_ref_databases[params.sintax_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } else { SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata ) db_version = params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } @@ -839,7 +863,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? 
KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "",