diff --git a/CHANGELOG.md b/CHANGELOG.md index b613e083..e1250b27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome complexity/coverage estimation with Nonpareil (added by @jfy133) -- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets can specify a `db_type` column to distinguish between short- and long-read databases (added by @LilyAnderssonLee) -- [#505](https://github.com/nf-core/taxprofiler/pull/505) - Add small files to the file `tower.yml` (added by @LilyAnderssonLee) -- [#508](https://github.com/nf-core/taxprofiler/pull/508) - Add `nanoq` as a filtering tool for nanopore reads (added by @LilyAnderssonLee) -- [#511](https://github.com/nf-core/taxprofiler/pull/511) - Add `porechop_abi` as an alternative adapter removal tool for long reads nanopore data (added by @LilyAnderssonLee) -- [#512](https://github.com/nf-core/taxprofiler/pull/512) - Update all tools to the latest version and include nf-test (updated by @LilyAnderssonLee & @jfy133) +- [#417](https://github.com/nf-core/taxprofiler/pull/417) Added reference-free metagenome complexity/coverage estimation with Nonpareil (added by @jfy133) +- [#466](https://github.com/nf-core/taxprofiler/pull/466) Input database sheets can specify a `db_type` column to distinguish between short- and long-read databases (added by @LilyAnderssonLee) +- [#505](https://github.com/nf-core/taxprofiler/pull/505) Add small files to the file `tower.yml` (added by @LilyAnderssonLee) +- [#508](https://github.com/nf-core/taxprofiler/pull/508) Add `nanoq` as a filtering tool for nanopore reads (added by @LilyAnderssonLee) +- [#511](https://github.com/nf-core/taxprofiler/pull/511) Add `porechop_abi` as an alternative adapter removal tool for long reads nanopore data (added by @LilyAnderssonLee) +- 
[#512](https://github.com/nf-core/taxprofiler/pull/512) Update all tools to the latest version and include nf-test (updated by @LilyAnderssonLee & @jfy133) +- [#532](https://github.com/nf-core/taxprofiler/pull/532) Configure MultiQC to collapse stats of paired-read files into one line (by @jfy133) ### `Fixed` - [#518](https://github.com/nf-core/taxprofiler/pull/518) Fixed a bug where Oxford Nanopore FASTA input files would not be processed (❤️ to @ikarls for reporting, fixed by @jfy133) - [#523](https://github.com/nf-core/taxprofiler/pull/523) Removed hardcoded `-m lca` from GANON_CLASSIFY due to more options in new version of ganon (fixed by @LilyAnderssonLee & @jfy133) +- [#531](https://github.com/nf-core/taxprofiler/pull/531) Fix FASTA input validation in schema allowing FASTQ extension, expand allowed FASTA extensions (fixed by @jfy133) +- [#532](https://github.com/nf-core/taxprofiler/pull/532) Minor formatting and ordering improvements in MultiQC report (by @jfy133) - [#532](https://github.com/nf-core/taxprofiler/pull/532) - Added missing documentation behind the 'ignore' BRACKEN_BRACKEN error strategy (❤️ to @Mavti for reporting, fixed by @jfy133) ### `Dependencies` @@ -35,7 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | metaphlan | 4.0.6 | 4.1.1 | | minimap2 | 2.24 | 2.28 | | motus/profile | 3.0.3 | 3.1.0 | -| multiqc | 1.21 | 1.24.1 | +| multiqc | 1.21 | 1.25 | | samtools | 1.17 | 1.20 | ### `Deprecated` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 82334688..73256fa4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -18,41 +18,41 @@ report_section_order: fastqc-1: order: 800 fastp: + order: 750 + adapterremoval: order: 700 - adapterRemoval: - order: 600 nonpareil: + order: 600 + bbduk: order: 500 + prinseqplusplus: + order: 550 porechop: - order: 400 - porechop_abi: order: 450 - bbduk: - order: 300 - prinseqplusplus: - order: 200 + porechop_abi: + order: 400 
filtlong: - order: 100 + order: 350 nanoq: - order: 95 + order: 300 bowtie2: - order: 90 + order: 200 samtools: - order: 80 + order: 100 kraken: - order: 70 + order: 90 bracken: - order: 60 + order: 80 centrifuge: - order: 50 + order: 70 malt: - order: 40 + order: 60 diamond: - order: 30 + order: 50 kaiju: - order: 20 + order: 40 motus: - order: 10 + order: 30 export_plots: true @@ -63,7 +63,7 @@ custom_logo_title: "nf-core/taxprofiler" run_modules: - fastqc - - adapterRemoval + - adapterremoval - fastp - nonpareil - bbduk @@ -72,7 +72,6 @@ run_modules: - filtlong - nanoq - bowtie2 - - minimap2 - samtools - kraken - kaiju @@ -83,7 +82,7 @@ run_modules: sp: diamond: - fn_re: ".*.diamond.log$" + fn: "*.diamond.log" fastqc/data: fn_re: ".*(fastqc|falco)_data.txt$" fastqc/zip: @@ -210,7 +209,8 @@ table_columns_placement: Filtlong: Target bases: 600 nanoq: - Read N50: 700 + Reads: 700 + Read N50: 710 BBDuk: Input reads: 800 Total Removed bases percent: 810 @@ -312,6 +312,7 @@ table_columns_visible: Target bases: True nanoq: ReadN50: True + Reads: True BBDuk: Input reads: False Total Removed bases Percent: False @@ -356,6 +357,17 @@ table_columns_name: reads_mapped: "Nr. 
Mapped Reads" reads_mapped_percent: "% Mapped Reads" +## Allow collapsing of file names with _R1/_R2 or _1/_2 at the end +table_sample_merge: + "Read 1": + - "_R1" + - type: regex + pattern: "[_.-][rR]?1$" + "Read 2": + - "_R2" + - type: regex + pattern: "[_.-][rR]?2$" + extra_fn_clean_exts: - "kraken2.report.txt" - ".txt" @@ -366,6 +378,7 @@ extra_fn_clean_exts: - "porechop" - "porechop_abi" - "_processed" + - ".diamond" - type: remove pattern: "_falco" diff --git a/assets/schema_input.json b/assets/schema_input.json index bfa0e6cd..0536930b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -39,21 +39,21 @@ "format": "file-path", "pattern": "^\\S+\\.f(ast)?q\\.gz$", "unique": true, - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "Gzipped FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { "type": "string", "format": "file-path", "pattern": "^\\S+\\.f(ast)?q\\.gz$", "unique": true, - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'. If not applicable, leave it empty." + "errorMessage": "Gzipped FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'. If not applicable, leave it empty." }, "fasta": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.(f(ast)?q|fa(sta)?)\\.gz$", + "pattern": "^\\S+\\.(fasta|fas|fna|fa)\\.gz$", "unique": true, - "errorMessage": "FastA file must be provided, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'. If not applicable, leave it empty." + "errorMessage": "Gzipped FastA file must be provided, cannot contain spaces and must have extension '.fa.gz', '.fna.gz', '.fas.gz', or '.fasta.gz'. If not applicable, leave it empty." 
} }, "required": ["sample", "run_accession", "instrument_platform"] diff --git a/docs/output.md b/docs/output.md index 1b069b52..18b2c004 100644 --- a/docs/output.md +++ b/docs/output.md @@ -723,7 +723,17 @@ You can expect in the MultiQC reports either sections and/or general stats colum - motus :::info -The 'General Stats' table by default will only show statistics referring to pre-processing steps, and will not display possible values from each classifier/profiler, unless turned on by the user within the 'Configure Columns' menu or via a custom MultiQC config file (`--multiqc_config`) +The 'General Stats' table by default will only show statistics referring to pre-processing steps, and will not display possible values from each classifier/profiler, unless turned on by the user within the 'Configure Columns' menu or via a custom MultiQC config file (`--multiqc_config`). + +For example, DIAMOND output does not have a dedicated section in the MultiQC HTML, only in the general stats table. To turn this on, copy the nf-core/taxprofiler [MultiQC config](https://github.com/nf-core/taxprofiler/blob/master/assets/multiqc_config.yml) and change the DIAMOND entry in `table_columns_visible:` to True. +::: + +:::info +In the 'General Stats' table, files that end with `_R1/_R2` or `_1/_2` prior to the file format extension will be collapsed into single rows. + +It is assumed that file names differing only by these characters are associated paired-end reads, and that their stats should be reported together. + +For example, `sample1_R1.fastq.gz` and `sample1_R2.fastq.gz` will be reported together as `sample1`, with R1/R2-specific stats included inside the collapsed row. 
::: ### Pipeline information diff --git a/docs/usage.md b/docs/usage.md index 34ffcb46..68a98788 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -101,6 +101,11 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p FASTA input will not go through any preprocessing steps, and will go directly to profiling. ::: +:::warning +File names that include `_R1`/`_R2` or `_1`/`_2` prior to the file format extension will automatically be collapsed in the MultiQC report's General Stats table. +Please see the output documentation for more information. +::: + ### Full database sheet nf-core/taxprofiler supports multiple databases being classified/profiled against in parallel for each tool. diff --git a/modules.json b/modules.json index 9c8ef6cc..f8f5f1e5 100644 --- a/modules.json +++ b/modules.json @@ -192,7 +192,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "git_sha": "7c316cae26baf55e0add993bed2b0c9f7105c653", "installed_by": ["modules"] }, "nanoq": { diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index f1cd99b0..0fe12644 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.24.1 + - bioconda::multiqc=1.25 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index ceaec139..b9ccebdb 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 83fa080c..b779e469 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" ] ], "meta": { @@ -17,7 +17,7 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" ] ], "meta": { @@ -29,7 +29,7 @@ "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,8c8724363a5efe0c6f43ab34faa57efd" ] ], "meta": { diff --git a/subworkflows/local/nonpareil.nf b/subworkflows/local/nonpareil.nf index 3489ab09..6810eb4c 100644 --- a/subworkflows/local/nonpareil.nf +++ b/subworkflows/local/nonpareil.nf @@ -15,8 +15,9 @@ workflow NONPAREIL { .map { meta, reads -> def reads_new = meta.single_end ? reads : reads[0] + // taxprofiler only accepts gzipped input files, + // so don't need to account for getBaseName removing all extensions def format = reads_new[0].getBaseName().split('\\.').last() in ['fasta', 'fna', 'fa', 'fas'] ? 'fasta' : 'fastq' - [meta, reads_new, format] } .multiMap {