From ae985a0148f0b1debf3de42c6ed9b065a7f64d0b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 10 Jul 2023 08:56:52 +0200 Subject: [PATCH] Add support for virus -e --- CHANGELOG.md | 5 +++-- conf/modules.config | 3 +++ nextflow.config | 5 +++-- nextflow_schema.json | 20 ++++++-------------- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cafe8b6..013d2cc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#298](https://github.com/nf-core/taxprofiler/pull/298) Added [ganon](https://pirovc.github.io/ganon/) (added by @jfy133) - Additional functionality - [#276](https://github.com/nf-core/taxprofiler/pull/276) Implemented batching in the KrakenUniq samples processing (added by @Midnighter) - - [#272](https://github.com/nf-core/taxprofiler/pull/272) Add saving of final 'analysis-ready-reads' to dedicated directory (❤️ to @alexhbnr for reporting, added by @jfy133) - - [#303](https://github.com/nf-core/taxprofiler/pull/303) Add support for taxpasta profile standardisation in single sample pipeline runs (❤️ to @artur-matysik for reporting, added by @jfy133) + - [#272](https://github.com/nf-core/taxprofiler/pull/272) Add saving of final 'analysis-ready-reads' to dedicated directory (❤️ to @alexhbnr for request, added by @jfy133) + - [#303](https://github.com/nf-core/taxprofiler/pull/303) Add support for taxpasta profile standardisation in single sample pipeline runs (❤️ to @artur-matysik for request, added by @jfy133) - [#315](https://github.com/nf-core/taxprofiler/pull/315) Updated to nf-core pipeline template v2.9 (added by @sofstam & @jfy133) + - [#319](https://github.com/nf-core/taxprofiler/pull/319) Added support for virus hit expansion in Kaiju (❤️ to @dnlrxn for requesting, added by @jfy133) ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index 150f8218..2c64c656 100644 --- 
a/conf/modules.config +++ b/conf/modules.config @@ -582,6 +582,9 @@ process { } withName: 'KAIJU_KAIJU2TABLE_SINGLE' { + ext.args = {[ + params.kaiju_expand_viruses ? "-e" : "" + ].join(' ').trim() } ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" } publishDir = [ path: { "${params.outdir}/kaiju/${meta.db_name}/" }, diff --git a/nextflow.config b/nextflow.config index 3d5e3827..1b4895c3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - + // MultiQC options multiqc_config = null @@ -42,7 +42,7 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -141,6 +141,7 @@ params { // kaiju run_kaiju = false + kaiju_expand_viruses = false kaiju_taxon_rank = 'species' // diamond diff --git a/nextflow_schema.json b/nextflow_schema.json index e6db4f34..4124bf5e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -106,21 +106,18 @@ }, "shortread_qc_adapter1": { "type": "string", - "default": "None", "fa_icon": "fas fa-grip-lines", "description": "Specify adapter 1 nucleotide sequence", "help_text": "Specify a custom forward or R1 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCACACGTCTGAACTCCAGTCA`\n> - AdapterRemoval: `--adapter1`. 
AdapteRemoval2 default: `AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG`" }, "shortread_qc_adapter2": { "type": "string", - "default": "None", "fa_icon": "fas fa-grip-lines", "description": "Specify adapter 2 nucleotide sequence", "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" }, "shortread_qc_adapterlist": { "type": "string", - "default": "None", "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).", "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. 
fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta", "fa_icon": "fas fa-th-list" @@ -275,21 +272,18 @@ }, "hostremoval_reference": { "type": "string", - "default": "None", "fa_icon": "fas fa-file-alt", "description": "Specify path to single reference FASTA of host(s) genome(s)", "help_text": "Specify a path to the FASTA file (optionally gzipped) of the reference genome of the organism to be removed.\n\nIf you have two or more host organisms or contaminants you wish to remove, you can concatenate the FASTAs of the different taxa into a single one to provide to the pipeline." }, "shortread_hostremoval_index": { "type": "string", - "default": "None", "fa_icon": "fas fa-address-book", "description": "Specify path to the directory containing pre-made BowTie2 indexes of the host removal reference", "help_text": "Specify the path to a _directory_ containing pre-made Bowtie2 reference index files (i.e. the directory containing `.bt1`, `.bt2` files etc.). These should sit in the same directory alongside the the reference file specified in `--hostremoval_reference`.\n\nSpecifying premade indices can speed up runtime of the host-removal step, however if not supplied the pipeline will generate the indices for you." }, "longread_hostremoval_index": { "type": "string", - "default": "None", "fa_icon": "fas fa-address-book", "description": "Specify path to a pre-made Minimap2 index file (.mmi) of the host removal reference", "help_text": "Specify path to a pre-made Minimap2 index file (.mmi) of the host removal reference file given to `--hostremoval_reference`.\n\nSpecifying a premade index file can speed up runtime of the host-removal step, however if not supplied the pipeline will generate the indices for you." @@ -377,6 +371,12 @@ "fa_icon": "fas fa-toggle-on", "description": "Turn on profiling with Kaiju. 
Requires database to be present CSV file passed to --databases" }, + "kaiju_expand_viruses": { + "type": "boolean", + "description": "Turn on expanding of virus hits to individual viruses rather than aggregating at a taxonomic level.", + "help_text": "Turn on the reporting by Kaiju of viruses at specific virus levels, rather than aggregating at specific taxonomic levels as specified by `--kaiju_taxon_rank` (i.e., read counts will not be summarised at higher taxonomic levels).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-e`", + "fa_icon": "fas fa-expand-arrows-alt" + }, "kaiju_taxon_rank": { "type": "string", "default": "species", @@ -571,7 +571,6 @@ }, "krona_taxonomy_directory": { "type": "string", - "default": "None", "fa_icon": "fas fa-folder-open", "description": "Specify path to krona taxonomy directories (required for MALT krona plots)", "help_text": "Specify a path to a Krona taxonomy database directory (i.e. a directory containing a krona generated `.tab` file).\n\nThis is only required for generating Krona plots of MALT output.\n\nNote this taxonomy database must be downloaded and generated with the `updateTaxonomy.sh` script from the krona-tools package." 
@@ -710,14 +709,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -741,7 +738,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -756,7 +752,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -795,7 +790,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -803,7 +797,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -811,7 +804,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." }