From b6f2d69fbc8745ac4d3b62c43e0c2174538c5567 Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:52:29 +0100 Subject: [PATCH 1/2] Reorganise arguments for clearer syntax Changes: - Grouped arguments into sections based on _what they do_ - Reordered slightly to go in chronological order of the pipeline and set up - I think it's clearer for a new user Fixes #1090 --- CHANGELOG.md | 1 + nextflow_schema.json | 364 +++++++++++++++++++++++-------------------- 2 files changed, 194 insertions(+), 171 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c589534d3..b05afcc6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [PR #1054](https://github.com/nf-core/rnaseq/pull/1054) - Template update to nf-core/tools v2.9 - [PR #1058](https://github.com/nf-core/rnaseq/pull/1058) - Use `nf-validation` plugin for parameter and samplesheet validation - [PR #1068](https://github.com/nf-core/rnaseq/pull/1068) - Update `grep` version for `untar` module +- [PR ####](https://github.com/nf-core/rnaseq/pull/####) - Reorganise command line for better usability ## [[3.12.0](https://github.com/nf-core/rnaseq/releases/tag/3.12.0)] - 2023-06-02 diff --git a/nextflow_schema.json b/nextflow_schema.json index e3f16078a..3877cd533 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -40,137 +40,9 @@ "type": "string", "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" - }, - "save_merged_fastq": { - "type": "boolean", - "fa_icon": "fas fa-save", - "description": "Save FastQ files after merging re-sequenced libraries in the results directory." 
} } }, - "umi_options": { - "title": "UMI options", - "type": "object", - "description": "Options for processing reads with unique molecular identifiers", - "default": "", - "properties": { - "with_umi": { - "type": "boolean", - "fa_icon": "fas fa-barcode", - "description": "Enable UMI-based read deduplication." - }, - "umitools_extract_method": { - "type": "string", - "default": "string", - "fa_icon": "fas fa-barcode", - "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", - "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" - }, - "skip_umi_extract": { - "type": "boolean", - "fa_icon": "fas fa-compress-alt", - "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run." - }, - "umitools_bc_pattern": { - "type": "string", - "fa_icon": "fas fa-barcode", - "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", - "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." - }, - "umitools_bc_pattern2": { - "type": "string", - "fa_icon": "fas fa-barcode", - "description": "The UMI barcode pattern to use if the UMI is located in read 2." - }, - "umi_discard_read": { - "type": "integer", - "fa_icon": "fas fa-barcode", - "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." - }, - "umitools_umi_separator": { - "type": "string", - "fa_icon": "fas fa-star-half-alt", - "description": "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software." 
- }, - "umitools_grouping_method": { - "type": "string", - "default": "directional", - "fa_icon": "far fa-object-ungroup", - "description": "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", - "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] - }, - "umitools_dedup_stats": { - "type": "boolean", - "fa_icon": "fas fa-barcode", - "help_text": "It can be quite time consuming generating these output stats - see [#827](https://github.com/nf-core/rnaseq/issues/827).", - "description": "Generate output stats when running \"umi_tools dedup\"." - }, - "save_umi_intermeds": { - "type": "boolean", - "fa_icon": "fas fa-save", - "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." - } - }, - "fa_icon": "fas fa-barcode" - }, - "read_filtering_options": { - "title": "Read filtering options", - "type": "object", - "description": "Options for filtering reads prior to alignment", - "default": "", - "properties": { - "bbsplit_fasta_list": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "fa_icon": "fas fa-list-alt", - "description": "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. You have to also explicitly set `--skip_bbsplit false` if you want to use BBSplit.", - "help_text": "The file should contain 2 columns: short name and full path to reference genome(s) e.g. 
\n```\nmm10,/path/to/mm10.fa\necoli,/path/to/ecoli.fa\n```" - }, - "bbsplit_index": { - "type": "string", - "format": "path", - "exists": true, - "fa_icon": "fas fa-bezier-curve", - "description": "Path to directory or tar.gz archive for pre-built BBSplit index.", - "help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs." - }, - "save_bbsplit_reads": { - "type": "boolean", - "fa_icon": "fas fa-save", - "description": "If this option is specified, FastQ files split by reference will be saved in the results directory." - }, - "skip_bbsplit": { - "type": "boolean", - "default": true, - "fa_icon": "fas fa-fast-forward", - "description": "Skip BBSplit for removal of non-reference genome reads." - }, - "remove_ribo_rna": { - "type": "boolean", - "fa_icon": "fas fa-trash-alt", - "description": "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.", - "help_text": "Any patterns found in the sequences defined by the '--ribo_database_manifest' parameter will be used." - }, - "ribo_database_manifest": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "default": "${projectDir}/assets/rrna-db-defaults.txt", - "fa_icon": "fas fa-database", - "description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", - "help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline Github repository in `assets/rrna-default-dbs.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases." 
- }, - "save_non_ribo_reads": { - "type": "boolean", - "fa_icon": "fas fa-save", - "description": "If this option is specified, intermediate FastQ files containing non-rRNA reads will be saved in the results directory." - } - }, - "fa_icon": "fas fa-trash-alt" - }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -317,12 +189,6 @@ "fa_icon": "fas fa-indent", "help_text": "The feature type used from the GTF file when generating the biotype plot with featureCounts." }, - "save_reference": { - "type": "boolean", - "description": "If generated by the pipeline save the STAR index in the results directory.", - "help_text": "If an alignment index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times.", - "fa_icon": "fas fa-save" - }, "igenomes_base": { "type": "string", "format": "directory-path", @@ -369,20 +235,105 @@ "default": 10000, "fa_icon": "fas fa-hand-paper", "description": "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low." + } + } + }, + "read_filtering_options": { + "title": "Read filtering options", + "type": "object", + "description": "Options for filtering reads prior to alignment", + "default": "", + "properties": { + "bbsplit_fasta_list": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "fa_icon": "fas fa-list-alt", + "description": "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. You have to also explicitly set `--skip_bbsplit false` if you want to use BBSplit.", + "help_text": "The file should contain 2 columns: short name and full path to reference genome(s) e.g. 
\n```\nmm10,/path/to/mm10.fa\necoli,/path/to/ecoli.fa\n```" }, - "skip_trimming": { + "bbsplit_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built BBSplit index.", + "help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs." + }, + "remove_ribo_rna": { "type": "boolean", - "description": "Skip the adapter trimming step.", - "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", - "fa_icon": "fas fa-fast-forward" + "fa_icon": "fas fa-trash-alt", + "description": "Enable the removal of reads derived from ribosomal RNA using SortMeRNA.", + "help_text": "Any patterns found in the sequences defined by the '--ribo_database_manifest' parameter will be used." }, - "save_trimmed": { + "ribo_database_manifest": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "default": "${projectDir}/assets/rrna-db-defaults.txt", + "fa_icon": "fas fa-database", + "description": "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "help_text": "By default, [rRNA databases](https://github.com/biocore/sortmerna/tree/master/data/rRNA_databases) defined in the SortMeRNA GitHub repo are used. You can see an example in the pipeline GitHub repository in `assets/rrna-db-defaults.txt`.\nPlease note that commercial/non-academic entities require [`licensing for SILVA`](https://www.arb-silva.de/silva-license-information) for these default databases." 
+ } + }, + "fa_icon": "fas fa-trash-alt" + }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "properties": { + "with_umi": { "type": "boolean", - "description": "Save the trimmed FastQ files in the results directory.", - "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", - "fa_icon": "fas fa-save" + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." + }, + "umitools_extract_method": { + "type": "string", + "default": "string", + "fa_icon": "fas fa-barcode", + "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" + }, + "umitools_bc_pattern": { + "type": "string", + "fa_icon": "fas fa-barcode", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", + "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." + }, + "umitools_bc_pattern2": { + "type": "string", + "fa_icon": "fas fa-barcode", + "description": "The UMI barcode pattern to use if the UMI is located in read 2." + }, + "umi_discard_read": { + "type": "integer", + "fa_icon": "fas fa-barcode", + "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." + }, + "umitools_umi_separator": { + "type": "string", + "fa_icon": "fas fa-star-half-alt", + "description": "The character that separates the UMI in the read name. 
Most likely a colon if you skipped the extraction with UMI-tools and used other software." + }, + "umitools_grouping_method": { + "type": "string", + "default": "directional", + "fa_icon": "far fa-object-ungroup", + "description": "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + }, + "umitools_dedup_stats": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "help_text": "It can be quite time consuming generating these output stats - see [#827](https://github.com/nf-core/rnaseq/issues/827).", + "description": "Generate output stats when running \"umi_tools dedup\"." } - } + }, + "fa_icon": "fas fa-barcode" }, "alignment_options": { "title": "Alignment options", @@ -445,54 +396,119 @@ "type": "string", "description": "Extra arguments to pass to Salmon quant command in addition to defaults defined by the pipeline.", "fa_icon": "fas fa-plus" + } + } + }, + "optional_outputs": { + "title": "Optional outputs", + "type": "object", + "description": "Additional output files produced as intermediates that can be saved", + "default": "", + "properties": { + "save_merged_fastq": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Save FastQ files after merging re-sequenced libraries in the results directory." }, - "save_unaligned": { + "save_umi_intermeds": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", - "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." 
}, - "save_align_intermeds": { + "save_non_ribo_reads": { "type": "boolean", - "description": "Save the intermediate BAM files from the alignment step.", - "help_text": "By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ files containing non-rRNA reads will be saved in the results directory." + }, + "save_bbsplit_reads": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, FastQ files split by reference will be saved in the results directory." + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the STAR index in the results directory.", + "help_text": "If an alignment index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, - "skip_markduplicates": { + "save_trimmed": { "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Skip picard MarkDuplicates step." + "description": "Save the trimmed FastQ files in the results directory.", + "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", + "fa_icon": "fas fa-save" }, - "skip_alignment": { + "save_align_intermeds": { "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Skip all of the alignment-based processes within the pipeline." + "description": "Save the intermediate BAM files from the alignment step.", + "help_text": "By default, intermediate BAM files will not be saved. 
The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.", + "fa_icon": "fas fa-save" }, - "skip_pseudo_alignment": { + "save_unaligned": { "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Skip all of the pseudo-alignment-based processes within the pipeline." + "fa_icon": "fas fa-save", + "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." } } }, - "process_skipping_options": { - "title": "Process skipping options", + "quality_control": { + "title": "Quality Control", "type": "object", - "fa_icon": "fas fa-fast-forward", - "description": "Options to skip various steps within the workflow.", + "description": "Additional quality control options.", + "default": "", "properties": { + "deseq2_vst": { + "type": "boolean", + "description": "Use vst transformation instead of rlog with DESeq2.", + "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", + "fa_icon": "fas fa-dolly", + "default": true + }, "rseqc_modules": { "type": "string", "default": "bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication", "fa_icon": "fas fa-chart-pie", "description": "Specify the RSeQC modules to run." + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_bbsplit": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-fast-forward", + "description": "Skip BBSplit for removal of non-reference genome reads." 
}, - "deseq2_vst": { + "skip_umi_extract": { "type": "boolean", - "description": "Use vst transformation instead of rlog with DESeq2.", - "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", - "fa_icon": "fas fa-dolly", - "default": true + "fa_icon": "fas fa-compress-alt", + "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run." + }, + "skip_trimming": { + "type": "boolean", + "description": "Skip the adapter trimming step.", + "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_alignment": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all of the alignment-based processes within the pipeline." + }, + "skip_pseudo_alignment": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all of the pseudo-alignment-based processes within the pipeline." + }, + "skip_markduplicates": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip picard MarkDuplicates step." 
}, "skip_bigwig": { "type": "boolean", @@ -766,20 +782,26 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/umi_options" + "$ref": "#/definitions/reference_genome_options" }, { - "$ref": "#/definitions/read_filtering_options" + "$ref": "#/definitions/read_trimming_options" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/read_filtering_options" }, { - "$ref": "#/definitions/read_trimming_options" + "$ref": "#/definitions/umi_options" }, { "$ref": "#/definitions/alignment_options" }, + { + "$ref": "#/definitions/optional_outputs" + }, + { + "$ref": "#/definitions/quality_control" + }, { "$ref": "#/definitions/process_skipping_options" }, From dd45b04072f1f82601701926187f58e4097e5721 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 11 Oct 2023 21:14:38 +0200 Subject: [PATCH 2/2] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b05afcc6a..769b06625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [PR #1054](https://github.com/nf-core/rnaseq/pull/1054) - Template update to nf-core/tools v2.9 - [PR #1058](https://github.com/nf-core/rnaseq/pull/1058) - Use `nf-validation` plugin for parameter and samplesheet validation - [PR #1068](https://github.com/nf-core/rnaseq/pull/1068) - Update `grep` version for `untar` module -- [PR ####](https://github.com/nf-core/rnaseq/pull/####) - Reorganise command line for better usability +- [PR #1091](https://github.com/nf-core/rnaseq/pull/1091) - Reorganise parameters in schema for better usability ## [[3.12.0](https://github.com/nf-core/rnaseq/releases/tag/3.12.0)] - 2023-06-02