From a0f0602ddef4bfc49952a0188be85015274a6a75 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 12 Oct 2023 19:10:07 +0100 Subject: [PATCH 1/6] Run nf-core modules update --all --- CHANGELOG.md | 8 +-- docs/usage.md | 6 +-- modules.json | 46 ++++++++-------- modules/nf-core/bbmap/bbsplit/main.nf | 1 + .../custom/dumpsoftwareversions/main.nf | 6 +-- .../dumpsoftwareversions/tests/main.nf.test | 37 +++++++++++++ .../tests/main.nf.test.snap | 27 ++++++++++ modules/nf-core/gunzip/main.nf | 10 ++-- modules/nf-core/hisat2/align/main.nf | 5 +- modules/nf-core/hisat2/build/main.nf | 6 +-- modules/nf-core/picard/markduplicates/main.nf | 4 ++ modules/nf-core/qualimap/rnaseq/main.nf | 2 +- modules/nf-core/qualimap/rnaseq/meta.yml | 52 +++++++++++++++++++ modules/nf-core/salmon/quant/main.nf | 4 +- modules/nf-core/salmon/quant/meta.yml | 5 +- modules/nf-core/samtools/flagstat/main.nf | 11 ++++ modules/nf-core/samtools/idxstats/main.nf | 12 +++++ modules/nf-core/star/align/main.nf | 4 +- modules/nf-core/star/align/meta.yml | 24 +++++++++ modules/nf-core/star/genomegenerate/main.nf | 8 +-- modules/nf-core/star/genomegenerate/meta.yml | 15 ++++++ modules/nf-core/ucsc/bedclip/meta.yml | 6 ++- modules/nf-core/ucsc/bedgraphtobigwig/main.nf | 20 +++++-- .../nf-core/ucsc/bedgraphtobigwig/meta.yml | 3 ++ modules/nf-core/umitools/dedup/main.nf | 23 ++++++-- modules/nf-core/umitools/dedup/meta.yml | 5 ++ .../meta.yml | 3 +- .../bam_markduplicates_picard/meta.yml | 3 +- .../nf-core/bam_sort_stats_samtools/meta.yml | 3 +- .../nf-core/bam_stats_samtools/meta.yml | 2 +- .../meta.yml | 2 +- .../nf-core/fastq_align_hisat2/meta.yml | 3 +- .../fastq_subsample_fq_salmon/meta.yml | 3 +- 33 files changed, 303 insertions(+), 66 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/qualimap/rnaseq/meta.yml diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 7ed5e4e1f..307cdb809 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,9 +30,11 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements ### Software dependencies -| Dependency | Old version | New version | -| ---------- | ----------- | ----------- | -| `fastqc` | 0.11.9 | 0.12.1 | +| Dependency | Old version | New version | +| ----------------------- | ----------- | ----------- | +| `fastqc` | 0.11.9 | 0.12.1 | +| `multiqc` | 1.14 | 1.15 | +| `ucsc-bedgraphtobigwig` | 377 | 445 | > **NB:** Dependency has been **updated** if both old and new version information is present. > diff --git a/docs/usage.md b/docs/usage.md index f006725cf..4d353159a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -91,9 +91,9 @@ The `--umitools_grouping_method` parameter affects [how similar, but non-identic #### Examples: -| UMI type | Source | Pipeline parameters | -| ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | -| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | +| UMI type | Source | Pipeline parameters | +| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------- | +| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | | In sequence | [Lexogen QuantSeq® 3’ mRNA-Seq V2 FWD](https://www.lexogen.com/quantseq-3mrna-sequencing) + [UMI Second Strand Synthesis Module](https://faqs.lexogen.com/faq/how-can-i-add-umis-to-my-quantseq-libraries) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P<umi_1>.{6})(?P<discard_1>.{4}).*"` | | In sequence | [Lexogen CORALL® Total RNA-Seq V1](https://www.lexogen.com/corall-total-rna-seq/)
> _mind [Appendix H](https://www.lexogen.com/wp-content/uploads/2020/04/095UG190V0130_CORALL-Total-RNA-Seq_2020-03-31.pdf) regarding optional trimming_ | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P<umi_1>.{12}).*"`
Optional: `--clip_r2 9 --three_prime_clip_r2 12` | | In sequence | [Takara Bio SMARTer® Stranded Total RNA-Seq Kit v3](https://www.takarabio.com/documents/User%20Manual/SMARTer%20Stranded%20Total%20RNA/SMARTer%20Stranded%20Total%20RNA-Seq%20Kit%20v3%20-%20Pico%20Input%20Mammalian%20User%20Manual-a_114949.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern2 "^(?P.{8})(?P.{6}).*"` | diff --git a/modules.json b/modules.json index bbe00aa0c..3ae59acc3 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "e228790f2957152ad2534e39abd7b3878963e89d", + "git_sha": "de3e6fc949dcffb8d3508c015f435ace5773ff08", "installed_by": ["modules"] }, "cat/fastq": { @@ -17,7 +17,7 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1b372269755a5c4a13c23bc130ebada8cb9d4cd0", "installed_by": ["modules"] }, "custom/getchromsizes": { @@ -47,17 +47,17 @@ }, "gunzip": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", "installed_by": ["modules"] }, "hisat2/align": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "a1881f6374506f9e031b7af814768cdb44a6a7d3", "installed_by": ["fastq_align_hisat2"] }, "hisat2/build": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "f2f48836bf5c59434966a6c3b2211b29363f31ab", "installed_by": ["modules"] }, "hisat2/extractsplicesites": { @@ -67,7 +67,7 @@ }, "picard/markduplicates": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "2ee934606f1fdf7fc1cb05d6e8abc13bec8ab448", "installed_by": ["bam_markduplicates_picard"] }, "preseq/lcextrap": { @@ -77,7 +77,7 @@ }, "qualimap/rnaseq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": 
"4657d98bc9f565e067c4d924126ce107056f5e2f", "installed_by": ["modules"] }, "rsem/calculateexpression": { @@ -137,17 +137,17 @@ }, "salmon/quant": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "c5b528d0a51c31621b485ab3bcc008f483619ea6", "installed_by": ["modules", "fastq_subsample_fq_salmon"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", "installed_by": ["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", "installed_by": ["bam_stats_samtools"] }, "samtools/index": { @@ -176,12 +176,12 @@ }, "star/align": { "branch": "master", - "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914", + "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", "installed_by": ["modules"] }, "stringtie/stringtie": { @@ -201,17 +201,17 @@ }, "ucsc/bedclip": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "240937a2a9c30298110753292be041188891f2cb", "installed_by": ["bedgraph_bedclip_bedgraphtobigwig"] }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", "installed_by": ["bedgraph_bedclip_bedgraphtobigwig"] }, "umitools/dedup": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "7297204bf49273300a3dbfa4b7a4027c8683f1bd", "installed_by": ["bam_dedup_stats_samtools_umitools"] }, "umitools/extract": { @@ -230,12 +230,12 @@ "nf-core": { "bam_dedup_stats_samtools_umitools": { "branch": "master", - "git_sha": 
"e228790f2957152ad2534e39abd7b3878963e89d", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] }, "bam_markduplicates_picard": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] }, "bam_rseqc": { @@ -245,26 +245,26 @@ }, "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["fastq_align_hisat2"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": [ - "bam_markduplicates_picard", "bam_sort_stats_samtools", + "bam_markduplicates_picard", "bam_dedup_stats_samtools_umitools" ] }, "bedgraph_bedclip_bedgraphtobigwig": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] }, "fastq_align_hisat2": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] }, "fastq_fastqc_umitools_fastp": { @@ -279,7 +279,7 @@ }, "fastq_subsample_fq_salmon": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf index 67190ff1d..f82bb9e47 100644 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -1,4 +1,5 @@ process BBMAP_BBSPLIT { + tag "$meta.id" label 'process_high' label 'error_retry' diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 
ebc872733..c9d014b11 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 000000000..d528d3cb8 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "modules/nf-core/custom/dumpsoftwareversions/main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + tag "modules_nfcore" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 000000000..8713b9216 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ], + "1": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "2": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "versions": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "yml": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ] + } + ], + "timestamp": "2023-10-11T17:10:02.930699" + } +} diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index e7189d2fb..73bf08cde 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -21,10 +21,14 @@ process GUNZIP { def args = task.ext.args ?: '' gunzip = archive.toString() - '.gz' """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/hisat2/align/main.nf b/modules/nf-core/hisat2/align/main.nf index e69204692..db8e8bb6d 100644 --- a/modules/nf-core/hisat2/align/main.nf +++ b/modules/nf-core/hisat2/align/main.nf @@ -33,6 +33,7 @@ process HISAT2_ALIGN { } else if (meta.strandedness == 'reverse') { strandedness = meta.single_end ? '--rna-strandness R' : '--rna-strandness RF' } + ss = "$splicesites" ? "--known-splicesite-infile $splicesites" : '' def seq_center = params.seq_center ? "--rg-id ${prefix} --rg SM:$prefix --rg CN:${params.seq_center.replaceAll('\\s','_')}" : "--rg-id ${prefix} --rg SM:$prefix" if (meta.single_end) { def unaligned = params.save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' @@ -42,7 +43,7 @@ process HISAT2_ALIGN { -x \$INDEX \\ -U $reads \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ @@ -65,7 +66,7 @@ process HISAT2_ALIGN { -1 ${reads[0]} \\ -2 ${reads[1]} \\ $strandedness \\ - --known-splicesite-infile $splicesites \\ + $ss \\ --summary-file ${prefix}.hisat2.summary.log \\ --threads $task.cpus \\ $seq_center \\ diff --git a/modules/nf-core/hisat2/build/main.nf b/modules/nf-core/hisat2/build/main.nf index 5ad36a548..90f8efcc6 100644 --- a/modules/nf-core/hisat2/build/main.nf +++ b/modules/nf-core/hisat2/build/main.nf @@ -37,9 +37,9 @@ process HISAT2_BUILD { def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 if (avail_mem >= hisat2_build_memory) { log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" - extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" - ss = "--ss $splicesites" - exon = "--exon ${gtf.baseName}.exons.txt" + extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : "" + ss = splicesites ? "--ss $splicesites" : "" + exon = gtf ? "--exon ${gtf.baseName}.exons.txt" : "" } else { log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." 
diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index facd7efb9..ebfa0864d 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -30,6 +30,9 @@ process PICARD_MARKDUPLICATES { } else { avail_mem = (task.memory.mega*0.8).intValue() } + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ picard \\ -Xmx${avail_mem}M \\ @@ -48,6 +51,7 @@ process PICARD_MARKDUPLICATES { stub: def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ touch ${prefix}.bam touch ${prefix}.bam.bai diff --git a/modules/nf-core/qualimap/rnaseq/main.nf b/modules/nf-core/qualimap/rnaseq/main.nf index 2c0e4105f..044c983fc 100644 --- a/modules/nf-core/qualimap/rnaseq/main.nf +++ b/modules/nf-core/qualimap/rnaseq/main.nf @@ -9,7 +9,7 @@ process QUALIMAP_RNASEQ { input: tuple val(meta), path(bam) - path gtf + tuple val(meta2), path(gtf) output: tuple val(meta), path("${prefix}"), emit: results diff --git a/modules/nf-core/qualimap/rnaseq/meta.yml b/modules/nf-core/qualimap/rnaseq/meta.yml new file mode 100644 index 000000000..7738f08d8 --- /dev/null +++ b/modules/nf-core/qualimap/rnaseq/meta.yml @@ -0,0 +1,52 @@ +name: qualimap_rnaseq +description: Evaluate alignment data +keywords: + - quality control + - qc + - rnaseq +tools: + - qualimap: + description: | + Qualimap 2 is a platform-independent application written in + Java and R that provides both a Graphical User Interface and + a command-line interface to facilitate the quality control of + alignment sequencing data and its derivatives like feature counts. 
+ homepage: http://qualimap.bioinfo.cipf.es/ + documentation: http://qualimap.conesalab.org/doc_html/index.html + doi: 10.1093/bioinformatics/bts503 + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome + pattern: "*.{gtf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results: + type: directory + description: Qualimap results dir + pattern: "*/*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index e356af26d..f11b6c2e2 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -28,7 +28,9 @@ process SALMON_QUANT { prefix = task.ext.prefix ?: "${meta.id}" def reference = "--index $index" - def input_reads = meta.single_end ? "-r $reads" : "-1 ${reads[0]} -2 ${reads[1]}" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def input_reads = meta.single_end ? 
"-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" if (alignment_mode) { reference = "-t $transcript_fasta" input_reads = "-a $reads" diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml index ea01e0df8..e809ade2b 100644 --- a/modules/nf-core/salmon/quant/meta.yml +++ b/modules/nf-core/salmon/quant/meta.yml @@ -22,8 +22,9 @@ input: - reads: type: file description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + List of input FastQ files for single-end or paired-end data. + Multiple single-end fastqs or pairs of paired-end fastqs are + handled. - index: type: directory description: Folder containing the star index files diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index eb7e72fc6..b75707eca 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -32,4 +32,15 @@ process SAMTOOLS_FLAGSTAT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index a257d7002..83c7c34b9 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -33,4 +33,16 @@ process SAMTOOLS_IDXSTATS { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; 
s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 8cb8e9a4d..d0e203848 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -9,8 +9,8 @@ process STAR_ALIGN { input: tuple val(meta), path(reads, stageAs: "input*/*") - path index - path gtf + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) val star_ignore_sjdbgtf val seq_platform val seq_center diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index bce16d360..3d8fed0cc 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -25,10 +25,34 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - index: type: directory description: STAR genome index pattern: "star" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - seq_platform: + type: string + description: Sequencing platform + - seq_center: + type: string + description: Sequencing center + output: - bam: type: file diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 2407d0066..434240427 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -8,12 +8,12 @@ process STAR_GENOMEGENERATE { 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" input: - path fasta - path gtf + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) output: - path "star" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index 8181157a1..eba2d9cf1 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -15,14 +15,29 @@ tools: doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Fasta file of the reference genome + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - gtf: type: file description: GTF file of the reference genome output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - index: type: directory description: Folder containing the star index files diff --git a/modules/nf-core/ucsc/bedclip/meta.yml b/modules/nf-core/ucsc/bedclip/meta.yml index ca278552a..cc5d9d715 100755 --- a/modules/nf-core/ucsc/bedclip/meta.yml +++ b/modules/nf-core/ucsc/bedclip/meta.yml @@ -1,7 +1,9 @@ name: ucsc_bedclip -description: See http://hgdownload.cse.ucsc.edu/admin/exe/ +description: Remove lines from bed file that refer to off-chromosome locations. keywords: - - sort + - bed + - genomics + - ucsc tools: - ucsc: description: Remove lines from bed file that refer to off-chromosome locations. diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf index 054924e7e..06bb47099 100644 --- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -3,10 +3,10 @@ process UCSC_BEDGRAPHTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::ucsc-bedgraphtobigwig=377" + conda "bioconda::ucsc-bedgraphtobigwig=445" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : - 'biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" + 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:445--h954228d_0' : + 'biocontainers/ucsc-bedgraphtobigwig:445--h954228d_0' }" input: tuple val(meta), path(bedgraph) @@ -22,7 +22,7 @@ process UCSC_BEDGRAPHTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '445' // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. """ bedGraphToBigWig \\ $bedgraph \\ @@ -34,4 +34,16 @@ process UCSC_BEDGRAPHTOBIGWIG { ucsc: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '445' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bigWig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index ba8915bed..416c91e07 100755 --- a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -3,6 +3,9 @@ description: Convert a bedGraph file to bigWig format. keywords: - bedgraph - bigwig + - ucsc + - bedgraphtobigwig + - converter tools: - ucsc: description: Convert a bedGraph file to bigWig format. diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf index 2bd95da7c..56ea04691 100644 --- a/modules/nf-core/umitools/dedup/main.nf +++ b/modules/nf-core/umitools/dedup/main.nf @@ -12,7 +12,8 @@ process UMITOOLS_DEDUP { val get_output_stats output: - tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("${prefix}.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position @@ -23,9 +24,10 @@ process UMITOOLS_DEDUP { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def paired = meta.single_end ? "" : "--paired" - def stats = get_output_stats ? "--output-stats $prefix" : "" + stats = get_output_stats ? 
"--output-stats ${prefix}" : "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} """ @@ -33,6 +35,7 @@ process UMITOOLS_DEDUP { dedup \\ -I $bam \\ -S ${prefix}.bam \\ + -L ${prefix}.log \\ $stats \\ $paired \\ $args @@ -42,4 +45,18 @@ process UMITOOLS_DEDUP { umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') END_VERSIONS """ + + stub: + """ + touch ${prefix}.bam + touch ${prefix}.log + touch ${prefix}_edit_distance.tsv + touch ${prefix}_per_umi.tsv + touch ${prefix}_per_position.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml index 0719a9552..534d4c6b0 100644 --- a/modules/nf-core/umitools/dedup/meta.yml +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -3,6 +3,7 @@ description: Deduplicate reads based on the mapping co-ordinate and the UMI atta keywords: - umitools - deduplication + - dedup tools: - umi_tools: description: > @@ -40,6 +41,10 @@ output: type: file description: BAM file with deduplicated UMIs. pattern: "*.{bam}" + - log: + type: file + description: File with logging information + pattern: "*.{log}" - tsv_edit_distance: type: file description: Reports the (binned) average edit distance between the UMIs at each position. 
diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml index 3af412fab..f11e7ab6f 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml @@ -8,12 +8,13 @@ keywords: - bam - sam - cram -modules: +components: - umitools/dedup - samtools/index - samtools/stats - samtools/idxstats - samtools/flagstat + - bam_stats_samtools input: - ch_bam_bai: description: | diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml index d5e716092..b924596d8 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -7,12 +7,13 @@ keywords: - sam - cram -modules: +components: - picard/markduplicates - samtools/index - samtools/stats - samtools/idxstats - samtools/flagstat + - bam_stats_samtools input: - ch_bam: diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml index 8dfbd58df..69c16be41 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -6,12 +6,13 @@ keywords: - bam - sam - cram -modules: +components: - samtools/sort - samtools/index - samtools/stats - samtools/idxstats - samtools/flagstat + - bam_stats_samtools input: - meta: type: map diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml index b05086bc2..87863b11b 100644 --- a/subworkflows/nf-core/bam_stats_samtools/meta.yml +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -7,7 +7,7 @@ keywords: - bam - sam - cram -modules: +components: - samtools/stats - samtools/idxstats - samtools/flagstat diff --git a/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/meta.yml 
b/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/meta.yml index 8d3257773..5ad4db7db 100644 --- a/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/meta.yml +++ b/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/meta.yml @@ -6,7 +6,7 @@ keywords: - bigwig - clip - conversion -modules: +components: - ucsc/bedclip - ucsc/bedgraphtobigwig input: diff --git a/subworkflows/nf-core/fastq_align_hisat2/meta.yml b/subworkflows/nf-core/fastq_align_hisat2/meta.yml index 36c7c78ec..2b05beb68 100644 --- a/subworkflows/nf-core/fastq_align_hisat2/meta.yml +++ b/subworkflows/nf-core/fastq_align_hisat2/meta.yml @@ -10,11 +10,12 @@ keywords: - bam - sam - cram -modules: +components: - hisat2/align - samtools/stats - samtools/idxstats - samtools/flagstat + - bam_sort_stats_samtools input: - meta: type: map diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml index db96312aa..d144269ba 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/meta.yml @@ -5,9 +5,10 @@ keywords: - fastq - subsample - strandedness -modules: +components: - fq/subsample - salmon/quant + - salmon/index input: - meta: type: map From 92eba514c1aeaeacd0fce15b8425d6fe0c105212 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 12 Oct 2023 19:11:02 +0100 Subject: [PATCH 2/6] Bump MultiQC version from 1.14 -> 1.15 --- modules/local/multiqc/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf index 6beb43310..44565a9f7 100644 --- a/modules/local/multiqc/main.nf +++ b/modules/local/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_config From 77145db2b1ebb095a8ef8319298bc2a60c827425 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 12 Oct 2023 19:46:01 +0100 Subject: [PATCH 3/6] Fix channel issues after updating nf-core/modules --- subworkflows/local/align_star/main.nf | 4 ++-- subworkflows/local/prepare_genome/main.nf | 2 +- workflows/rnaseq.nf | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/align_star/main.nf b/subworkflows/local/align_star/main.nf index 479ed2379..1dba0c085 100644 --- a/subworkflows/local/align_star/main.nf +++ b/subworkflows/local/align_star/main.nf @@ -9,8 +9,8 @@ include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools' workflow ALIGN_STAR { take: reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/star/index/ - gtf // channel: /path/to/genome.gtf + index // channel: [ val(meta), [ index ] ] + gtf // channel: [ val(meta), [ gtf ] ] star_ignore_sjdbgtf // boolean: when using pre-built STAR indices do not re-extract and use splice junctions from the GTF file seq_platform // string : sequencing platform seq_center // string : sequencing center diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 200957cda..eceb2c1ce 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -191,7 +191,7 @@ workflow PREPARE_GENOME { ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions) } else { - ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index + ch_star_index = STAR_GENOMEGENERATE ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], 
it ] } ).index.map { it[1] } ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) } } diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf index f2dd49a0b..955a727c3 100755 --- a/workflows/rnaseq.nf +++ b/workflows/rnaseq.nf @@ -382,8 +382,8 @@ workflow RNASEQ { if (!params.skip_alignment && params.aligner == 'star_salmon') { ALIGN_STAR ( ch_filtered_reads, - PREPARE_GENOME.out.star_index, - PREPARE_GENOME.out.gtf, + PREPARE_GENOME.out.star_index.map { [ [:], it ] }, + PREPARE_GENOME.out.gtf.map { [ [:], it ] }, params.star_ignore_sjdbgtf, '', params.seq_center ?: '', @@ -729,7 +729,7 @@ workflow RNASEQ { if (!params.skip_qualimap) { QUALIMAP_RNASEQ ( ch_genome_bam, - PREPARE_GENOME.out.gtf + PREPARE_GENOME.out.gtf.map { [ [:], it ] } ) ch_qualimap_multiqc = QUALIMAP_RNASEQ.out.results ch_versions = ch_versions.mix(QUALIMAP_RNASEQ.out.versions.first()) From e787b7ad94583ee610db3272ff7ce8d1c30c7823 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 12 Oct 2023 21:24:41 +0000 Subject: [PATCH 4/6] [automated] Fix linting with Prettier --- docs/usage.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 4d353159a..f006725cf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -91,9 +91,9 @@ The `--umitools_grouping_method` parameter affects [how similar, but non-identic #### Examples: -| UMI type | Source | Pipeline parameters | -| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | -| In read name | [Illumina BCL convert 
>3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | +| UMI type | Source | Pipeline parameters | +| ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| In read name | [Illumina BCL convert >3.7.5](https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl_convert/bcl-convert-v3-7-5-software-guide-1000000163594-00.pdf) | `--with_umi --skip_umi_extract --umitools_umi_separator ":"` | | In sequence | [Lexogen QuantSeq® 3’ mRNA-Seq V2 FWD](https://www.lexogen.com/quantseq-3mrna-sequencing) + [UMI Second Strand Synthesis Module](https://faqs.lexogen.com/faq/how-can-i-add-umis-to-my-quantseq-libraries) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P.{6})(?P.{4}).*"` | | In sequence | [Lexogen CORALL® Total RNA-Seq V1](https://www.lexogen.com/corall-total-rna-seq/)
> _mind [Appendix H](https://www.lexogen.com/wp-content/uploads/2020/04/095UG190V0130_CORALL-Total-RNA-Seq_2020-03-31.pdf) regarding optional trimming_ | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern "^(?P<umi_1>.{12}).*"`
Optional: `--clip_r2 9 --three_prime_clip_r2 12` | | In sequence | [Takara Bio SMARTer® Stranded Total RNA-Seq Kit v3](https://www.takarabio.com/documents/User%20Manual/SMARTer%20Stranded%20Total%20RNA/SMARTer%20Stranded%20Total%20RNA-Seq%20Kit%20v3%20-%20Pico%20Input%20Mammalian%20User%20Manual-a_114949.pdf) | `--with_umi --umitools_extract_method "regex" --umitools_bc_pattern2 "^(?P<umi_1>.{8})(?P<discard_1>.{6}).*"` | From 12d8ac49b46ac13fe27fa73111ebbf4baf87d3d3 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 30 Aug 2023 09:45:25 +0100 Subject: [PATCH 5/6] Update usage.md for igenomes warning --- docs/usage.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index f006725cf..6081c90bd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -135,9 +135,18 @@ If unique molecular identifiers were used to prepare the library, add the follow Please refer to the [nf-core website](https://nf-co.re/usage/reference_genomes) for general usage docs and guidelines regarding reference genomes. +:::warning +When using the `--genome` parameter (e.g. `--genome GRCh37`), you are referring to references from AWS-iGenomes. Please be aware that: + +- The iGenomes files used by this option are outdated with respect to gene annotations. This can be particularly problematic for RNA-seq analysis, which relies on accurate gene annotation. +- Some iGenomes references (e.g., GRCh38) point to annotation files that use gene symbols as the primary identifier. This can cause issues for downstream analysis, such as the nf-core [differential abundance workflow](https://nf-co.re/differentialabundance). + +We recommend that you provide reference files directly, via `--gtf` and `--fasta`, and that supplied GTF files do not use gene names as `gene_id`. +::: + The minimum reference genome requirements for this pipeline are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. 
However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. custom genomes that are unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes#custom-genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--star_index '/path/to/STAR/index/'`) or via a custom config file. Another option is to run the pipeline once with `--save_reference --skip_alignment --skip_pseudo_alignment` to generate and save all of the required reference files and indices to the results directory. You can then move the reference files in `/genome/` to a more permanent location and use these paths to override the relevant parameters in the pipeline e.g. `--star_index`. -- If `--genome` is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. +- If `--genome` (discouraged; see warning above) is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. - If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided. - If `--gene_bed` is not provided then it will be generated from the GTF file. - If `--additional_fasta` is provided then the features in this file (e.g. 
ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. @@ -169,7 +178,13 @@ Please get in touch with us on the #rnaseq channel in the [nf-core Slack workspa The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/rnaseq --input <SAMPLESHEET> --outdir <OUTDIR> --genome GRCh37 -profile docker +nextflow run \ + nf-core/rnaseq \ + --input <SAMPLESHEET> \ + --outdir <OUTDIR> \ + --gtf Homo_sapiens.GRCh38.110.gtf.gz \ + --fasta Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz \ + -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. From 22aab0328b81f21234b0fc00cdfbfa91aa0f1035 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 13 Oct 2023 09:15:00 +0100 Subject: [PATCH 6/6] Fix merge conflicts --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 307cdb809..a235dd00f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [PR #1054](https://github.com/nf-core/rnaseq/pull/1054) - Template update to nf-core/tools v2.9 - [PR #1058](https://github.com/nf-core/rnaseq/pull/1058) - Use `nf-validation` plugin for parameter and samplesheet validation - [PR #1068](https://github.com/nf-core/rnaseq/pull/1068) - Update `grep` version for `untar` module +- [PR #1073](https://github.com/nf-core/rnaseq/pull/1073) - Update documentation to discourage use of `--genome` - [PR #1078](https://github.com/nf-core/rnaseq/pull/1078) - Updated pipeline template to [nf-core/tools 2.10](https://github.com/nf-core/tools/releases/tag/2.10) - [PR #1083](https://github.com/nf-core/rnaseq/pull/1083) - Move local modules and subworkflows to subfolders - [PR #1088](https://github.com/nf-core/rnaseq/pull/1088) - Updates contributing and code of conduct documents with nf-core template 2.10