diff --git a/conf/modules.config b/conf/modules.config index 3c31fb469..aa067b7b1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,1186 +10,74 @@ ---------------------------------------------------------------------------------------- */ -def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] - -// -// General configuration options -// - process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - - withName: 'SAMPLESHEET_CHECK' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } -} - -// -// Genome preparation options -// - -process { - withName: 'GUNZIP_.*|MAKE_TRANSCRIPTS_FASTA' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'UNTAR_.*' { - ext.args2 = '--no-same-owner' - } - - withName: 'UNTAR_.*|STAR_GENOMEGENERATE|STAR_GENOMEGENERATE_IGENOMES|HISAT2_BUILD' { - publishDir = [ - path: { "${params.outdir}/genome/index" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'GFFREAD' { - ext.args = '--keep-exon-attrs -F -T' - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - - withName: 'HISAT2_EXTRACTSPLICESITES' { - publishDir = [ - path: { "${params.outdir}/genome/index" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'SALMON_INDEX' { - ext.args = params.gencode ? '--gencode' : '' - publishDir = [ - path: { "${params.outdir}/genome/index" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'RSEM_PREPAREREFERENCE_GENOME' { - ext.args = '--star' - publishDir = [ - path: { "${params.outdir}/genome/index" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'GTF2BED' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'CAT_ADDITIONAL_FASTA|PREPROCESS_TRANSCRIPTS_FASTA_GENCODE' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'GTF_GENE_FILTER' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: 'CUSTOM_GETCHROMSIZES' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - - withName: 'CAT_FASTQ' { - publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_merged_fastq - ] - } -} - -if (!params.skip_bbsplit && params.bbsplit_fasta_list) { - process { - withName: '.*:PREPARE_GENOME:BBMAP_BBSPLIT' { - ext.args = 'build=1' - publishDir = [ - path: { "${params.outdir}/genome/index" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - } -} - -// -// Read subsampling and strand inferring options -// - -process { - withName: 'FQ_SUBSAMPLE' { - ext.args = '--record-count 1000000 --seed 1' - ext.prefix = { "${meta.id}.subsampled" } - publishDir = [ - path: { "${params.outdir}/sample_fastq/fastq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: false - ] - } - - withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' { - ext.args = '--skipQuant' - publishDir = [ - path: { "${params.outdir}/sample_fastq/salmon" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? 
null : filename }, - enabled: false - ] - } -} - -// -// Read QC and trimming options -// - -if (!(params.skip_fastqc || params.skip_qc)) { - if (params.trimmer == 'trimgalore') { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_TRIMGALORE:FASTQC' { - ext.args = '--quiet' - } - } - } - - if (params.trimmer == 'fastp') { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { - ext.args = '--quiet' - } - - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/${params.trimmer}/fastqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } -} - -if (!params.skip_trimming) { - if (params.trimmer == 'trimgalore') { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_TRIMGALORE:TRIMGALORE' { - ext.args = { - [ - "--fastqc_args '-t ${task.cpus}'", - params.extra_trimgalore_args ? params.extra_trimgalore_args.split("\\s(?=--)") : '' - ].flatten().unique(false).join(' ').trim() - } - publishDir = [ - [ - path: { "${params.outdir}/${params.trimmer}/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.{html,zip}" - ], - [ - path: { "${params.outdir}/${params.trimmer}" }, - mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: params.save_trimmed - ], - [ - path: { "${params.outdir}/${params.trimmer}" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ] - } - } - } - - if (params.trimmer == 'fastp') { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { - ext.args = params.extra_fastp_args ?: '' - publishDir = [ - [ - path: { "${params.outdir}/${params.trimmer}" }, - mode: params.publish_dir_mode, - pattern: "*.{json,html}" - ], - [ - path: { "${params.outdir}/${params.trimmer}/log" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/${params.trimmer}" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_trimmed - ] 
- ] - } - } - } -} - -if (params.with_umi && !params.skip_umi_extract) { - process { - withName: 'UMITOOLS_EXTRACT' { - ext.args = [ - params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', - params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', - params.umitools_bc_pattern2 ? "--bc-pattern2='${params.umitools_bc_pattern2}'" : '', - params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_umi_intermeds - ] - ] - } - } -} - -// -// Contaminant removal options -// - -if (!params.skip_bbsplit) { - process { - withName: 'BBMAP_BBSPLIT' { - ext.args = 'build=1 ambiguous2=all maxindel=150000' - publishDir = [ - [ - path: { "${params.outdir}/bbsplit" }, - mode: params.publish_dir_mode, - pattern: '*.txt' - ], - [ - path: { "${params.outdir}/bbsplit" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.save_bbsplit_reads - ] - ] - } - } -} - -if (params.remove_ribo_rna) { - process { - withName: 'SORTMERNA' { - ext.args = '--num_alignments 1 -v' - publishDir = [ - [ - path: { "${params.outdir}/sortmerna" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/sortmerna" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_non_ribo_reads - ] - ] - } - } -} - -// -// General alignment options -// - -if (!params.skip_alignment) { - process { - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.sorted.bam" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: "*.{stats,flagstat,idxstats}" - ] - 
} - - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: "*.bam", - enabled: ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || - ( !params.with_umi && params.skip_markduplicates ) - ) - ) || params.save_align_intermeds || params.skip_markduplicates - ] - } - - withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { - ext.args = params.bam_csi_index ? '-c' : '' - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: "*.{bai,csi}", - enabled: ( ['star_salmon','hisat2'].contains(params.aligner) && - ( params.save_align_intermeds || - ( !params.with_umi && params.skip_markduplicates ) - ) - ) || params.save_align_intermeds || params.skip_markduplicates - ] - } - } - - if (!params.skip_markduplicates && !params.with_umi) { - process { - withName: '.*:BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' { - ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' - ext.prefix = { "${meta.id}.markdup.sorted" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/picard_metrics" }, - mode: params.publish_dir_mode, - pattern: '*metrics.txt' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam' - ] - ] - } - - withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { - ext.args = params.bam_csi_index ? 
'-c' : '' - ext.prefix = { "${meta.id}.markdup.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' - ] - } - - withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.markdup.sorted.bam" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: '*.{stats,flagstat,idxstats}' - ] - } - } - } - - if (params.with_umi && ['star_salmon','hisat2'].contains(params.aligner)) { - process { - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS_DEDUP' { - ext.args = { [ - meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', - params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', - params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } - ext.prefix = { "${meta.id}.umi_dedup.sorted" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/umitools" }, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_align_intermeds || - params.with_umi || - params.save_umi_intermeds - ) - ] - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:SAMTOOLS_INDEX' { - ext.args = params.bam_csi_index ? 
'-c' : '' - ext.prefix = { "${meta.id}.umi_dedup.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}', - enabled: ( - params.save_align_intermeds || - params.with_umi || - params.save_umi_intermeds - ) - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.umi_dedup.sorted.bam" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: '*.{stats,flagstat,idxstats}' - ] - } - } - } - - if (!params.skip_bigwig) { - process { - withName: 'BEDTOOLS_GENOMECOV' { - ext.args = '-split -du' - publishDir = [ - path: { "${params.outdir}/bedtools/${meta.id}" }, - enabled: false - ] - } - - withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDCLIP' { - ext.prefix = { "${meta.id}.clip.forward" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - enabled: false - ] - } - - withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD:UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = { "${meta.id}.forward" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/bigwig" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDCLIP' { - ext.prefix = { "${meta.id}.clip.reverse" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - enabled: false - ] - } - - withName: '.*:BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE:UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = { "${meta.id}.reverse" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/bigwig" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } - - if (!params.skip_stringtie) { - process { - withName: 'STRINGTIE_STRINGTIE' { - ext.args = [ - '-v', - params.stringtie_ignore_gtf ? '' : '-e' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/stringtie" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } } -// -// STAR Salmon alignment options -// - -if (!params.skip_alignment && params.aligner == 'star_salmon') { - process { - withName: '.*:ALIGN_STAR:STAR_ALIGN|.*:ALIGN_STAR:STAR_ALIGN_IGENOMES' { - ext.args = [ - '--quantMode TranscriptomeSAM', - '--twopassMode Basic', - '--outSAMtype BAM Unsorted', - '--readFilesCommand zcat', - '--runRNGseed 0', - '--outFilterMultimapNmax 20', - '--alignSJDBoverhangMin 1', - '--outSAMattributes NH HI AS NM MD', - '--quantTranscriptomeBan Singleend', - '--outSAMstrandField intronMotif', - params.save_unaligned ? '--outReadsUnmapped Fastx' : '', - params.extra_star_align_args ? params.extra_star_align_args.split("\\s(?=--)") : '' - ].flatten().unique(false).join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/log" }, - mode: params.publish_dir_mode, - pattern: '*.{out,tab}' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/${params.aligner}/unmapped" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.save_unaligned - ] - ] - } - - withName: '.*:QUANTIFY_STAR_SALMON:SALMON_QUANT' { - ext.args = params.extra_salmon_quant_args ?: '' - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? 
null : filename } - ] - } +params { - withName: '.*:QUANTIFY_STAR_SALMON:SALMON_TX2GENE' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // + // Genome preparation options + // - withName: '.*:QUANTIFY_STAR_SALMON:SALMON_TXIMPORT' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + prepare_genome_untar_args = '--no-same-owner' + prepare_genome_gffread_args = '--keep-exon-attrs -F -T' + prepare_genome_rsem_args = '--star' + prepare_genome_bbsplit_args = 'build=1' - withName: '.*:QUANTIFY_STAR_SALMON:SALMON_SE_.*' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + // + // Read subsampling and strand inferring options + // - if (params.with_umi) { - process { - withName: 'NFCORE_RNASEQ:RNASEQ:SAMTOOLS_SORT' { - ext.args = '-n' - ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - } + subsample_fq_args = '--record-count 1000000 --seed 1' + subsample_salmon_args = '--skipQuant' - withName: 'NFCORE_RNASEQ:RNASEQ:UMITOOLS_PREPAREFORSALMON' { - ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/umitools/log" }, - mode: params.publish_dir_mode, - pattern: '*.log' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - ] - } + // + // Read QC and 
trimming options + // - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.transcriptome.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - } + fastqc_args = '--quiet' - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bai', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - } + // + // Contaminant removal options + // - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: '*.{stats,flagstat,idxstats}', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS_DEDUP' { - ext.args = { [ - meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', - params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', - params.umitools_umi_separator ? 
"--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } - ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/umitools" }, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:SAMTOOLS_INDEX' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bai', - enabled: ( - params.save_align_intermeds || - params.save_umi_intermeds - ) - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: '*.{stats,flagstat,idxstats}' - ] - } - } - } - - if (!params.skip_qc & !params.skip_deseq2_qc) { - process { - withName: 'DESEQ2_QC_STAR_SALMON' { - ext.args = [ - "--id_col 1", - "--sample_suffix ''", - "--outprefix deseq2", - "--count_col 3", - params.deseq2_vst ? 
'--vst TRUE' : '' - ].join(' ').trim() - ext.args2 = 'star_salmon' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, - mode: params.publish_dir_mode, - pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" - ] - } - } - } -} + bbsplit_args = 'build=1 ambiguous2=all maxindel=150000' + sortmerna_args = '--num_alignments 1 -v' -// -// STAR RSEM alignment options -// + // + // General alignment options + // -if (!params.skip_alignment && params.aligner == 'star_rsem') { - process { - withName: '.*:QUANTIFY_RSEM:RSEM_CALCULATEEXPRESSION' { - ext.args = [ - '--star', - '--star-output-genome-bam', - '--star-gzipped-read-file', - '--estimate-rspd', - '--seed 1' - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: "*.{stat,results}" - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: "*.bam", - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/${params.aligner}/log" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ] - ] - } + picard_args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + bedtools_args = '-split -du' - withName: '.*:QUANTIFY_RSEM:RSEM_MERGE_COUNTS' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + // + // STAR Salmon alignment options + // - if (!params.skip_qc & !params.skip_deseq2_qc) { - process { - withName: 'DESEQ2_QC_RSEM' { - ext.args = [ - "--id_col 1", - "--sample_suffix ''", - "--outprefix deseq2", - "--count_col 3", - params.deseq2_vst ? 
'--vst TRUE' : '' - ].join(' ').trim() - ext.args2 = 'star_rsem' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/deseq2_qc" }, - mode: params.publish_dir_mode, - pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" - ] - } - } - } -} - -// -// HISAT2 alignment options -// - -if (!params.skip_alignment && params.aligner == 'hisat2') { - process { - withName: '.*:FASTQ_ALIGN_HISAT2:HISAT2_ALIGN' { - ext.args = '--met-stderr --new-summary --dta' - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/log" }, - mode: params.publish_dir_mode, - pattern: '*.log' - ], - [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/${params.aligner}/unmapped" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.save_unaligned - ] - ] - } - } -} - -// -// Post-alignment QC options -// - -if (!params.skip_alignment && !params.skip_qc) { - if (!params.skip_preseq) { - process { - withName: 'PRESEQ_LCEXTRAP' { - ext.args = '-verbose -bam -seed 1 -seg_len 100000000' - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/preseq" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ], - [ - path: { "${params.outdir}/${params.aligner}/preseq/log" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ] - ] - } - } - } - - if (!params.skip_qualimap) { - process { - withName: 'QUALIMAP_RNASEQ' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/qualimap" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } - - if (!params.skip_dupradar) { - process { - withName: 'DUPRADAR' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/dupradar/scatter_plot" }, - mode: params.publish_dir_mode, - pattern: "*Dens.pdf" - ], - [ - path: { "${params.outdir}/${params.aligner}/dupradar/box_plot" }, - mode: params.publish_dir_mode, - pattern: "*Boxplot.pdf" - ], - [ - path: { "${params.outdir}/${params.aligner}/dupradar/histogram" }, - mode: params.publish_dir_mode, - pattern: "*Hist.pdf" - ], - [ - path: { "${params.outdir}/${params.aligner}/dupradar/gene_data" }, - mode: params.publish_dir_mode, - pattern: "*Matrix.txt" - ], - [ - path: { "${params.outdir}/${params.aligner}/dupradar/intercepts_slope" }, - mode: params.publish_dir_mode, - pattern: "*slope.txt" - ] - ] - } - } - } - - if (!params.skip_biotype_qc && params.featurecounts_group_type) { - process { - withName: 'SUBREAD_FEATURECOUNTS' { - ext.args = [ - '-B -C', - params.gencode ? "-g gene_type" : "-g $params.featurecounts_group_type", - "-t $params.featurecounts_feature_type" - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/featurecounts" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'MULTIQC_CUSTOM_BIOTYPE' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/featurecounts" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - - if (!params.skip_rseqc && 'bam_stat' in rseqc_modules) { - process { - withName: '.*:BAM_RSEQC:RSEQC_BAMSTAT' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/rseqc/bam_stat" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } - - if (!params.skip_rseqc && 'infer_experiment' in rseqc_modules) { - process { - withName: '.*:BAM_RSEQC:RSEQC_INFEREXPERIMENT' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/rseqc/infer_experiment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - - if (!params.skip_rseqc && 'junction_annotation' in rseqc_modules) { - process { - withName: '.*:BAM_RSEQC:RSEQC_JUNCTIONANNOTATION' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/pdf" }, - mode: params.publish_dir_mode, - pattern: '*.pdf' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/bed" }, - mode: params.publish_dir_mode, - pattern: '*.bed' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/xls" }, - mode: params.publish_dir_mode, - pattern: '*.xls' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/log" }, - mode: params.publish_dir_mode, - pattern: '*.log' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_annotation/rscript" }, - mode: params.publish_dir_mode, - pattern: '*.r' - ] - ] - } - } - } - - if (!params.skip_rseqc && 'junction_saturation' in rseqc_modules) { - process { - withName: '.*:BAM_RSEQC:RSEQC_JUNCTIONSATURATION' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_saturation/pdf" }, - mode: params.publish_dir_mode, - pattern: '*.pdf' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/junction_saturation/rscript" }, - mode: params.publish_dir_mode, - pattern: '*.r' - ] - ] - } - } - } - - if (!params.skip_rseqc && 'read_duplication' in rseqc_modules) { - process { - withName: '.*:BAM_RSEQC:RSEQC_READDUPLICATION' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/pdf" }, - mode: params.publish_dir_mode, - pattern: '*.pdf' - ], 
- [ - path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/xls" }, - mode: params.publish_dir_mode, - pattern: '*.xls' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/read_duplication/rscript" }, - mode: params.publish_dir_mode, - pattern: '*.r' - ] - ] - } - } - } - - if (!params.skip_rseqc && 'read_distribution' in rseqc_modules && !params.bam_csi_index) { - process { - withName: '.*:BAM_RSEQC:RSEQC_READDISTRIBUTION' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/rseqc/read_distribution" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - - if (!params.skip_rseqc && 'inner_distance' in rseqc_modules && !params.bam_csi_index) { - process { - withName: '.*:BAM_RSEQC:RSEQC_INNERDISTANCE' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/txt" }, - mode: params.publish_dir_mode, - pattern: '*.txt', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/pdf" }, - mode: params.publish_dir_mode, - pattern: '*.pdf' - ], - [ - path: { "${params.outdir}/${params.aligner}/rseqc/inner_distance/rscript" }, - mode: params.publish_dir_mode, - pattern: '*.r' - ] - ] - } - } - } - - if (!params.skip_rseqc && 'tin' in rseqc_modules && !params.bam_csi_index) { - process { - withName: '.*:BAM_RSEQC:RSEQC_TIN' { - publishDir = [ - path: { "${params.outdir}/${params.aligner}/rseqc/tin" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } -} - -if (!params.skip_multiqc) { - process { - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' - publishDir = [ - path: { [ - "${params.outdir}/multiqc", - params.skip_alignment? 
'' : "/${params.aligner}" - ].join('') }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } -} + samtools_sort_args = '-n' -// -// Salmon pseudo-alignment options -// + // + // STAR RSEM alignment options + // -if (!params.skip_pseudo_alignment && params.pseudo_aligner == 'salmon') { - process { - withName: '.*:QUANTIFY_SALMON:SALMON_QUANT' { - ext.args = params.extra_salmon_quant_args ?: '' - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename } - ] - } + // + // HISAT2 alignment options + // - withName: '.*:QUANTIFY_SALMON:SALMON_TX2GENE' { - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // + // Post-alignment QC options + // - withName: '.*:QUANTIFY_SALMON:SALMON_TXIMPORT' { - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + preseq_lcextrap_args = '-verbose -bam -seed 1 -seg_len 100000000' - withName: '.*:QUANTIFY_SALMON:SALMON_SE_.*' { - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + // + // Salmon pseudo-alignment options + // - if (!params.skip_qc & !params.skip_deseq2_qc) { - process { - withName: 'DESEQ2_QC_SALMON' { - ext.args = [ - "--id_col 1", - "--sample_suffix ''", - "--outprefix deseq2", - "--count_col 3", - params.deseq2_vst ? 
'--vst TRUE' : '' - ].join(' ').trim() - ext.args2 = 'salmon' - publishDir = [ - path: { "${params.outdir}/${params.pseudo_aligner}/deseq2_qc" }, - mode: params.publish_dir_mode, - pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" - ] - } - } - } } diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4e8..3c3af19d0 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -44,4 +44,11 @@ class Utils { "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } + + public static Map publishDir(def params, String subPath, Map opts=[:]) { + final defaults = [ + mode: params.publish_dir_mode + ] + return defaults + [path: "${params.outdir}/${subPath}"] + opts + } } diff --git a/subworkflows/local/align_star.nf b/subworkflows/local/align_star.nf index ffa027257..f9fab4b4d 100644 --- a/subworkflows/local/align_star.nf +++ b/subworkflows/local/align_star.nf @@ -32,7 +32,42 @@ workflow ALIGN_STAR { ch_bam_transcript = Channel.empty() ch_fastq = Channel.empty() ch_tab = Channel.empty() + align_ext_args = [ + '--quantMode TranscriptomeSAM', + '--twopassMode Basic', + '--outSAMtype BAM Unsorted', + '--readFilesCommand zcat', + '--runRNGseed 0', + '--outFilterMultimapNmax 20', + '--alignSJDBoverhangMin 1', + '--outSAMattributes NH HI AS NM MD', + '--quantTranscriptomeBan Singleend', + '--outSAMstrandField intronMotif', + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.extra_star_align_args ? 
params.extra_star_align_args.split("\\s(?=--)") : '' + ].flatten().unique(false).join(' ').trim() + align_publish_dir = [ + [ + path: "${params.outdir}/${params.aligner}/log", + mode: params.publish_dir_mode, + pattern: '*.{out,tab}' + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ], + [ + path: "${params.outdir}/${params.aligner}/unmapped", + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] if (is_aws_igenome) { + STAR_ALIGN_IGENOMES.config.ext.args = align_ext_args + STAR_ALIGN_IGENOMES.config.publishDir = align_publish_dir STAR_ALIGN_IGENOMES ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center ) ch_orig_bam = STAR_ALIGN_IGENOMES.out.bam ch_log_final = STAR_ALIGN_IGENOMES.out.log_final @@ -44,6 +79,8 @@ workflow ALIGN_STAR { ch_tab = STAR_ALIGN_IGENOMES.out.tab ch_versions = ch_versions.mix(STAR_ALIGN_IGENOMES.out.versions.first()) } else { + STAR_ALIGN.config.ext.args = align_ext_args + STAR_ALIGN.config.publishDir = align_publish_dir STAR_ALIGN ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center ) ch_orig_bam = STAR_ALIGN.out.bam ch_log_final = STAR_ALIGN.out.log_final @@ -59,7 +96,40 @@ workflow ALIGN_STAR { // // Sort, index BAM file and run samtools stats, flagstat and idxstats // - BAM_SORT_STATS_SAMTOOLS ( ch_orig_bam, fasta ) + sort_ext_prefix = { "${meta.id}.sorted" } + sort_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: ( !params.with_umi && params.skip_markduplicates ) || + params.save_align_intermeds || + params.skip_markduplicates + ] + index_ext_args = params.bam_csi_index ? 
'-c' : '' + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: ( !params.with_umi && params.skip_markduplicates ) || + params.save_align_intermeds || + params.skip_markduplicates + ] + stats_ext_prefix = { "${meta.id}.sorted.bam" } + stats_publish_dir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + BAM_SORT_STATS_SAMTOOLS ( + ch_orig_bam, + fasta, + sort_ext_prefix, + sort_publish_dir, + index_ext_args, + index_publish_dir, + stats_ext_prefix, + stats_publish_dir + ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 172d11ea5..dac198701 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -9,6 +9,11 @@ workflow INPUT_CHECK { samplesheet // file: /path/to/samplesheet.csv main: + SAMPLESHEET_CHECK.config.publishDir = [ + path: "${params.outdir}/pipeline_info", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index b83126cd7..18a1c0c14 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -55,10 +55,24 @@ workflow PREPARE_GENOME { ch_versions = Channel.empty() + genome_publish_dir = [ + path: "${params.outdir}/genome", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + genome_index_publish_dir = [ + path: "${params.outdir}/genome/index", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + // // Uncompress genome fasta file if required // if (fasta.endsWith('.gz')) { + GUNZIP_FASTA.config.publishDir = genome_publish_dir ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { @@ -70,6 +84,7 @@ workflow PREPARE_GENOME { // if (gtf) { if (gtf.endsWith('.gz')) { + GUNZIP_GTF.config.publishDir = genome_publish_dir ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { @@ -77,11 +92,15 @@ workflow PREPARE_GENOME { } } else if (gff) { if (gff.endsWith('.gz')) { + GUNZIP_GFF.config.publishDir = genome_publish_dir ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) } else { ch_gff = Channel.value(file(gff)) } + + GFFREAD.config.ext.args = params.prepare_genome_gffread_args + GFFREAD.config.publishDir = genome_publish_dir ch_gtf = GFFREAD ( ch_gff ).gtf ch_versions = ch_versions.mix(GFFREAD.out.versions) } @@ -91,11 +110,14 @@ workflow PREPARE_GENOME { // if (additional_fasta) { if (additional_fasta.endsWith('.gz')) { + GUNZIP_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions) } else { ch_add_fasta = Channel.value(file(additional_fasta)) } + + CAT_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype ) ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta ch_gtf = CAT_ADDITIONAL_FASTA.out.gtf @@ -107,12 +129,14 @@ workflow PREPARE_GENOME { // if (gene_bed) { if (gene_bed.endsWith('.gz')) { + GUNZIP_GENE_BED.config.publishDir = genome_publish_dir ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) } else { 
ch_gene_bed = Channel.value(file(gene_bed)) } } else { + GTF2BED.config.publishDir = genome_publish_dir ch_gene_bed = GTF2BED ( ch_gtf ).bed ch_versions = ch_versions.mix(GTF2BED.out.versions) } @@ -122,18 +146,23 @@ workflow PREPARE_GENOME { // if (transcript_fasta) { if (transcript_fasta.endsWith('.gz')) { + GUNZIP_TRANSCRIPT_FASTA.config.publishDir = genome_publish_dir ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions) } else { ch_transcript_fasta = Channel.value(file(transcript_fasta)) } if (gencode) { + PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.config.publishDir = genome_publish_dir PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta ) ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions) } } else { + GTF_GENE_FILTER.config.publishDir = genome_publish_dir ch_filter_gtf = GTF_GENE_FILTER ( ch_fasta, ch_gtf ).gtf + + MAKE_TRANSCRIPTS_FASTA.config.publishDir = genome_publish_dir ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_filter_gtf ).transcript_fasta ch_versions = ch_versions.mix(GTF_GENE_FILTER.out.versions) ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions) @@ -142,6 +171,7 @@ workflow PREPARE_GENOME { // // Create chromosome sizes file // + CUSTOM_GETCHROMSIZES.config.publishDir = genome_publish_dir CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } ) ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } @@ -154,6 +184,8 @@ workflow PREPARE_GENOME { if ('bbsplit' in prepare_tool_indices) { if (bbsplit_index) { if (bbsplit_index.endsWith('.tar.gz')) { + UNTAR_BBSPLIT_INDEX.config.ext.args2 = params.prepare_genome_untar_args + UNTAR_BBSPLIT_INDEX.config.publishDir = genome_index_publish_dir ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] 
).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions) } else { @@ -169,6 +201,8 @@ workflow PREPARE_GENOME { .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module .set { ch_bbsplit_fasta_list } + BBMAP_BBSPLIT.config.ext.args = params.prepare_genome_bbsplit_args + BBMAP_BBSPLIT.config.publishDir = genome_index_publish_dir ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions) } @@ -181,6 +215,8 @@ workflow PREPARE_GENOME { if ('star_salmon' in prepare_tool_indices) { if (star_index) { if (star_index.endsWith('.tar.gz')) { + UNTAR_STAR_INDEX.config.ext.args2 = params.prepare_genome_untar_args + UNTAR_STAR_INDEX.config.publishDir = genome_index_publish_dir ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) } else { @@ -188,9 +224,11 @@ workflow PREPARE_GENOME { } } else { if (is_aws_igenome) { + STAR_GENOMEGENERATE_IGENOMES.config.publishDir = genome_index_publish_dir ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions) } else { + STAR_GENOMEGENERATE.config.publishDir = genome_index_publish_dir ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) } @@ -204,12 +242,16 @@ workflow PREPARE_GENOME { if ('star_rsem' in prepare_tool_indices) { if (rsem_index) { if (rsem_index.endsWith('.tar.gz')) { + UNTAR_RSEM_INDEX.config.ext.args2 = params.prepare_genome_untar_args + UNTAR_RSEM_INDEX.config.publishDir = genome_index_publish_dir ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions) } else { ch_rsem_index = Channel.value(file(rsem_index)) } } else { 
+ RSEM_PREPAREREFERENCE_GENOME.config.ext.args = params.prepare_genome_rsem_args + RSEM_PREPAREREFERENCE_GENOME.config.publishDir = genome_index_publish_dir ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions) } @@ -222,6 +264,7 @@ workflow PREPARE_GENOME { ch_hisat2_index = Channel.empty() if ('hisat2' in prepare_tool_indices) { if (!splicesites) { + HISAT2_EXTRACTSPLICESITES.config.publishDir = genome_index_publish_dir ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] } ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) } else { @@ -229,12 +272,15 @@ workflow PREPARE_GENOME { } if (hisat2_index) { if (hisat2_index.endsWith('.tar.gz')) { + UNTAR_HISAT2_INDEX.config.ext.args2 = params.prepare_genome_untar_args + UNTAR_HISAT2_INDEX.config.publishDir = genome_index_publish_dir ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions) } else { ch_hisat2_index = Channel.value(file(hisat2_index)) } } else { + HISAT2_BUILD.config.publishDir = genome_index_publish_dir ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] } ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) } @@ -246,6 +292,8 @@ workflow PREPARE_GENOME { ch_salmon_index = Channel.empty() if (salmon_index) { if (salmon_index.endsWith('.tar.gz')) { + UNTAR_SALMON_INDEX.config.ext.args2 = params.prepare_genome_untar_args + UNTAR_SALMON_INDEX.config.publishDir = genome_index_publish_dir ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions) } else { @@ -253,6 +301,8 @@ workflow PREPARE_GENOME { } } else { if ('salmon' in prepare_tool_indices) { + SALMON_INDEX.config.ext.args = 
params.gencode ? '--gencode' : '' + SALMON_INDEX.config.publishDir = genome_index_publish_dir ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) } diff --git a/subworkflows/local/quantify_rsem.nf b/subworkflows/local/quantify_rsem.nf index 666af33b1..53a2fe8c6 100644 --- a/subworkflows/local/quantify_rsem.nf +++ b/subworkflows/local/quantify_rsem.nf @@ -19,18 +19,79 @@ workflow QUANTIFY_RSEM { // // Quantify reads with RSEM // + RSEM_CALCULATEEXPRESSION.config.ext.args = [ + '--star', + '--star-output-genome-bam', + '--star-gzipped-read-file', + '--estimate-rspd', + '--seed 1' + ].join(' ').trim() + RSEM_CALCULATEEXPRESSION.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.{stat,results}" + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds + ], + [ + path: "${params.outdir}/${params.aligner}/log", + mode: params.publish_dir_mode, + pattern: "*.log" + ] + ] RSEM_CALCULATEEXPRESSION ( reads, index ) ch_versions = ch_versions.mix(RSEM_CALCULATEEXPRESSION.out.versions.first()) // // Sort, index BAM file and run samtools stats, flagstat and idxstats // - BAM_SORT_STATS_SAMTOOLS ( RSEM_CALCULATEEXPRESSION.out.bam_star, fasta ) + sort_ext_prefix = { "${meta.id}.sorted" } + sort_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds || + params.skip_markduplicates + ] + index_ext_args = params.bam_csi_index ? 
'-c' : '' + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds || + params.skip_markduplicates + ] + stats_ext_prefix = { "${meta.id}.sorted.bam" } + stats_publish_dir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + BAM_SORT_STATS_SAMTOOLS ( + RSEM_CALCULATEEXPRESSION.out.bam_star, + fasta, + sort_ext_prefix, + sort_publish_dir, + index_ext_args, + index_publish_dir, + stats_ext_prefix, + stats_publish_dir + ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) // // Merge counts across samples // + RSEM_MERGE_COUNTS.config.publishDir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] RSEM_MERGE_COUNTS ( RSEM_CALCULATEEXPRESSION.out.counts_gene.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples RSEM_CALCULATEEXPRESSION.out.counts_transcript.collect{it[1]} diff --git a/subworkflows/local/quantify_salmon.nf b/subworkflows/local/quantify_salmon.nf index 4ab996b2c..f156b2b1b 100644 --- a/subworkflows/local/quantify_salmon.nf +++ b/subworkflows/local/quantify_salmon.nf @@ -19,23 +19,39 @@ workflow QUANTIFY_SALMON { gtf // channel: /path/to/genome.gtf alignment_mode // bool: Run Salmon in alignment mode lib_type // val: String to override salmon library type + publish_dir_path main: ch_versions = Channel.empty() + publish_dir = [ + path: publish_dir_path, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + // // Quantify and merge counts across samples // + SALMON_QUANT.config.ext.args = params.extra_salmon_quant_args ?: '' + SALMON_QUANT.config.publishDir = [ + path: publish_dir_path, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename } + ] SALMON_QUANT ( reads, index, gtf, transcript_fasta, alignment_mode, lib_type ) ch_versions = ch_versions.mix(SALMON_QUANT.out.versions.first()) + SALMON_TX2GENE.config.publishDir = publish_dir SALMON_TX2GENE ( SALMON_QUANT.out.results.collect{it[1]}, gtf ) ch_versions = ch_versions.mix(SALMON_TX2GENE.out.versions) + SALMON_TXIMPORT.config.publishDir = publish_dir SALMON_TXIMPORT ( SALMON_QUANT.out.results.collect{it[1]}, SALMON_TX2GENE.out.tsv.collect() ) ch_versions = ch_versions.mix(SALMON_TXIMPORT.out.versions) + SALMON_SE_GENE.config.publishDir = publish_dir SALMON_SE_GENE ( SALMON_TXIMPORT.out.counts_gene, SALMON_TXIMPORT.out.tpm_gene, @@ -43,18 +59,21 @@ workflow QUANTIFY_SALMON { ) ch_versions = ch_versions.mix(SALMON_SE_GENE.out.versions) + SALMON_SE_GENE_LENGTH_SCALED.config.publishDir = publish_dir SALMON_SE_GENE_LENGTH_SCALED ( SALMON_TXIMPORT.out.counts_gene_length_scaled, SALMON_TXIMPORT.out.tpm_gene, SALMON_TX2GENE.out.tsv.collect() ) + SALMON_SE_GENE_SCALED.config.publishDir = publish_dir SALMON_SE_GENE_SCALED ( SALMON_TXIMPORT.out.counts_gene_scaled, SALMON_TXIMPORT.out.tpm_gene, SALMON_TX2GENE.out.tsv.collect() ) + SALMON_SE_TRANSCRIPT.config.publishDir = publish_dir SALMON_SE_TRANSCRIPT ( SALMON_TXIMPORT.out.counts_transcript, SALMON_TXIMPORT.out.tpm_transcript, diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf index 7c07084ff..33e569046 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf @@ -10,6 +10,11 @@ 
workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { take: ch_bam_bai // channel: [ val(meta), path(bam), path(bai/csi) ] val_get_dedup_stats // boolean: true/false + dedup_ext_prefix + index_ext_args + index_ext_prefix + index_publish_dir + stats_ext_prefix main: @@ -18,12 +23,37 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { // // UMI-tools dedup // + UMITOOLS_DEDUP.config.ext.args = { [ + meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', + params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', + params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + UMITOOLS_DEDUP.config.ext.prefix = dedup_ext_prefix + UMITOOLS_DEDUP.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/umitools", + mode: params.publish_dir_mode, + pattern: '*.tsv' + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + ] UMITOOLS_DEDUP ( ch_bam_bai, val_get_dedup_stats ) ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions.first()) // // Index BAM file and run samtools stats, flagstat and idxstats // + SAMTOOLS_INDEX.config.ext.args = index_ext_args + SAMTOOLS_INDEX.config.ext.prefix = index_ext_prefix + SAMTOOLS_INDEX.config.publishDir = index_publish_dir SAMTOOLS_INDEX ( UMITOOLS_DEDUP.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) @@ -39,6 +69,12 @@ workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { } } + BAM_STATS_SAMTOOLS.config.ext.prefix = stats_ext_prefix + BAM_STATS_SAMTOOLS.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] BAM_STATS_SAMTOOLS ( ch_bam_bai_dedup, [ [:], [] ] ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) diff --git 
a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index 6e3df3320..8c0158adc 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -17,9 +17,30 @@ workflow BAM_MARKDUPLICATES_PICARD { ch_versions = Channel.empty() + PICARD_MARKDUPLICATES.config.ext.args = params.picard_args + PICARD_MARKDUPLICATES.config.ext.prefix = { "${meta.id}.markdup.sorted" } + PICARD_MARKDUPLICATES.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/picard_metrics", + mode: params.publish_dir_mode, + pattern: '*metrics.txt' + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + ] PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + SAMTOOLS_INDEX.config.ext.args = params.bam_csi_index ? '-c' : '' + SAMTOOLS_INDEX.config.ext.prefix = { "${meta.id}.markdup.sorted" } + SAMTOOLS_INDEX.config.publishDir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) @@ -35,7 +56,18 @@ workflow BAM_MARKDUPLICATES_PICARD { } } - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + samtools_ext_prefix = { "${meta.id}.markdup.sorted.bam" } + samtools_publish_dir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + BAM_STATS_SAMTOOLS ( + ch_bam_bai, + ch_fasta, + samtools_ext_prefix, + samtools_publish_dir + ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/bam_rseqc/main.nf b/subworkflows/nf-core/bam_rseqc/main.nf index a698b30ab..66d4d74d1 100644 --- a/subworkflows/nf-core/bam_rseqc/main.nf +++ 
b/subworkflows/nf-core/bam_rseqc/main.nf @@ -30,6 +30,11 @@ workflow BAM_RSEQC { // bamstat_txt = Channel.empty() if ('bam_stat' in rseqc_modules) { + RSEQC_BAMSTAT.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/rseqc/bam_stat", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] RSEQC_BAMSTAT ( ch_bam ) bamstat_txt = RSEQC_BAMSTAT.out.txt ch_versions = ch_versions.mix(RSEQC_BAMSTAT.out.versions.first()) @@ -44,6 +49,24 @@ workflow BAM_RSEQC { innerdistance_pdf = Channel.empty() innerdistance_rscript = Channel.empty() if ('inner_distance' in rseqc_modules) { + RSEQC_INNERDISTANCE.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/rseqc/inner_distance/txt", + mode: params.publish_dir_mode, + pattern: '*.txt', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/inner_distance/pdf", + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/inner_distance/rscript", + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] RSEQC_INNERDISTANCE ( ch_bam, ch_bed ) innerdistance_distance = RSEQC_INNERDISTANCE.out.distance innerdistance_freq = RSEQC_INNERDISTANCE.out.freq @@ -58,6 +81,11 @@ workflow BAM_RSEQC { // inferexperiment_txt = Channel.empty() if ('infer_experiment' in rseqc_modules) { + RSEQC_INFEREXPERIMENT.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/rseqc/infer_experiment", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] RSEQC_INFEREXPERIMENT ( ch_bam, ch_bed ) inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt ch_versions = ch_versions.mix(RSEQC_INFEREXPERIMENT.out.versions.first()) @@ -74,6 +102,33 @@ workflow BAM_RSEQC { junctionannotation_rscript = Channel.empty() junctionannotation_log = Channel.empty() if ('junction_annotation' in rseqc_modules) { + RSEQC_JUNCTIONANNOTATION.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_annotation/pdf", + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_annotation/bed", + mode: params.publish_dir_mode, + pattern: '*.bed' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_annotation/xls", + mode: params.publish_dir_mode, + pattern: '*.xls' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_annotation/log", + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_annotation/rscript", + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] RSEQC_JUNCTIONANNOTATION ( ch_bam, ch_bed ) junctionannotation_bed = RSEQC_JUNCTIONANNOTATION.out.bed junctionannotation_interact_bed = RSEQC_JUNCTIONANNOTATION.out.interact_bed @@ -91,6 +146,18 @@ workflow BAM_RSEQC { junctionsaturation_pdf = Channel.empty() junctionsaturation_rscript = Channel.empty() if ('junction_saturation' in rseqc_modules) { + RSEQC_JUNCTIONSATURATION.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_saturation/pdf", + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/junction_saturation/rscript", + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] RSEQC_JUNCTIONSATURATION ( ch_bam, ch_bed ) junctionsaturation_pdf = RSEQC_JUNCTIONSATURATION.out.pdf junctionsaturation_rscript = RSEQC_JUNCTIONSATURATION.out.rscript @@ -102,6 +169,11 @@ workflow BAM_RSEQC { // 
readdistribution_txt = Channel.empty() if ('read_distribution' in rseqc_modules) { + RSEQC_READDISTRIBUTION.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/rseqc/read_distribution", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] RSEQC_READDISTRIBUTION ( ch_bam, ch_bed ) readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt ch_versions = ch_versions.mix(RSEQC_READDISTRIBUTION.out.versions.first()) @@ -115,6 +187,23 @@ workflow BAM_RSEQC { readduplication_pdf = Channel.empty() readduplication_rscript = Channel.empty() if ('read_duplication' in rseqc_modules) { + RSEQC_READDUPLICATION.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/rseqc/read_duplication/pdf", + mode: params.publish_dir_mode, + pattern: '*.pdf' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/read_duplication/xls", + mode: params.publish_dir_mode, + pattern: '*.xls' + ], + [ + path: "${params.outdir}/${params.aligner}/rseqc/read_duplication/rscript", + mode: params.publish_dir_mode, + pattern: '*.r' + ] + ] RSEQC_READDUPLICATION ( ch_bam ) readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls @@ -128,6 +217,11 @@ workflow BAM_RSEQC { // tin_txt = Channel.empty() if ('tin' in rseqc_modules) { + RSEQC_TIN.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/rseqc/tin", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] RSEQC_TIN ( ch_bam_bai, ch_bed ) tin_txt = RSEQC_TIN.out.txt ch_versions = ch_versions.mix(RSEQC_TIN.out.versions.first()) diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index fc1c652b9..c8ac28bab 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -10,14 +10,24 @@ workflow BAM_SORT_STATS_SAMTOOLS { take: ch_bam // channel: [ val(meta), [ bam ] ] ch_fasta // channel: [ val(meta), path(fasta) ] + sort_ext_prefix + sort_publish_dir + index_ext_args + index_publish_dir + stats_ext_prefix + stats_publish_dir main: ch_versions = Channel.empty() + SAMTOOLS_SORT.config.ext.prefix = sort_ext_prefix + SAMTOOLS_SORT.config.publishDir = sort_publish_dir SAMTOOLS_SORT ( ch_bam ) ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + SAMTOOLS_INDEX.config.ext.args = index_ext_args + SAMTOOLS_INDEX.config.publishDir = index_publish_dir SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) @@ -34,7 +44,12 @@ workflow BAM_SORT_STATS_SAMTOOLS { } .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + BAM_STATS_SAMTOOLS ( + ch_bam_bai, + ch_fasta, + stats_ext_prefix, + stats_publish_dir + ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 44d4c010a..c95c56432 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -10,16 +10,24 @@ workflow BAM_STATS_SAMTOOLS { take: ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] ch_fasta // channel: [ val(meta), path(fasta) ] + ext_prefix + publish_dir main: ch_versions = Channel.empty() + SAMTOOLS_STATS.config.ext.prefix = ext_prefix + SAMTOOLS_STATS.config.publishDir = publish_dir 
SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + SAMTOOLS_FLAGSTAT.config.ext.prefix = ext_prefix + SAMTOOLS_FLAGSTAT.config.publishDir = publish_dir SAMTOOLS_FLAGSTAT ( ch_bam_bai ) ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + SAMTOOLS_IDXSTATS.config.ext.prefix = ext_prefix + SAMTOOLS_IDXSTATS.config.publishDir = publish_dir SAMTOOLS_IDXSTATS ( ch_bam_bai ) ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) diff --git a/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/main.nf b/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/main.nf index 6c3b7b0cb..fc7b9e262 100644 --- a/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/main.nf +++ b/subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig/main.nf @@ -9,6 +9,8 @@ workflow BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG { take: bedgraph // channel: [ val(meta), [ bedgraph ] ] sizes // path: chrom.sizes + clip_ext_prefix + bigwig_ext_prefix main: @@ -17,12 +19,23 @@ workflow BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG { // // Clip bedGraph file // + UCSC_BEDCLIP.config.ext.prefix = clip_ext_prefix + UCSC_BEDCLIP.config.publishDir = [ + path: "${params.outdir}/${params.aligner}", + enabled: false + ] UCSC_BEDCLIP ( bedgraph, sizes ) ch_versions = ch_versions.mix(UCSC_BEDCLIP.out.versions.first()) // // Convert bedGraph to bigWig // + UCSC_BEDGRAPHTOBIGWIG.config.ext.prefix = bigwig_ext_prefix + UCSC_BEDGRAPHTOBIGWIG.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/bigwig", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] UCSC_BEDGRAPHTOBIGWIG ( UCSC_BEDCLIP.out.bedgraph, sizes ) ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first()) diff --git a/subworkflows/nf-core/fastq_align_hisat2/main.nf b/subworkflows/nf-core/fastq_align_hisat2/main.nf index a2ec1cf5e..fd4c8820c 100644 --- a/subworkflows/nf-core/fastq_align_hisat2/main.nf +++ b/subworkflows/nf-core/fastq_align_hisat2/main.nf @@ -17,13 +17,66 @@ workflow FASTQ_ALIGN_HISAT2 { // // Map reads with HISAT2 // + HISAT2_ALIGN.config.ext.args = '--met-stderr --new-summary --dta' + HISAT2_ALIGN.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/log", + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ], + [ + path: "${params.outdir}/${params.aligner}/unmapped", + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] HISAT2_ALIGN ( reads, index, splicesites ) ch_versions = ch_versions.mix(HISAT2_ALIGN.out.versions.first()) // // Sort, index BAM file and run samtools stats, flagstat and idxstats // - BAM_SORT_STATS_SAMTOOLS ( HISAT2_ALIGN.out.bam, ch_fasta ) + sort_ext_prefix = { "${meta.id}.sorted" } + sort_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: ( !params.with_umi && params.skip_markduplicates ) || + params.save_align_intermeds || + params.skip_markduplicates + ] + index_ext_args = params.bam_csi_index ? 
'-c' : '' + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: ( !params.with_umi && params.skip_markduplicates ) || + params.save_align_intermeds || + params.skip_markduplicates + ] + stats_ext_prefix = { "${meta.id}.sorted.bam" } + stats_publish_dir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + BAM_SORT_STATS_SAMTOOLS ( + HISAT2_ALIGN.out.bam, + ch_fasta, + sort_ext_prefix, + sort_publish_dir, + index_ext_args, + index_publish_dir, + stats_ext_prefix, + stats_publish_dir + ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 64ec88f23..304a0ff3a 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -35,6 +35,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { fastqc_raw_html = Channel.empty() fastqc_raw_zip = Channel.empty() if (!skip_fastqc) { + FASTQC_RAW.config.ext.args = params.fastqc_args FASTQC_RAW ( reads ) @@ -46,6 +47,25 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { umi_reads = reads umi_log = Channel.empty() if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT.config.ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + params.umitools_bc_pattern2 ? "--bc-pattern2='${params.umitools_bc_pattern2}'" : '', + params.umitools_umi_separator ? 
"--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() + UMITOOLS_EXTRACT.config.publishDir = [ + [ + path: "${params.outdir}/umitools", + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: "${params.outdir}/umitools", + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] + ] UMITOOLS_EXTRACT ( reads ) @@ -76,6 +96,25 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { fastqc_trim_zip = Channel.empty() trim_read_count = Channel.empty() if (!skip_trimming) { + FASTP.config.ext.args = params.extra_fastp_args ?: '' + FASTP.config.publishDir = [ + [ + path: "${params.outdir}/${params.trimmer}", + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: "${params.outdir}/${params.trimmer}/log", + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: "${params.outdir}/${params.trimmer}", + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_trimmed + ] + ] FASTP ( umi_reads, adapter_fasta, @@ -109,6 +148,12 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { .set { trim_read_count } if (!skip_fastqc) { + FASTQC_TRIM.config.ext.args = params.fastqc_args + FASTQC_TRIM.config.publishDir = [ + path: "${params.outdir}/${params.trimmer}/fastqc", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] FASTQC_TRIM ( trim_reads ) diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf index db2e5b329..f8770d094 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -36,6 +36,7 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { fastqc_html = Channel.empty() fastqc_zip = Channel.empty() if (!skip_fastqc) { + FASTQC.config.ext.args = params.fastqc_args FASTQC (reads) fastqc_html = FASTQC.out.html fastqc_zip = FASTQC.out.zip @@ -45,22 +46,41 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { umi_reads = reads umi_log = Channel.empty() if (with_umi && !skip_umi_extract) { - UMITOOLS_EXTRACT (reads) - umi_reads = UMITOOLS_EXTRACT.out.reads - umi_log = UMITOOLS_EXTRACT.out.log - ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + UMITOOLS_EXTRACT.config.ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + params.umitools_bc_pattern2 ? "--bc-pattern2='${params.umitools_bc_pattern2}'" : '', + params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() + UMITOOLS_EXTRACT.config.publishDir = [ + [ + path: "${params.outdir}/umitools", + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: "${params.outdir}/umitools", + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] + ] + UMITOOLS_EXTRACT (reads) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) - // Discard R1 / R2 if required - if (umi_discard_read in [1,2]) { - UMITOOLS_EXTRACT - .out - .reads - .map { - meta, reads -> - meta.single_end ? 
[ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] - } - .set { umi_reads } - } + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } } trim_reads = umi_reads @@ -70,6 +90,30 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { trim_log = Channel.empty() trim_read_count = Channel.empty() if (!skip_trimming) { + TRIMGALORE.config.ext.args = { + [ + "--fastqc_args '-t ${task.cpus}'", + params.extra_trimgalore_args ? params.extra_trimgalore_args.split("\\s(?=--)") : '' + ].flatten().unique(false).join(' ').trim() + } + TRIMGALORE.config.publishDir = [ + [ + path: "${params.outdir}/${params.trimmer}/fastqc", + mode: params.publish_dir_mode, + pattern: "*.{html,zip}" + ], + [ + path: "${params.outdir}/${params.trimmer}", + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + enabled: params.save_trimmed + ], + [ + path: "${params.outdir}/${params.trimmer}", + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ] TRIMGALORE (umi_reads) trim_unpaired = TRIMGALORE.out.unpaired trim_html = TRIMGALORE.out.html diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf index 0ac3e53a3..f64a6489c 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf @@ -24,6 +24,13 @@ workflow FASTQ_SUBSAMPLE_FQ_SALMON { // Create Salmon index if required // if (make_index) { + SALMON_INDEX.config.ext.args = params.gencode ? '--gencode' : '' + SALMON_INDEX.config.publishDir = [ + path: "${params.outdir}/genome/index", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] ch_index = SALMON_INDEX ( ch_genome_fasta, ch_transcript_fasta ).index ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) } @@ -31,6 +38,14 @@ workflow FASTQ_SUBSAMPLE_FQ_SALMON { // // Sub-sample FastQ files with fq // + FQ_SUBSAMPLE.config.ext.args = params.subsample_fq_args + FQ_SUBSAMPLE.config.ext.prefix = { "${meta.id}.subsampled" } + FQ_SUBSAMPLE.config.publishDir = [ + path: "${params.outdir}/sample_fastq/fastq", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] FQ_SUBSAMPLE ( ch_reads ) ch_versions = ch_versions.mix(FQ_SUBSAMPLE.out.versions.first()) @@ -39,6 +54,13 @@ workflow FASTQ_SUBSAMPLE_FQ_SALMON { // def lib_type = 'A' def alignment_mode = false + SALMON_QUANT.config.ext.args = params.subsample_salmon_args + SALMON_QUANT.config.publishDir = [ + path: "${params.outdir}/sample_fastq/salmon", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename }, + enabled: false + ] SALMON_QUANT ( FQ_SUBSAMPLE.out.fastq, ch_index, ch_gtf, ch_transcript_fasta, alignment_mode, lib_type ) ch_versions = ch_versions.mix(SALMON_QUANT.out.versions.first()) diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf index 8c6b7260d..cfc66804f 100755 --- a/workflows/rnaseq.nf +++ b/workflows/rnaseq.nf @@ -221,6 +221,12 @@ workflow RNASEQ { // // MODULE: Concatenate FastQ files from same sample if required // + CAT_FASTQ.config.publishDir = [ + path: "${params.outdir}/fastq", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_merged_fastq + ] CAT_FASTQ ( ch_fastq.multiple ) @@ -344,6 +350,20 @@ workflow RNASEQ { // MODULE: Remove genome contaminant reads // if (!params.skip_bbsplit) { + BBMAP_BBSPLIT.config.ext.args = params.bbsplit_args + BBMAP_BBSPLIT.config.publishDir = [ + [ + path: "${params.outdir}/bbsplit", + mode: params.publish_dir_mode, + pattern: '*.txt' + ], + [ + path: "${params.outdir}/bbsplit", + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_bbsplit_reads + ] + ] BBMAP_BBSPLIT ( ch_filtered_reads, PREPARE_GENOME.out.bbsplit_index, @@ -363,6 +383,20 @@ workflow RNASEQ { if (params.remove_ribo_rna) { ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() + SORTMERNA.config.ext.args = params.sortmerna_args + SORTMERNA.config.publishDir = [ + [ + path: "${params.outdir}/sortmerna", + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: "${params.outdir}/sortmerna", + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_non_ribo_reads + ] + ] SORTMERNA ( ch_filtered_reads, ch_sortmerna_fastas @@ -413,9 +447,27 @@ workflow RNASEQ { // if (params.with_umi) { // Deduplicate genome BAM file before downstream analysis + dedup_ext_prefix = { "${meta.id}.umi_dedup.sorted" } + index_ext_args = params.bam_csi_index ? 
'-c' : '' + index_ext_prefix = { "${meta.id}.umi_dedup.sorted" } + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + stats_ext_prefix = { "${meta.id}.umi_dedup.sorted.bam" } BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats + params.umitools_dedup_stats, + dedup_ext_prefix, + index_ext_args, + index_ext_prefix, + index_publish_dir, + stats_ext_prefix ) ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bam ch_genome_bam_index = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.bai @@ -428,20 +480,85 @@ workflow RNASEQ { ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.versions) // Co-ordinate sort, index and run stats on transcriptome BAM + sort_ext_prefix = { "${meta.id}.transcriptome.sorted" } + sort_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + index_ext_args = '' + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + stats_ext_prefix = { "${meta.id}.transcriptome.sorted.bam" } + stats_publish_dir = [ + path: "${params.outdir}/${params.aligner}/samtools_stats", + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] BAM_SORT_STATS_SAMTOOLS ( ch_transcriptome_bam, - PREPARE_GENOME.out.fasta.map { [ [:], it ] } + PREPARE_GENOME.out.fasta.map { [ [:], it ] }, + sort_ext_prefix, + sort_publish_dir, + index_ext_args, + index_publish_dir, + stats_ext_prefix, + stats_publish_dir ) ch_transcriptome_sorted_bam = 
BAM_SORT_STATS_SAMTOOLS.out.bam ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai // Deduplicate transcriptome BAM file before read counting with Salmon + dedup_ext_prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } + index_ext_args = '' + index_ext_prefix = '' + index_publish_dir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + stats_ext_prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), - params.umitools_dedup_stats + params.umitools_dedup_stats, + dedup_ext_prefix, + index_ext_args, + index_ext_prefix, + index_publish_dir, + stats_ext_prefix ) // Name sort BAM before passing to Salmon + SAMTOOLS_SORT.config.ext.args = params.samtools_sort_args + SAMTOOLS_SORT.config.ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } + SAMTOOLS_SORT.config.publishDir = [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] SAMTOOLS_SORT ( BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam ) @@ -461,6 +578,23 @@ workflow RNASEQ { // Fix paired-end reads in name sorted BAM file // See: https://github.com/nf-core/rnaseq/issues/828 + UMITOOLS_PREPAREFORSALMON.config.ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } + UMITOOLS_PREPAREFORSALMON.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/umitools/log", + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: "${params.outdir}/${params.aligner}", + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_align_intermeds || + params.save_umi_intermeds + ) + ] + ] UMITOOLS_PREPAREFORSALMON ( ch_umitools_dedup_bam.paired_end ) @@ -481,11 +615,25 @@ 
workflow RNASEQ { PREPARE_GENOME.out.transcript_fasta, PREPARE_GENOME.out.gtf, true, - params.salmon_quant_libtype ?: '' + params.salmon_quant_libtype ?: '', + "${params.outdir}/${params.aligner}" ) ch_versions = ch_versions.mix(QUANTIFY_STAR_SALMON.out.versions) if (!params.skip_qc & !params.skip_deseq2_qc) { + DESEQ2_QC_STAR_SALMON.config.ext.args = [ + "--id_col 1", + "--sample_suffix ''", + "--outprefix deseq2", + "--count_col 3", + params.deseq2_vst ? '--vst TRUE' : '' + ].join(' ').trim() + DESEQ2_QC_STAR_SALMON.config.ext.args2 = 'star_salmon' + DESEQ2_QC_STAR_SALMON.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/deseq2_qc", + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] DESEQ2_QC_STAR_SALMON ( QUANTIFY_STAR_SALMON.out.counts_gene_length_scaled, ch_pca_header_multiqc, @@ -520,6 +668,19 @@ workflow RNASEQ { ch_versions = ch_versions.mix(QUANTIFY_RSEM.out.versions) if (!params.skip_qc & !params.skip_deseq2_qc) { + DESEQ2_QC_RSEM.config.ext.args = [ + "--id_col 1", + "--sample_suffix ''", + "--outprefix deseq2", + "--count_col 3", + params.deseq2_vst ? 
'--vst TRUE' : '' + ].join(' ').trim() + DESEQ2_QC_RSEM.config.ext.args2 = 'star_rsem' + DESEQ2_QC_RSEM.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/deseq2_qc", + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] DESEQ2_QC_RSEM ( QUANTIFY_RSEM.out.merged_counts_gene, ch_pca_header_multiqc, @@ -619,6 +780,19 @@ workflow RNASEQ { // ch_preseq_multiqc = Channel.empty() if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) { + PRESEQ_LCEXTRAP.config.ext.args = params.preseq_lcextrap_args + PRESEQ_LCEXTRAP.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/preseq", + mode: params.publish_dir_mode, + pattern: "*.txt" + ], + [ + path: "${params.outdir}/${params.aligner}/preseq/log", + mode: params.publish_dir_mode, + pattern: "*.log" + ] + ] PRESEQ_LCEXTRAP ( ch_genome_bam ) @@ -652,6 +826,15 @@ workflow RNASEQ { // MODULE: STRINGTIE // if (!params.skip_alignment && !params.skip_stringtie) { + STRINGTIE_STRINGTIE.config.ext.args = [ + '-v', + params.stringtie_ignore_gtf ? '' : '-e' + ].join(' ').trim() + STRINGTIE_STRINGTIE.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/stringtie", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] STRINGTIE_STRINGTIE ( ch_genome_bam, PREPARE_GENOME.out.gtf @@ -679,11 +862,26 @@ workflow RNASEQ { .map { it[0..<it.size()-1] } .set { ch_featurecounts } + SUBREAD_FEATURECOUNTS.config.ext.args = [ + '-B -C', + params.gencode ? "-g gene_type" : "-g $params.featurecounts_group_type", + "-t $params.featurecounts_feature_type" + ].join(' ').trim() + SUBREAD_FEATURECOUNTS.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/featurecounts", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] SUBREAD_FEATURECOUNTS ( ch_featurecounts ) ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first()) + MULTIQC_CUSTOM_BIOTYPE.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/featurecounts", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] MULTIQC_CUSTOM_BIOTYPE ( SUBREAD_FEATURECOUNTS.out.counts, ch_biotypes_header_multiqc @@ -697,6 +895,11 @@ workflow RNASEQ { // if (!params.skip_alignment && !params.skip_bigwig) { + BEDTOOLS_GENOMECOV.config.ext.args = params.bedtools_args + BEDTOOLS_GENOMECOV.config.publishDir = [ + path: { "${params.outdir}/bedtools/${meta.id}" }, + enabled: false + ] BEDTOOLS_GENOMECOV ( ch_genome_bam ) @@ -705,15 +908,23 @@ workflow RNASEQ { // // SUBWORKFLOW: Convert bedGraph to bigWig // + clip_ext_prefix = { "${meta.id}.clip.forward" } + bigwig_ext_prefix = { "${meta.id}.forward" } BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD ( BEDTOOLS_GENOMECOV.out.bedgraph_forward, - PREPARE_GENOME.out.chrom_sizes + PREPARE_GENOME.out.chrom_sizes, + clip_ext_prefix, + bigwig_ext_prefix ) ch_versions = ch_versions.mix(BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD.out.versions) + clip_ext_prefix = { "${meta.id}.clip.reverse" } + bigwig_ext_prefix = { "${meta.id}.reverse" } BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE ( BEDTOOLS_GENOMECOV.out.bedgraph_reverse, - PREPARE_GENOME.out.chrom_sizes + PREPARE_GENOME.out.chrom_sizes, + clip_ext_prefix, + bigwig_ext_prefix ) } @@ -733,6 +944,11 @@ workflow RNASEQ { ch_tin_multiqc = Channel.empty() if (!params.skip_alignment && !params.skip_qc) { if (!params.skip_qualimap) { + QUALIMAP_RNASEQ.config.publishDir = [ + path: "${params.outdir}/${params.aligner}/qualimap", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] QUALIMAP_RNASEQ ( ch_genome_bam, PREPARE_GENOME.out.gtf @@ -742,6 +958,33 @@ workflow RNASEQ { } if (!params.skip_dupradar) { + DUPRADAR.config.publishDir = [ + [ + path: "${params.outdir}/${params.aligner}/dupradar/scatter_plot", + mode: params.publish_dir_mode, + pattern: "*Dens.pdf" + ], + [ + path: "${params.outdir}/${params.aligner}/dupradar/box_plot", + mode: params.publish_dir_mode, + pattern: "*Boxplot.pdf" + ], + [ + path: "${params.outdir}/${params.aligner}/dupradar/histogram", + mode: params.publish_dir_mode, + pattern: "*Hist.pdf" + ], + [ + path: "${params.outdir}/${params.aligner}/dupradar/gene_data", + mode: params.publish_dir_mode, + pattern: "*Matrix.txt" + ], + [ + path: "${params.outdir}/${params.aligner}/dupradar/intercepts_slope", + mode: params.publish_dir_mode, + pattern: "*slope.txt" + ] + ] DUPRADAR ( ch_genome_bam, PREPARE_GENOME.out.gtf @@ -806,12 +1049,26 @@ workflow RNASEQ { ch_dummy_file, PREPARE_GENOME.out.gtf, false, - params.salmon_quant_libtype ?: '' + params.salmon_quant_libtype ?: '', + "${params.outdir}/${params.pseudo_aligner}" ) ch_salmon_multiqc = QUANTIFY_SALMON.out.results ch_versions = ch_versions.mix(QUANTIFY_SALMON.out.versions) if (!params.skip_qc & !params.skip_deseq2_qc) { + DESEQ2_QC_SALMON.config.ext.args = [ + "--id_col 1", + "--sample_suffix ''", + "--outprefix deseq2", + "--count_col 3", + params.deseq2_vst ? 
'--vst TRUE' : '' + ].join(' ').trim() + DESEQ2_QC_SALMON.config.ext.args2 = 'salmon' + DESEQ2_QC_SALMON.config.publishDir = [ + path: "${params.outdir}/${params.pseudo_aligner}/deseq2_qc", + mode: params.publish_dir_mode, + pattern: "*{RData,pca.vals.txt,plots.pdf,sample.dists.txt,size_factors,log}" + ] DESEQ2_QC_SALMON ( QUANTIFY_SALMON.out.counts_gene_length_scaled, ch_pca_header_multiqc, @@ -826,6 +1083,11 @@ workflow RNASEQ { // // MODULE: Pipeline reporting // + CUSTOM_DUMPSOFTWAREVERSIONS.config.publishDir = [ + path: "${params.outdir}/pipeline_info", + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) @@ -840,6 +1102,15 @@ workflow RNASEQ { methods_description = WorkflowRnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) ch_methods_description = Channel.value(methods_description) + MULTIQC.config.ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + MULTIQC.config.publishDir = [ + path: [ + "${params.outdir}/multiqc", + params.skip_alignment ? '' : "/${params.aligner}" + ].join(''), + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] MULTIQC ( ch_multiqc_config, ch_multiqc_custom_config.collect().ifEmpty([]),