Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move process config into pipeline code #1081

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,200 changes: 44 additions & 1,156 deletions conf/modules.config

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,11 @@ class Utils {
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
}
}

/**
 * Builds an option map containing a publish mode and an output path.
 *
 * The result is assembled from three maps merged left to right, so later
 * entries win: the default `mode`, then the computed `path`, then `opts`.
 * A caller can therefore override both `mode` and `path` through `opts`.
 *
 * @param params  object exposing `publish_dir_mode` and `outdir` properties
 * @param subPath path segment appended to `params.outdir` with a `/` separator
 * @param opts    extra entries merged last; `null` is treated as an empty map
 * @return a new Map; none of the argument maps are modified
 */
public static Map publishDir(def params, String subPath, Map opts=[:]) {
    final defaults = [
        mode: params.publish_dir_mode
    ]
    // An explicit null would make the Map `+` below throw a NullPointerException,
    // so fall back to an empty map in that case.
    return defaults + [path: "${params.outdir}/${subPath}"] + (opts ?: [:])
}
}
72 changes: 71 additions & 1 deletion subworkflows/local/align_star.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,42 @@ workflow ALIGN_STAR {
ch_bam_transcript = Channel.empty()
ch_fastq = Channel.empty()
ch_tab = Channel.empty()
align_ext_args = [
'--quantMode TranscriptomeSAM',
'--twopassMode Basic',
'--outSAMtype BAM Unsorted',
'--readFilesCommand zcat',
'--runRNGseed 0',
'--outFilterMultimapNmax 20',
'--alignSJDBoverhangMin 1',
'--outSAMattributes NH HI AS NM MD',
'--quantTranscriptomeBan Singleend',
'--outSAMstrandField intronMotif',
params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
params.extra_star_align_args ? params.extra_star_align_args.split("\\s(?=--)") : ''
].flatten().unique(false).join(' ').trim()
align_publish_dir = [
[
path: "${params.outdir}/${params.aligner}/log",
mode: params.publish_dir_mode,
pattern: '*.{out,tab}'
],
[
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.save_align_intermeds
],
[
path: "${params.outdir}/${params.aligner}/unmapped",
mode: params.publish_dir_mode,
pattern: '*.fastq.gz',
enabled: params.save_unaligned
]
]
if (is_aws_igenome) {
STAR_ALIGN_IGENOMES.config.ext.args = align_ext_args
STAR_ALIGN_IGENOMES.config.publishDir = align_publish_dir
STAR_ALIGN_IGENOMES ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center )
ch_orig_bam = STAR_ALIGN_IGENOMES.out.bam
ch_log_final = STAR_ALIGN_IGENOMES.out.log_final
Expand All @@ -44,6 +79,8 @@ workflow ALIGN_STAR {
ch_tab = STAR_ALIGN_IGENOMES.out.tab
ch_versions = ch_versions.mix(STAR_ALIGN_IGENOMES.out.versions.first())
} else {
STAR_ALIGN.config.ext.args = align_ext_args
STAR_ALIGN.config.publishDir = align_publish_dir
STAR_ALIGN ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center )
ch_orig_bam = STAR_ALIGN.out.bam
ch_log_final = STAR_ALIGN.out.log_final
Expand All @@ -59,7 +96,40 @@ workflow ALIGN_STAR {
//
// Sort, index BAM file and run samtools stats, flagstat and idxstats
//
BAM_SORT_STATS_SAMTOOLS ( ch_orig_bam, fasta )
sort_ext_prefix = { "${meta.id}.sorted" }
sort_publish_dir = [
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
pattern: "*.bam",
enabled: ( !params.with_umi && params.skip_markduplicates ) ||
params.save_align_intermeds ||
params.skip_markduplicates
]
index_ext_args = params.bam_csi_index ? '-c' : ''
index_publish_dir = [
path: "${params.outdir}/${params.aligner}",
mode: params.publish_dir_mode,
pattern: "*.{bai,csi}",
enabled: ( !params.with_umi && params.skip_markduplicates ) ||
params.save_align_intermeds ||
params.skip_markduplicates
]
stats_ext_prefix = { "${meta.id}.sorted.bam" }
stats_publish_dir = [
path: "${params.outdir}/${params.aligner}/samtools_stats",
mode: params.publish_dir_mode,
pattern: "*.{stats,flagstat,idxstats}"
]
BAM_SORT_STATS_SAMTOOLS (
ch_orig_bam,
fasta,
sort_ext_prefix,
sort_publish_dir,
index_ext_args,
index_publish_dir,
stats_ext_prefix,
stats_publish_dir
)
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)

emit:
Expand Down
5 changes: 5 additions & 0 deletions subworkflows/local/input_check.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ workflow INPUT_CHECK {
samplesheet // file: /path/to/samplesheet.csv

main:
SAMPLESHEET_CHECK.config.publishDir = [
path: "${params.outdir}/pipeline_info",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
SAMPLESHEET_CHECK ( samplesheet )
.csv
.splitCsv ( header:true, sep:',' )
Expand Down
50 changes: 50 additions & 0 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,24 @@ workflow PREPARE_GENOME {

ch_versions = Channel.empty()

genome_publish_dir = [
path: "${params.outdir}/genome",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_reference
]
genome_index_publish_dir = [
path: "${params.outdir}/genome/index",
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_reference
]

//
// Uncompress genome fasta file if required
//
if (fasta.endsWith('.gz')) {
GUNZIP_FASTA.config.publishDir = genome_publish_dir
ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
} else {
Expand All @@ -70,18 +84,23 @@ workflow PREPARE_GENOME {
//
if (gtf) {
if (gtf.endsWith('.gz')) {
GUNZIP_GTF.config.publishDir = genome_publish_dir
ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = Channel.value(file(gtf))
}
} else if (gff) {
if (gff.endsWith('.gz')) {
GUNZIP_GFF.config.publishDir = genome_publish_dir
ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = Channel.value(file(gff))
}

GFFREAD.config.ext.args = params.prepare_genome_gffread_args
GFFREAD.config.publishDir = genome_publish_dir
ch_gtf = GFFREAD ( ch_gff ).gtf
ch_versions = ch_versions.mix(GFFREAD.out.versions)
}
Expand All @@ -91,11 +110,14 @@ workflow PREPARE_GENOME {
//
if (additional_fasta) {
if (additional_fasta.endsWith('.gz')) {
GUNZIP_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions)
} else {
ch_add_fasta = Channel.value(file(additional_fasta))
}

CAT_ADDITIONAL_FASTA.config.publishDir = genome_publish_dir
CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype )
ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta
ch_gtf = CAT_ADDITIONAL_FASTA.out.gtf
Expand All @@ -107,12 +129,14 @@ workflow PREPARE_GENOME {
//
if (gene_bed) {
if (gene_bed.endsWith('.gz')) {
GUNZIP_GENE_BED.config.publishDir = genome_publish_dir
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
ch_gene_bed = Channel.value(file(gene_bed))
}
} else {
GTF2BED.config.publishDir = genome_publish_dir
ch_gene_bed = GTF2BED ( ch_gtf ).bed
ch_versions = ch_versions.mix(GTF2BED.out.versions)
}
Expand All @@ -122,18 +146,23 @@ workflow PREPARE_GENOME {
//
if (transcript_fasta) {
if (transcript_fasta.endsWith('.gz')) {
GUNZIP_TRANSCRIPT_FASTA.config.publishDir = genome_publish_dir
ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions)
} else {
ch_transcript_fasta = Channel.value(file(transcript_fasta))
}
if (gencode) {
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.config.publishDir = genome_publish_dir
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta )
ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta
ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions)
}
} else {
GTF_GENE_FILTER.config.publishDir = genome_publish_dir
ch_filter_gtf = GTF_GENE_FILTER ( ch_fasta, ch_gtf ).gtf

MAKE_TRANSCRIPTS_FASTA.config.publishDir = genome_publish_dir
ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_filter_gtf ).transcript_fasta
ch_versions = ch_versions.mix(GTF_GENE_FILTER.out.versions)
ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)
Expand All @@ -142,6 +171,7 @@ workflow PREPARE_GENOME {
//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES.config.publishDir = genome_publish_dir
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
Expand All @@ -154,6 +184,8 @@ workflow PREPARE_GENOME {
if ('bbsplit' in prepare_tool_indices) {
if (bbsplit_index) {
if (bbsplit_index.endsWith('.tar.gz')) {
UNTAR_BBSPLIT_INDEX.config.ext.args2 = params.prepare_genome_untar_args
UNTAR_BBSPLIT_INDEX.config.publishDir = genome_index_publish_dir
ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
} else {
Expand All @@ -169,6 +201,8 @@ workflow PREPARE_GENOME {
.collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module
.set { ch_bbsplit_fasta_list }

BBMAP_BBSPLIT.config.ext.args = params.prepare_genome_bbsplit_args
BBMAP_BBSPLIT.config.publishDir = genome_index_publish_dir
ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index
ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions)
}
Expand All @@ -181,16 +215,20 @@ workflow PREPARE_GENOME {
if ('star_salmon' in prepare_tool_indices) {
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
UNTAR_STAR_INDEX.config.ext.args2 = params.prepare_genome_untar_args
UNTAR_STAR_INDEX.config.publishDir = genome_index_publish_dir
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = Channel.value(file(star_index))
}
} else {
if (is_aws_igenome) {
STAR_GENOMEGENERATE_IGENOMES.config.publishDir = genome_index_publish_dir
ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions)
} else {
STAR_GENOMEGENERATE.config.publishDir = genome_index_publish_dir
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
Expand All @@ -204,12 +242,16 @@ workflow PREPARE_GENOME {
if ('star_rsem' in prepare_tool_indices) {
if (rsem_index) {
if (rsem_index.endsWith('.tar.gz')) {
UNTAR_RSEM_INDEX.config.ext.args2 = params.prepare_genome_untar_args
UNTAR_RSEM_INDEX.config.publishDir = genome_index_publish_dir
ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions)
} else {
ch_rsem_index = Channel.value(file(rsem_index))
}
} else {
RSEM_PREPAREREFERENCE_GENOME.config.ext.args = params.prepare_genome_rsem_args
RSEM_PREPAREREFERENCE_GENOME.config.publishDir = genome_index_publish_dir
ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions)
}
Expand All @@ -222,19 +264,23 @@ workflow PREPARE_GENOME {
ch_hisat2_index = Channel.empty()
if ('hisat2' in prepare_tool_indices) {
if (!splicesites) {
HISAT2_EXTRACTSPLICESITES.config.publishDir = genome_index_publish_dir
ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] }
ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions)
} else {
ch_splicesites = Channel.value(file(splicesites))
}
if (hisat2_index) {
if (hisat2_index.endsWith('.tar.gz')) {
UNTAR_HISAT2_INDEX.config.ext.args2 = params.prepare_genome_untar_args
UNTAR_HISAT2_INDEX.config.publishDir = genome_index_publish_dir
ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions)
} else {
ch_hisat2_index = Channel.value(file(hisat2_index))
}
} else {
HISAT2_BUILD.config.publishDir = genome_index_publish_dir
ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] }
ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions)
}
Expand All @@ -246,13 +292,17 @@ workflow PREPARE_GENOME {
ch_salmon_index = Channel.empty()
if (salmon_index) {
if (salmon_index.endsWith('.tar.gz')) {
UNTAR_SALMON_INDEX.config.ext.args2 = params.prepare_genome_untar_args
UNTAR_SALMON_INDEX.config.publishDir = genome_index_publish_dir
ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions)
} else {
ch_salmon_index = Channel.value(file(salmon_index))
}
} else {
if ('salmon' in prepare_tool_indices) {
SALMON_INDEX.config.ext.args = params.gencode ? '--gencode' : ''
SALMON_INDEX.config.publishDir = genome_index_publish_dir
ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index
ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
}
Expand Down
Loading
Loading