-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
164 lines (141 loc) · 5.23 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env nextflow
/*
Coriell Institute for Medical Research
RNAseq Pipeline. Started November 2023.
Contributors:
Anthony Pompetti <[email protected]>
Methodology adapted from:
Gennaro Calendo
*/
/*
Enable Nextflow DSL2
*/
nextflow.enable.dsl=2
//Configurable variables for pipeline
params.fastq_folder = "${workflow.projectDir}/fastq"
params.reads = "${params.fastq_folder}/*{_L00,}{1,2,3,4}{_R,_}{1,2}*.{fastq,fq}.gz"
params.singleEnd = false
params.multiLane = false
params.amplifytargets = false
params.genome = false
params.db = params.genomes ? params.genomes[ params.genome ].db ?:false : false
//Include modules to main pipeline
include { FASTQC as PRETRIM_FASTQC } from './modules/fastqc/main' addParams(pubdir: 'pretrim_fastqc')
include { TRIM_GALORE } from './modules/trim_galore/main'
include { FASTQC as POSTTRIM_FASTQC } from './modules/fastqc/main' addParams(pubdir: 'posttrim_fastqc')
include { SALMON_ALIGN } from './modules/salmon/salmon_align/main'
include { GEN_SE } from './modules/salmon/gen_se/main'
include { MULTIQC } from './modules/multiqc/main'
//Provide sample table in csv format to have pipeline process samples via sample table
params.sample_table = "${workflow.projectDir}/sample_table_input.csv"
if ( params.sample_table ){
// Channel for the samplesheet
ch_samplesheet = Channel.fromPath(params.sample_table)
//ch_samplesheet.view()
// Parse it line by line
reads_ch = ch_samplesheet.splitCsv(header:true).map {
// This is the read1 and read2 entry
r1_L1 = it['r1_L1']
r1_L2 = it['r1_L2']
r1_L3 = it['r1_L3']
r1_L4 = it['r1_L4']
r2_L1 = it['r2_L1']
r2_L2 = it['r2_L2']
r2_L3 = it['r2_L3']
r2_L4 = it['r2_L4']
// Detect wiether single-end or paired-end
is_singleEnd = r2_L1.toString()=='' ? true : false
is_multiLane = r1_L2.toString()=='' ? false : true
// The "meta" map, which is a Nextflow/Groovy map with id (the sample name) and a single_end logical entry
meta = [id: it['sample'], single_end: is_singleEnd, multi_lane: is_multiLane]
// We return a nested map, the first entry is the meta map, the second one is the read(s)
if ( is_singleEnd ){
if ( r1_L4.toString()!='' ){
[meta, [r1_L1, r1_L2, r1_L3, r1_L4], []]
}
else if ( r1_L3.toString()!='' ){
[meta, [r1_L1, r1_L2, r1_L3], []]
}
else if ( r1_L2.toString()!='' ){
[meta, [r1_L1, r1_L2], []]
}
else {
[meta, [r1_L1], []]
}
}
else{
if (r2_L4.toString()!=''){
[meta, [r1_L1, r1_L2, r1_L3, r1_L4], [r2_L1, r2_L2, r2_L3, r2_L4]]
}
else if ( r2_L3.toString()!='' ){
[meta, [r1_L1, r1_L2, r1_L3], [r2_L1, r2_L2, r2_L3]]
}
else if ( r2_L2.toString()!='' ){
[meta, [r1_L1, r1_L2], [r2_L1, r2_L2]]
}
else {
[meta, [r1_L1], [r2_L1]]
}
}
}
}
else {
Channel
.fromFilePairs(params.reads, size: params.singleEnd ? (params.multiLane ?: 1) : 2)
.ifEmpty {exit 1, "Cannot find any reads matching ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line."}
.set{ reads_ch }
}
process PREPROCESS {
input:
tuple val(meta), path(reads_1), path(reads_2)
output:
tuple val(meta), path("*{_mL,_sL,_sL_1,_sL_2,_mL_1,_mL_2}.fq.gz")
script:
if ( meta.multi_lane ){
if (meta.single_end){
"""
cat ${reads_1} > ${meta.id}_mL.fq.gz
"""
}
else {
"""
cat ${reads_1} > ${meta.id}_mL_1.fq.gz
cat ${reads_2} > ${meta.id}_mL_2.fq.gz
"""
}
}
else {
if (meta.single_end){
"""
mv ${reads_1} ${meta.id}_sL.fq.gz
"""
}
else {
"""
mv ${reads_1} ${meta.id}_sL_1.fq.gz
mv ${reads_2} ${meta.id}_sL_2.fq.gz
"""
}
}
}
workflow {
//reads_ch.view()
PREPROCESS(reads_ch)
reads_ch = PREPROCESS.out
//reads_ch.view()
//Run fastqc on raw reads
PRETRIM_FASTQC(reads_ch)
//Run trim_galore on raw reads
TRIM_GALORE(reads_ch)
//Run fastqc on trimmed reads, specifies trim_galore[0] because second input channel is not need for this process
POSTTRIM_FASTQC(TRIM_GALORE.out.reads)
//Run salmon_align on trimmed reads
//State Dependency: Wait until TRIM_GALORE is done to run bismark align
//State Dependency: Wait until POSTTRIM_FASTQC is done to run bismark align
state = POSTTRIM_FASTQC.out.collect()
SALMON_ALIGN(TRIM_GALORE.out.reads.collect(flat: false).flatMap(), state)
//Run gen_se on salmon quants
GEN_SE(SALMON_ALIGN.out.quants.collect())
//Run multiqc on pretrim fastqc output, trim_galore trimming report, posttrim fastqc output, salmon align output
MULTIQC(PRETRIM_FASTQC.out.collect().combine(POSTTRIM_FASTQC.out.collect()).combine(TRIM_GALORE.out.report.collect()).combine(SALMON_ALIGN.out.quants.collect()))
}