generated from lifebit-ai/manta-strelka
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
142 lines (122 loc) · 5.24 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env nextflow
// Print the pipeline usage text through the Nextflow logger.
//
// Takes no arguments and returns nothing useful; the text is written with log.info.
// The current values of params.max_cpus, params.max_memory and params.max_time are
// interpolated into the text as the displayed defaults.
def helpMessage() {
    // stripIndent() removes the common leading indentation, so the text can be
    // indented here to match the surrounding code.
    log.info """
    Usage:

    The typical command for running the pipeline is as follows:

        nextflow run main.nf --input samples.csv --genome_fasta genome.fa --genome_fasta_fai genome.fa.fai [Options]

    Inputs Options:
        --input               CSV file with one header line followed by two consecutive
                              rows per patient (one normal, one tumor sample).
                              Columns are read by position:
                              patient ID, sample ID, status ("N" marks the normal sample),
                              BAM path, BAI path
        --genome_fasta        Reference genome FASTA file
        --genome_fasta_fai    Index (.fai) of the reference genome FASTA file

    Output Options:
        --outdir              Base directory under which results are copied
                              (VariantCalling/TUMOR_vs_NORMAL/Manta and .../Strelka)

    Script Options:
        --pre_script          Shell commands inserted before the tool commands of each process
        --post_script         Shell commands inserted after the tool commands of each process

    Resource Options:
        --max_cpus            Maximum number of CPUs (int)
                              (default: $params.max_cpus)
        --max_memory          Maximum memory (memory unit)
                              (default: $params.max_memory)
        --max_time            Maximum time (time unit)
                              (default: $params.max_time)

    Other Options:
        --help                Show this message and exit

    See here for more info: https://github.com/lifebit-ai/hla/blob/master/docs/usage.md
    """.stripIndent()
}
// When --help is given, print the usage text and terminate with exit status 0.
if (params.help) { helpMessage(); exit 0 }
// Keep the pipeline's project directory (workflow.projectDir) in a script-level variable.
// NOTE(review): nothing in the visible part of this script reads projectDir — confirm
// whether it is still needed before relying on or removing it.
projectDir = workflow.projectDir
// Define Channels from input
//
// Reads the sample sheet given by params.input and emits one tuple per tumor/normal pair:
//   [idPatient, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor]
// The same tuples are made available on two channels, pairBamManta and pairBamStrelka.
//
// Sample sheet layout, as parsed below: the first line is skipped (header), and the
// remaining lines are consumed two at a time. Columns are positional:
//   0 = patient ID, 1 = sample ID, 2 = status, 3 = BAM path, 4 = BAI path.
// Whichever row of a pair has status "N" in its FIRST row decides the orientation:
// if the first row is "N" it is the normal and the second is the tumor, otherwise
// the first row is taken as the tumor and the second as the normal.
Channel
    .fromPath(params.input)
    .ifEmpty { exit 1, "Cannot find input file : ${params.input}" }
    .splitCsv(skip: 1, by: 2)
    .map { rows ->
        // An odd number of data rows leaves a final chunk with a single row; fail with a
        // clear message instead of a null dereference when reading the missing partner.
        if (rows.size() != 2) {
            exit 1, "Input file ${params.input} must contain two rows (normal and tumor) per patient; incomplete pair found: ${rows}"
        }

        // Every variable is declared with `def` so it stays local to this closure
        // invocation; undeclared names would be script-global and shared between rows.
        def (first, second) = rows
        def idPatient = first[0]
        def (normal, tumor) = (first[2] == "N") ? [first, second] : [second, first]

        [
            idPatient,
            normal[1], file(normal[3]), file(normal[4]),
            tumor[1], file(tumor[3]), file(tumor[4])
        ]
    }
    .into { pairBamManta; pairBamStrelka }
// Reference genome FASTA (params.genome_fasta) wrapped in a value channel.
def genomeFasta = file(params.genome_fasta)
ch_fasta = Channel.value(genomeFasta)

// FASTA index (params.genome_fasta_fai) wrapped in a value channel.
def genomeFastaFai = file(params.genome_fasta_fai)
ch_fai = Channel.value(genomeFastaFai)
// Define Process

// STEP MANTA - SOMATIC PAIR
//
// Configures Manta for one tumor/normal BAM pair against the reference FASTA, runs the
// generated workflow locally with task.cpus jobs, then moves the eight result files
// (four VCFs plus their .tbi indexes) out of Manta/results/variants into the task
// directory under the prefix Manta_<tumor>_vs_<normal>.
//
// Inputs : one tuple from pairBamManta, plus the reference FASTA and its .fai index.
//          The BAI and .fai files are staged but never named in the commands
//          (presumably the tools look for them next to the BAM/FASTA — TODO confirm).
// Outputs: vcfManta       - all *.vcf.gz / *.vcf.gz.tbi files, labelled "Manta"
//          mantaToStrelka - only the candidateSmallIndels VCF and its index
// Results are copied to ${params.outdir}/VariantCalling/<tumor>_vs_<normal>/Manta.
// params.pre_script and params.post_script are inserted verbatim around the commands.
process manta {
    tag "${idSampleTumor}_vs_${idSampleNormal}"

    publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Manta", mode: 'copy'

    input:
    set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamManta
    file(fasta) from ch_fasta
    file(fastaFai) from ch_fai

    output:
    set val("Manta"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfManta
    set idPatient, idSampleNormal, idSampleTumor, file("*.candidateSmallIndels.vcf.gz"), file("*.candidateSmallIndels.vcf.gz.tbi") into mantaToStrelka

    script:
    // Prefix shared by every renamed output file.
    def outPrefix = "Manta_${idSampleTumor}_vs_${idSampleNormal}"
    // One `mv` per result file: each VCF first, immediately followed by its index.
    def renameCmds = ['candidateSmallIndels', 'candidateSV', 'diploidSV', 'somaticSV']
        .collectMany { name -> ["${name}.vcf.gz", "${name}.vcf.gz.tbi"] }
        .collect { fileName -> "mv Manta/results/variants/${fileName} ${outPrefix}.${fileName}" }
        .join('\n')
    """
    ${params.pre_script}
    configManta.py \
        --normalBam ${bamNormal} \
        --tumorBam ${bamTumor} \
        --reference ${fasta} \
        --runDir Manta
    python Manta/runWorkflow.py -m local -j ${task.cpus}
    ${renameCmds}
    ${params.post_script}
    """
}
// STEP STRELKA - SOMATIC PAIR
//
// Configures the Strelka somatic workflow for one tumor/normal BAM pair against the
// reference FASTA, runs the generated workflow locally with task.cpus jobs, then moves
// the somatic indel and SNV VCFs (plus their .tbi indexes) out of
// Strelka/results/variants into the task directory under the prefix
// Strelka_<tumor>_vs_<normal>_somatic_.
//
// Inputs : one tuple from pairBamStrelka, plus the reference FASTA and its .fai index.
//          The BAI and .fai files are staged but never named in the commands
//          (presumably the tools look for them next to the BAM/FASTA — TODO confirm).
// Outputs: vcfStrelka - all *.vcf.gz / *.vcf.gz.tbi files, labelled "Strelka"
// Results are copied to ${params.outdir}/VariantCalling/<tumor>_vs_<normal>/Strelka.
// params.pre_script and params.post_script are inserted verbatim around the commands.
process strelka {
    tag "${idSampleTumor}_vs_${idSampleNormal}"

    publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: 'copy'

    input:
    set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamStrelka
    file(fasta) from ch_fasta
    file(fastaFai) from ch_fai

    output:
    set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfStrelka

    script:
    // Prefix shared by every renamed output file.
    def outPrefix = "Strelka_${idSampleTumor}_vs_${idSampleNormal}"
    // One `mv` per result file: indels before SNVs, each VCF followed by its index.
    def renameCmds = ['indels', 'snvs']
        .collectMany { kind ->
            ['vcf.gz', 'vcf.gz.tbi'].collect { ext ->
                "mv Strelka/results/variants/somatic.${kind}.${ext} ${outPrefix}_somatic_${kind}.${ext}"
            }
        }
        .join('\n')
    """
    ${params.pre_script}
    configureStrelkaSomaticWorkflow.py \
        --tumor ${bamTumor} \
        --normal ${bamNormal} \
        --referenceFasta ${fasta} \
        --runDir Strelka
    python Strelka/runWorkflow.py -m local -j ${task.cpus}
    ${renameCmds}
    ${params.post_script}
    """
}