-
Notifications
You must be signed in to change notification settings - Fork 3
/
Sample_processing.snake
143 lines (130 loc) · 6.46 KB
/
Sample_processing.snake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from scripts.common import detect_reads, fill_default_values, extended_glob, get_extension
from os.path import basename
# TODO : remove the need for {ext}, pretty sure it's un-needed and it makes it particularly complicated.
# deal with mutliple possible extensions :
wildcard_constraints:
ext = "(fastq.gz|fq.gz|fastq|fq)",
IN = IN,
prefix = ".+", # this is a trick so that the wildcard don't try to match folders/prefix, this speed up dag resolution as in this case we have too many wildcards, making it less identifyable.
# unclear why previous negative lookahead does not work anymore
# suffix = "^(?!.*_trimmed).*|.{0}",
suffix = ".*(?<!_trimmed)|.{0}",
sample = ".+"
rule bwa_index2:
input: "{file}"
output: "{file}.sa"
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
conda : "%s/bwasamtools.yaml"%CONDA_ENV
singularity : "docker://quay.io/annacprice/bwasamtools:1.10"
shell: "bwa index {input}"
rule map_to_mask:
input: R1 = lambda w:SAMPLE_READS[w.sample][0],
R2 = lambda w:SAMPLE_READS[w.sample][1],
index = "%s.sa"%FILTER
output: bam = temp("{IN}/{sample}/Filtered.bam"),
log: "{IN}/{sample}/mapping.log"
threads: 10
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
conda : CONDA_ENV + "/bwasamtools.yaml"
singularity : "docker://quay.io/annacprice/bwasamtools:1.10"
shell: """
bwa mem -t {threads} {FILTER} {input.R1} {input.R2} 2>{log} | samtools view -b -f 12 -@{threads} - | samtools sort -@{threads} - > {output.bam}
"""
rule bam_to_fastq:
input: "{IN}/{sample}/Filtered.bam"
output: R1 = temp("{IN}/{sample}/Filtered_{prefix}R1{suffix}.{ext}"),
R2 = temp("{IN}/{sample}/Filtered_{prefix}R2{suffix}.{ext}")
threads: 10
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
singularity : "docker://quay.io/annacprice/bedtools:2.29.2"
shell: """
bamToFastq -i {input} -fq {output.R1}_temp -fq2 {output.R2}_temp
if [[ "{wildcards.ext}" == *.gz ]]; then
gzip -c {output.R1}_temp > {output.R1} && rm {output.R1}_temp
gzip -c {output.R2}_temp > {output.R2} && rm {output.R2}_temp
else
mv {output.R1}_temp {output.R1}
mv {output.R2}_temp {output.R2}
fi
"""
# need to use a _done file, so that I can precise {ext}, since fastqc output name can't be precised.
rule fastqc :
input : "{IN}/{sample}/{prefix}.{ext}"
output : "{IN}/{sample}/{prefix}.{ext}_fastqc_done"
conda : CONDA_ENV + "/fastqc.yaml"
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
singularity : "docker://quay.io/annacprice/fastqc:0.11.9"
shell :"""
fastqc -Xmx{resources.mem_mb}m -o {IN}/{wildcards.sample} {input}
touch {output}
"""
if FILTER:
rule trim_galore :
input : file_R1="{IN}/{sample}/Filtered_{prefix}R1{suffix}.{ext}",
file_R1_qc="{IN}/{sample}/Filtered_{prefix}R1{suffix}.{ext}_fastqc_done",
file_R2="{IN}/{sample}/Filtered_{prefix}R2{suffix}.{ext}",
file_R2_qc="{IN}/{sample}/Filtered_{prefix}R2{suffix}.{ext}_fastqc_done"
output :R1="{IN}/{sample}/Filtered_{prefix}R1{suffix}_trimmed.{ext}",
R2="{IN}/{sample}/Filtered_{prefix}R2{suffix}_trimmed.{ext}",
report_R1="{IN}/{sample}/Filtered_{prefix}R1{suffix}.{ext}_trimming_report.txt",
report_R2="{IN}/{sample}/Filtered_{prefix}R2{suffix}.{ext}_trimming_report.txt"
params : R1 = lambda w: "%s/%s/Filtered_%sR1%s_val_1.fq"%(IN,w.sample,w.prefix,w.suffix)+".gz"*(".gz" in w.ext),
R2 = lambda w: "%s/%s/Filtered_%sR2%s_val_2.fq"%(IN,w.sample,w.prefix,w.suffix)+".gz"*(".gz" in w.ext),
option = lambda w:"--gzip"*("gz" in w['ext']),
log : "{IN}/{sample}/{prefix}.{suffix}.{ext}.log"
conda : CONDA_ENV + "/trim_galore.yaml"
threads: 8
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
singularity : "docker://quay.io/annacprice/trimgalore:0.6.5"
shell : """
trim_galore -j {threads} --fastqc {params.option} -o $(dirname {output.R1}) --paired {input.file_R1} {input.file_R2} > {log} 2>&1
mv {params.R1} {output.R1}
mv {params.R2} {output.R2}
"""
else:
rule trim_galore :
input : file_R1="{IN}/{sample}/{prefix}R1{suffix}.{ext}",
file_R1_qc="{IN}/{sample}/{prefix}R1{suffix}.{ext}_fastqc_done",
file_R2="{IN}/{sample}/{prefix}R2{suffix}.{ext}",
file_R2_qc="{IN}/{sample}/{prefix}R2{suffix}.{ext}_fastqc_done"
output :R1="{IN}/{sample}/{prefix}R1{suffix}_trimmed.{ext}",
R2="{IN}/{sample}/{prefix}R2{suffix}_trimmed.{ext}",
report_R1="{IN}/{sample}/{prefix}R1{suffix}.{ext}_trimming_report.txt",
report_R2="{IN}/{sample}/{prefix}R2{suffix}.{ext}_trimming_report.txt"
params : R1 = lambda w: "%s/%s/%sR1%s_val_1.fq"%(IN,w.sample,w.prefix,w.suffix)+".gz"*(".gz" in w.ext),
R2 = lambda w: "%s/%s/%sR2%s_val_2.fq"%(IN,w.sample,w.prefix,w.suffix)+".gz"*(".gz" in w.ext),
option = lambda w:"--gzip"*("gz" in w['ext']),
log : "{IN}/{sample}/{prefix}.{suffix}.{ext}.log"
conda : CONDA_ENV + "/trim_galore.yaml"
threads: 8
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
singularity : "docker://quay.io/annacprice/trimgalore:0.6.5"
shell :"""
trim_galore -j {threads} --fastqc {params.option} -o $(dirname {output.R1}) --paired {input.file_R1} {input.file_R2} > {log} 2>&1
mv {params.R1} {output.R1}
mv {params.R2} {output.R2}
"""
rule multiqc :
input : R1=[replace_extensions(SAMPLE_READS[sample][0],FILTER) for sample in SAMPLES],
R2=[replace_extensions(SAMPLE_READS[sample][1],FILTER) for sample in SAMPLES],
output : IN+"/quality_done"
conda : CONDA_ENV + "/multiqc.yaml"
resources:
slurm_partition = get_resource("partition"),
mem_mb = get_resource("mem")
singularity : "docker://quay.io/annacprice/multiqc:1.9"
shell : """multiqc --interactive -f {IN}/ -o {IN}/
touch {output}
"""