-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrrmd_workflow_management.efficient.Snakefile
67 lines (59 loc) · 1.72 KB
/
rrmd_workflow_management.efficient.Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
################################################################################
# Project: Archaic genome comparison
# Part: Quality checks
# Step: Average coverage on chromosome 21
#
# Calculate the average coverage on the chromosome 21 for the three Archaic
# samples.
#
# Alex Huebner, 29/01/2020
################################################################################
workdir: "./coverage"
# Infer sample names from file system
SAMPLES, = glob_wildcards("../data/{sample}.hg19.bam")
print(SAMPLES)
rule all:
input:
expand("{sample}.21_avgcov.txt", sample=SAMPLES)
rule sort_by_coordinate:
output:
"{sample}.sorted.bam"
message: "Sort sample {wildcards.sample} by coordinate"
params:
bam = "../data/{sample}.hg19.bam"
shell:
"""
samtools sort -o {output} {params.bam}
"""
rule index_sorted_bam:
input:
"{sample}.sorted.bam"
output:
"{sample}.sorted.bam.bai"
message: "Create BAM index for sample {wildcards.sample}"
shell:
"""
samtools index {input}
"""
rule subset_chrom:
input:
bam = "{sample}.sorted.bam",
bai = "{sample}.sorted.bam.bai"
output:
"{sample}.21.bam"
message: "Subset sample {wildcards.sample} to chromosome 21"
shell:
"""
samtools view -bh {input.bam} > {output}
samtools index {output}
"""
rule average_depth:
input:
"{sample}.21.bam"
output:
"{sample}.21_avgcov.txt"
message: "Calculate the average coverage for sample {wildcards.sample}"
shell:
"""
samtools depth -a -r 21:10000000-20000000 {input} | awk '{{totalcov += $3}}END{{print totalcov / NR}}' - > {output}
"""