forked from kpatel427/YouTubeTutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
variant_filtering_annotation.sh
106 lines (73 loc) · 2.87 KB
/
variant_filtering_annotation.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/bash
# Script to filter and annotate variants
# This script is for demonstration purposes only
# directories
ref="/Users/kr/Desktop/demo/supporting_files/hg38/hg38.fa"
results="/Users/kr/Desktop/demo/variant_calling/results"
if false
then
# -------------------
# Filter Variants - GATK4
# -------------------
# Filter SNPs
gatk VariantFiltration \
-R ${ref} \
-V ${results}/raw_snps.vcf \
-O ${results}/filtered_snps.vcf \
-filter-name "QD_filter" -filter "QD < 2.0" \
-filter-name "FS_filter" -filter "FS > 60.0" \
-filter-name "MQ_filter" -filter "MQ < 40.0" \
-filter-name "SOR_filter" -filter "SOR > 4.0" \
-filter-name "MQRankSum_filter" -filter "MQRankSum < -12.5" \
-filter-name "ReadPosRankSum_filter" -filter "ReadPosRankSum < -8.0" \
-genotype-filter-expression "DP < 10" \
-genotype-filter-name "DP_filter" \
-genotype-filter-expression "GQ < 10" \
-genotype-filter-name "GQ_filter"
# Filter INDELS
gatk VariantFiltration \
-R ${ref} \
-V ${results}/raw_indels.vcf \
-O ${results}/filtered_indels.vcf \
-filter-name "QD_filter" -filter "QD < 2.0" \
-filter-name "FS_filter" -filter "FS > 200.0" \
-filter-name "SOR_filter" -filter "SOR > 10.0" \
-genotype-filter-expression "DP < 10" \
-genotype-filter-name "DP_filter" \
-genotype-filter-expression "GQ < 10" \
-genotype-filter-name "GQ_filter"
# Select Variants that PASS filters
gatk SelectVariants \
--exclude-filtered \
-V ${results}/filtered_snps.vcf \
-O ${results}/analysis-ready-snps.vcf
gatk SelectVariants \
--exclude-filtered \
-V ${results}/filtered_indels.vcf \
-O ${results}/analysis-ready-indels.vcf
# to exclude variants that failed genotype filters
cat analysis-ready-snps.vcf|grep -v -E "DP_filter|GQ_filter" > analysis-ready-snps-filteredGT.vcf
cat analysis-ready-indels.vcf| grep -v -E "DP_filter|GQ_filter" > analysis-ready-indels-filteredGT.vcf
# -------------------
# Annotate Variants - GATK4 Funcotator
# -------------------
# Annotate using Funcotator
gatk Funcotator \
--variant ${results}/analysis-ready-snps-filteredGT.vcf \
--reference ${ref} \
--ref-version hg38 \
--data-sources-path /Users/kr/Desktop/demo/tools/functotator_prepackaged_sources/funcotator/hg38/funcotator_dataSources.v1.7.20200521g \
--output ${results}/analysis-ready-snps-filteredGT-functotated.vcf \
--output-file-format VCF
gatk Funcotator \
--variant ${results}/analysis-ready-indels-filteredGT.vcf \
--reference ${ref} \
--ref-version hg38 \
--data-sources-path /Users/kr/Desktop/demo/tools/functotator_prepackaged_sources/funcotator/hg38/funcotator_dataSources.v1.7.20200521g \
--output ${results}/analysis-ready-indels-filteredGT-functotated.vcf \
--output-file-format VCF
fi
# Extract fields from a VCF file to a tab-delimited table
gatk VariantsToTable \
-V ${results}/analysis-ready-snps-filteredGT-functotated.vcf -F AC -F AN -F DP -F AF -F FUNCOTATION \
-O ${results}/output_snps.table