-
Notifications
You must be signed in to change notification settings - Fork 0
/
VCFFilteringOptimization
49 lines (28 loc) · 1.86 KB
/
VCFFilteringOptimization
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
##Filter VCF for neutral SNPs
vcftools --gzvcf Schil_capture_variants_Chr1-12.vcf.gz --thin 1000 --remove-indels \
--maf 0.05 --minDP 10 --max-missing 0.75 --out thinnedSNPs --recode-INFO-all --recode
##PLINK LD pruning r² of 0.5
plink --vcf thinnedSNPs.recode.vcf --indep-pairwise 50 5 0.5 --double-id --allow-extra-chr --set-missing-var-ids @:#
plink --vcf thinnedSNPs.recode.vcf --indep-pairwise 50 5 0.5 --double-id --allow-extra-chr \
--extract plink.prune.in --recode vcf --out pruned.thinnedSNPs --set-missing-var-ids @:#
##Remove outgroups from
bcftools view -s ^Spenn_LA0716_ERR418107_Spenn_LA0716_ERR418107,\
S.chiCGN15532_1_ERR418097_S.chiCGN15532_1_ERR418097,\
S.chiCGN15530_1_ERR418098_S.chiCGN15530_1_ERR418098,\
Sper_LA1278_ERR418084_Sper_LA1278_ERR418084,\
Sper_LA1954_ERR418094_Sper_LA1954_ERR418094 pruned.thinnedSNPs.vcf > pruned.thinnedSNPs.noOutgroup.vcf
##Define populations for PGDSpider
sed -r 's/(.*)_.*/&\t\1/' allSampleNames.txt > populationDefinitions.txt
##BayeScan (convert PLINK VCF to BayeScan format using PGDSpider)
bayescan_2.1 bayescanThinnedSNPs_noOutgroup.txt -o selectionTesting -threads 40
##Find outliers with plot_bayescan.R
results <- plot_bayescan("selectionTestingThinnedSNPs_fst.txt", FDR=0.05)
write(results$outliers, file = "outliers_indices.txt", ncolumns = 1, sep = "\t")
##Create list of SNP IDs
bcftools query -f '%ID\n' ../VCFs/pruned.thinnedSNPs.noOutgroup.rename.filtered.vcf > SNPIDs.txt
##Match variant IDs with index number from BayeScan
awk '{ print NR, $1 }' SNPIDs.txt > SNPIDs_indexed.txt
##awk script to print only outlier loci
for i in `cat outliers_indices.txt`; do awk -v var=$i '{if ($1 == var) print $2;}' SNPIDs_indexed.txt ; done > outlier_IDs.txt
##Filter to exclude outlier loci from VCF
bcftools view --exclude ID==@outlier_IDs.txt ../VCFs/pruned.thinnedSNPs.noOutgroup.vcf > thinnedSNPS.noOutliers.vcf