-
Notifications
You must be signed in to change notification settings - Fork 0
/
SIgenesVCFfiltering
38 lines (23 loc) · 1.27 KB
/
SIgenesVCFfiltering
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
##Filter VCF by SI bed file
bcftools view -I -O z Schil_capture_variants_Chr1-12.reheader.filtered.vcf.gz \
-R ../bedFiles/selfincompatibility_genes1.bed -o Schil_capture_variants_Chr1-12.reheader.filtered.SIgenes.vcf.gz
##Filter VCF for unlinked SNPs
vcftools --gzvcf Schil_capture_variants_Chr1-12.reheader.filtered.SIgenes.vcf.gz --thin 200 --remove-indels --maf 0.05 \
--minDP 10 --max-missing 0.9 --out SIgenesSNPs --recode-INFO-all --recode
##PLINK LD pruning r² of 0.5
plink --vcf SIgenesSNPs.recode.vcf --indep-pairwise 50 5 0.5 --const-fid --allow-extra-chr \
--recode vcf --out pruned.SIgenesSNPs --set-missing-var-ids @:#
##Remove outgroups
bcftools view -s ^0_Spenn_LA0716_ERR418107,\
0_S.chiCGN15532_1_ERR418097,\
0_S.chiCGN15530_1_ERR418098,\
0_Sper_LA1278_ERR418084,\
0_Sper_LA1954_ERR418094 pruned.SIgenesSNPs.vcf > pruned.SIgenesSNPs.noOutgroup.vcf
## Edit VCF header to simplify sample names for visualization
bcftools query -l pruned.SIgenesSNPs.noOutgroup.vcf > sampleIDlist.txt
#Edit txt in VIM
%s/\(0_\)\(.*\)/\1\2\t\2/
#Reheader in VCF
bcftools reheader -s sampleIDlist.txt -o pruned.SIgenesSNPs.noOutgroup.rename.vcf pruned.SIgenesSNPs.noOutgroup.vcf
##Define populations for PGDSpider
sed -r 's/(.*)_.*/&\t\1/' allSampleNames.txt > populationDefinitions.txt