-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcfReadGenotypes.go
166 lines (137 loc) · 3.7 KB
/
vcfReadGenotypes.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
package vcfio
import (
"errors"
"log"
"strconv"
"strings"
)
// ExtractVcfFORMAT parses genotyping information in latter columns
func ExtractVcfFORMAT(fields []string, info *InfoByte, sampleNames []string, svtype string, chr string) ([]SampleSpecific, error) {
// Samples parsing (column 8 for FORMAT, and 9+ for sample genotypes)
genotypes := []SampleSpecific{}
if len(fields) < 9 {
return genotypes, nil
}
samples := fields[9:]
format := strings.Split(fields[8], ":")
for i, sample := range samples {
// Parse genotype line
gtMap := createGtMap(format, sample)
gt, err := parseSampleGenotypes(gtMap)
if err != nil {
return genotypes, errors.New("err parsing sample genotypes")
}
// Add sample name to genotype
if len(sampleNames) == len(samples) {
gt.SampleName = sampleNames[i]
} else {
return genotypes, errors.New("samples in header different from samples in vcf lines")
}
cn, cnErr := info.Get("CN")
if cnErr != nil && !strings.Contains(cnErr.Error(), "not found in header") {
log.Println("Error getting CN from INFO", info, cnErr)
} else if cn != nil {
// Switch on CN == string vs int
switch t := cn.(type) {
case string:
cnNum, err := strconv.Atoi(t)
if err == nil {
switch {
case cnNum == 2 && svtype == "DUP" && (chr == "chrX" || chr == "Y"):
gt.NumAlts = 1
case cnNum == 2:
gt.NumAlts = 0
case cnNum == 1:
gt.NumAlts = 1
case cnNum == 0:
gt.NumAlts = 2
default:
gt.NumAlts = cnNum - 2
}
}
case int:
switch {
case t == 2 && svtype == "DUP" && (chr == "chrX" || chr == "Y"):
gt.NumAlts = 1
case t == 2:
gt.NumAlts = 0
case t == 1:
gt.NumAlts = 1
case t == 0:
gt.NumAlts = 2
default:
gt.NumAlts = t - 2
}
}
}
if svtype == "LOH" {
gt.NumAlts = 1
}
genotypes = append(genotypes, gt)
}
return genotypes, nil
}
func createGtMap(format []string, unparsedSample string) map[string]string {
gtMap := make(map[string]string)
sampleFields := strings.Split(unparsedSample, ":") // 1/1:0,56:99:1888,167,0 -> 1/1 0,56 99 1888,167,0
for i, ft := range format {
gtMap[ft] = sampleFields[i]
}
return gtMap
}
func parseSampleGenotypes(gtMap map[string]string) (gt SampleSpecific, err error) {
// Prefer phased variants (PGT), but accept unphased (GT)
if field, exists := gtMap["PGT"]; exists {
if strings.Contains(field, "|") {
gt.IsPhased = true
}
switch {
case field == "0/0" || field == "0|0" || field == ".|." || field == "./.":
gt.NumAlts = 0
case field == "0/1" || field == "0|1" || field == "1/0" || field == "1|0":
gt.NumAlts = 1
case field == "1/1" || field == "1|1":
gt.NumAlts = 2
}
} else if field, exists := gtMap["GT"]; exists {
if strings.Contains(field, "|") {
gt.IsPhased = true
}
switch {
case field == "0/0" || field == "0|0" || field == ".|." || field == "./.":
gt.NumAlts = 0
case field == "0/1" || field == "0|1" || field == "1/0" || field == "1|0":
gt.NumAlts = 1
case field == "1/1" || field == "1|1":
gt.NumAlts = 2
}
}
// PID Phase ID identifies variants in same strand
if field, exists := gtMap["PID"]; exists {
gt.PhaseID = field
}
// AD R Integer Read depth for each allele
if field, exists := gtMap["AD"]; exists {
ad := strings.Split(field, ",")
// Avoids out of bounds error if AD isn't int,int
if len(ad) == 2 {
if ad[0] == "." {
gt.ReadDepthRef = 0
} else {
gt.ReadDepthRef, err = strconv.Atoi(ad[0])
if err != nil {
return gt, err
}
}
if ad[1] == "." {
gt.ReadDepthAlt = 0
} else {
gt.ReadDepthAlt, err = strconv.Atoi(ad[1])
if err != nil {
return gt, err
}
}
}
}
return gt, nil
}