-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcfParse.go
57 lines (46 loc) · 1.54 KB
/
vcfParse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package vcfio
import (
"log"
"strconv"
"strings"
)
// ParseVariant is the entry point for VCF parsing. It splits one
// tab-separated VCF data line into columns and returns the variant
// description, the quality values and the per-sample genotypes produced by
// ExtractVcfFields, ExtractVcfQUAL and ExtractVcfFORMAT respectively.
//
// header is handed to NewInfoByte to decode the INFO column, and
// header.SampleNames is handed to ExtractVcfFORMAT.
//
// Special cases:
//   - A line with fewer than 8 tab-separated columns panics (log.Panicf)
//     with a message that includes the offending line.
//   - An ALT column containing a comma (multiple alternate alleles) is
//     logged, and the line is still processed.
//   - An ALT of "*" short-circuits: VariantInfo{Alt: "*"}, a zero Quality
//     and nil genotypes are returned without any further parsing.
//   - A POS column that fails to parse is logged and processing continues.
//   - An error from any of the ExtractVcf* helpers terminates the program
//     through log.Fatalf.
func ParseVariant(line string, header *Header) (VariantInfo, Quality, []SampleSpecific) {
	// Columns 0 (Chr) through 7 (INFO) are indexed directly below, so a
	// shorter line cannot be parsed.
	const minFields = 8

	fields := strings.Split(line, "\t")
	if len(fields) < minFields {
		// Fail with a descriptive message instead of a bare
		// "index out of range" runtime panic on blank or truncated lines.
		log.Panicf("malformed VCF line: expected at least %d tab-separated fields, got %d: %q",
			minFields, len(fields), line)
	}

	chr := fields[0]                  // Chr
	ref := strings.ToUpper(fields[3]) // Ref
	alt := fields[4]                  // Alt

	// Multiple alts
	if strings.Contains(alt, ",") {
		log.Printf("VCF file must be denormalized with bcftools norm -m -any: %s\n", line)
	}

	// VCF 4.3 spec: the ‘*’ allele is reserved to indicate that the allele is missing due to an overlapping deletion.
	// The variant appears in 2 consecutive lines. We remove these * alt variants.
	if alt == "*" {
		return VariantInfo{Alt: "*"}, Quality{}, nil
	}

	pos, err := strconv.ParseUint(fields[1], 10, 64)
	if err != nil {
		// NOTE(review): only logged; parsing continues with whatever value
		// ParseUint returned (0 on a syntax error), so start below becomes
		// -1. Confirm whether this best-effort behaviour is intended.
		log.Println(err)
	}
	start := int(pos) - 1 // Start: POS minus one

	// INFO
	info := NewInfoByte([]byte(fields[7]), header)

	// Parse VCF
	variantInfo, err := ExtractVcfFields(chr, start, ref, alt, info)
	if err != nil {
		log.Fatalf("Error extracting vcf INFO, %v\n", err)
	}

	// Quality and reads parameters
	quality, err := ExtractVcfQUAL(fields, info)
	if err != nil {
		log.Fatalf("Error extracting vcf QUAL, %v\n", err)
	}

	// Genotypes for each sample in VCF
	genotypes, err := ExtractVcfFORMAT(fields, info, header.SampleNames, variantInfo.SVtype, chr)
	if err != nil {
		// Print the raw line first so the failing record can be located.
		log.Println(line)
		log.Fatalf("Error extracting vcf FORMAT, %v\n", err)
	}

	return variantInfo, quality, genotypes
}