Skip to content

Commit

Permalink
Added gt_nums, gt_bases, gt_type, phased to Call.
Browse files Browse the repository at this point in the history
  • Loading branch information
arq5x committed Jan 20, 2012
1 parent 544ba1e commit 6e646f1
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
18 changes: 18 additions & 0 deletions test/test_call.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
import sys
import vcf

vcf_reader = vcf.VCFReader(open(sys.argv[1]), 'rb')
for var in vcf_reader:
print var.CHROM, var.POS, var.REF, var.ALT
for sample in vcf_reader.samples:
if var.genotypes[sample].called:
print "\t", sample, \
var.genotypes[sample].gt_nums, \
var.genotypes[sample].gt_bases, \
var.genotypes[sample].gt_type, \
var.genotypes[sample].phased
print



47 changes: 42 additions & 5 deletions vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,47 @@ def __init__(self, site, sample, data):
self.site = site
self.sample = sample
self.data = data
self.gt_nums = self.data['GT']
self.called = self.gt_nums is not None

def called(self):
return self.GT != './.'
@property
def gt_bases(self):
'''Return the actual genotype alleles.
E.g. if VCF genotype is 0/1, return A/G
'''
# extract the numeric alleles of the gt string
(a1, phase, a2) = list(self.gt_nums)
# lookup and return the actual DNA alleles
return self.site.alleles[int(a1)] + \
phase + \
self.site.alleles[int(a2)]

@property
def GT(self):
return self.data['GT']
def gt_type(self):
'''Return the type of genotype.
hom_ref = 0
het = 1
hom_alt = 2 (we don;t track _which+ ALT)
uncalled = None
'''
# extract the numeric alleles of the gt string
if self.called:
(a1, phase, a2) = list(self.gt_nums)
if (int(a1) == 0) and (int(a2) == 0): return 0
elif (int(a1) == 0) and (int(a2) >= 1): return 1
elif (int(a1) >= 1) and (int(a2) >= 1): return 2
else: return -1
else:
return None

@property
def phased(self):
'''Return a boolean indicating whether or not
the genotype is phased for this sample
'''
return self.data['GT'].find("|") >= 0




class _Record(object):
Expand All @@ -251,7 +285,10 @@ def __init__(self, CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, genotyp
self.INFO = INFO
self.FORMAT = FORMAT
self.genotypes = genotypes

# create a list of alleles. [0] = REF, [1:] = ALTS
self.alleles = [self.REF]
self.alleles.extend(self.ALT)

def __str__(self):
return "Record(CHROM=%(CHROM)s, POS=%(POS)s, REF=%(REF)s, ALT=%(ALT)s)" % self.__dict__

Expand Down

0 comments on commit 6e646f1

Please sign in to comment.