-
Notifications
You must be signed in to change notification settings - Fork 2
/
fimpute2geno.py
executable file
·90 lines (83 loc) · 3.43 KB
/
fimpute2geno.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
# Version 1.0
# from __future__ import division, print_function
import sys
import argparse
def readMarkers(markerfile):
    """Read an extended PLINK-style map file into a marker dictionary.

    Lines starting with '#' and blank lines are skipped.  Every other line
    must hold at least six whitespace-separated columns:

        chromosome  name  gendist  position  allele1  allele2  [alias]

    Columns after the seventh are ignored.  Alleles written as A/C/G/T or
    1/2/3/4 are stored as '1'..'4'; any other allele symbol is stored as '0'.

    Args:
        markerfile: path of the map file to read.

    Returns:
        A dict with the key 'marklist' (marker names in file order, first
        occurrence only) plus one entry per marker name.  Each marker entry
        is a dict with the keys 'chrom', 'pos', 'a1', 'a1x', 'a2', 'a2x',
        'rank' and 'alias'.  'chrom' and 'pos' are kept as the strings read
        from the file, 'rank' is the marker's index in 'marklist', and
        'alias' is None when the line has no seventh column.

    Raises:
        ValueError: if a data line has fewer than six columns.
        SystemExit: with status 1 if a marker is named 'marklist', because
            that name is reserved for the ordering list in the result.
    """
    allele_codes = {'A': '1', '1': '1',
                    'C': '2', '2': '2',
                    'G': '3', '3': '3',
                    'T': '4', '4': '4'}
    mark = {'marklist': []}
    with open(markerfile, 'r') as fin:
        for line in fin:
            if line.startswith('#'):
                continue
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 6:
                raise ValueError('Map file requires columns 5 and 6 to be marker alleles\n')
            chrom, name, _gendist, position, a1, a2 = fields[:6]
            alias = fields[6] if len(fields) >= 7 else None
            # This check has to run on the parsed name; testing it before the
            # columns are unpacked can never detect the reserved name.
            if name == 'marklist':
                sys.stderr.write('"marklist" is not a legal markername\n')
                sys.exit(1)
            if name in mark:
                # Keep the first definition of a repeated marker name.
                continue
            mark[name] = {'chrom': chrom,
                          'pos': position,
                          'a1': allele_codes.get(a1, '0'),
                          'a1x': 0,
                          'a2': allele_codes.get(a2, '0'),
                          'a2x': 0,
                          'rank': len(mark['marklist']),
                          'alias': alias}
            mark['marklist'].append(name)
    return mark
def convertFile(args):
    """Convert an FImpute genotype file to a tab-separated allele file.

    The map file is read with readMarkers().  Only markers whose chromosome
    string equals args.chrom are written; a marker's position in the map
    file's marker list is used as its index into each genotype string.

    Input: the first line of args.infile is skipped as a header.  Every
    following non-blank line must hold exactly three whitespace-separated
    fields: animal id, chip, and a genotype string with one character per
    marker.

    Output: a header line '#' followed by the selected marker names, then
    one line per animal with the animal id, '0', '0' (father and mother are
    always written as unknown) and two allele columns per selected marker.

    Genotype characters are decoded as: '0' -> a1 a1, '1' -> a1 a2,
    '2' -> a2 a2, '3' -> a1 a2, '4' -> a2 a1, anything else -> 0 0.
    (Codes 3/4 are presumably FImpute's phased heterozygotes - confirm
    against the FImpute manual.)

    Args:
        args: object with the attributes mapfile, infile, output and chrom.

    Raises:
        ValueError: if a data line does not have exactly three fields.
        IndexError: if a genotype string is shorter than the index of a
            selected marker.
    """
    def decode(code, a1, a2):
        # Returns the two alleles for one genotype character.
        if code == '0':
            return a1, a1
        if code == '1' or code == '3':
            return a1, a2
        if code == '2':
            return a2, a2
        if code == '4':
            return a2, a1
        return '0', '0'

    mark = readMarkers(args.mapfile)
    # Resolve the chromosome filter and allele lookups once instead of
    # repeating them for every animal.
    selected = [(idx, name, mark[name]['a1'], mark[name]['a2'])
                for idx, name in enumerate(mark['marklist'])
                if mark[name]['chrom'] == args.chrom]

    with open(args.infile, 'r') as fin, open(args.output, 'w') as fout:
        fout.write('#\t%s\n' % '\t'.join([name for _, name, _, _ in selected]))
        # Skip the header line; the builtin next() works on both Python 2
        # and Python 3 file objects and tolerates an empty file.
        next(fin, None)
        for line in fin:
            fields = line.split()
            if not fields:
                continue
            if len(fields) != 3:
                raise ValueError('Expected 3 columns (animal, chip, genotypes), '
                                 'found %d' % len(fields))
            animal, _chip, geno = fields
            row = [animal, '0', '0']
            for idx, _, a1, a2 in selected:
                row.extend(decode(geno[idx], a1, a2))
            fout.write('\t'.join(row) + '\n')
def main():
    """Parse the command line and run the FImpute-to-Genos conversion.

    --infile, --output and --mapfile are required because convertFile()
    opens all three; leaving one out is reported as a usage error instead of
    failing later inside open().  --chrom selects the chromosome to export;
    convertFile() writes only markers whose chromosome equals this value, so
    no marker columns are written when it is omitted.  --verbose and --ped
    are accepted but not used by the conversion.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Converts from FImpute to Genos')
    parser.add_argument('-i', '--infile', required=True, help='FImpute file')
    parser.add_argument('-o', '--output', required=True, help='Output file')
    parser.add_argument('-v', '--verbose', help='Prints runtime info')
    parser.add_argument('-m', '--mapfile', required=True, help='Map file')
    parser.add_argument('-p', '--ped', help='Pedigree')
    parser.add_argument('-c', '--chrom', help='Chromosome')
    args = parser.parse_args()
    convertFile(args)


# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()