-
Notifications
You must be signed in to change notification settings - Fork 0
/
hapmap_to_qtl2.py
executable file
·80 lines (67 loc) · 2.5 KB
/
hapmap_to_qtl2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import sys
import argparse
import pandas as pd
def parse_arguments():
"""Parse arguments passed to script"""
parser = argparse.ArgumentParser(description="This script was designed to convert " +
"a hapmap file into R qtl2 format.\n\n")
required_named = parser.add_argument_group('required arguments')
required_named.add_argument(
"--hapmap",
type=str,
required=True,
help="The name of the input hapmap file. ",
action="store")
parser.add_argument(
"--output",
type=str,
required=False,
default="Rqtl2.genotypes.csv",
help="File name for qtl2 output.",
action="store")
return parser.parse_args()
def write_qtl2(header, samples, variants, output):
sys.stderr.write("Number of samples: " +
str(len(variants[0])) + "\n")
sys.stderr.write("Number of variants: " +
str(len(variants)) + "\n")
with open(output, 'w') as f:
f.write(",".join(header) + "\n")
for x, row in enumerate(pd.DataFrame(variants).transpose().values):
f.write(samples[x] + "," + ",".join(row) + "\n")
def hapmap_to_qtl2(hapmap, output):
"""Given a hapmap file, convert genotypes into qtl2 format"""
# ref -> A
# alt -> B
# N -> -
variants_header = ["id"]
all_variants = []
for x, line in enumerate(hapmap):
split_line = line.split("\t")
if x == 0:
samples = split_line[11:]
else:
variants_header.append(split_line[0])
split_alleles = split_line[1].split('/')
ref_allele = split_alleles[0]
try:
alt_allele = split_alleles[1]
except IndexError:
sys.stderr.write("ERROR: Monomorphic sites must be removed.\n")
sys.exit(1)
variant_list = []
for y, variant in enumerate(split_line[11:]):
if variant == ref_allele:
variant_list.append("A")
elif variant == alt_allele:
variant_list.append("B")
else:
variant_list.append("-")
all_variants.append(variant_list)
write_qtl2(variants_header, samples,
all_variants, output)
if __name__ == '__main__':
arguments = parse_arguments()
with open(arguments.hapmap) as f:
hapmap = f.read().splitlines()
hapmap_to_qtl2(hapmap, arguments.output)