-
Notifications
You must be signed in to change notification settings - Fork 0
/
racaout_to_csv.py
40 lines (34 loc) · 1.58 KB
/
racaout_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""Convert RACA's ``rec_chrs.<genome>.segments.refined.txt`` files to CSV.

For every genome named in the parameters below, the matching RACA output
file is read from ``RACA_directory`` and a ``.csv`` file with the same root
name is written next to it.  Requires Python 3 (the CSV is opened in text
mode with ``newline=''``, as the ``csv`` module expects).
"""
import csv
import os

# Change these parameters
RACA_directory = "./"  # where is the RACA output located?
target = 'vv2'  # name of target genome as provided to RACA
reference = 'canFam3'  # name of reference genome as provided to RACA
outgroup = 'felCat5'  # name of outgroup genome as provided to RACA

# Conversion below
genomes = {"target": target, "reference": reference, "outgroup": outgroup}


def parse_segments(text):
    """Parse the text of a segments file into a CSV header and data rows.

    The text is split on ``>``; anything before the first ``>`` is ignored.
    The first line of each block is the block number (RACA assigns each
    block a number).  Every following non-blank line must look like
    ``<genome>.<fragment>:<low>-<high> <direction>``.

    Args:
        text: Full contents of a ``.segments.refined.txt`` file.

    Returns:
        A ``(header, rows)`` tuple.  ``header`` is ``["blocknum"]`` followed
        by ``[<genome>, "low", "high", "direction"]`` for each line of the
        FIRST block only; later blocks do not extend it.  ``rows`` holds one
        list per block: the block number followed by
        ``[fragment, low, high, direction]`` for each of its lines.

    Raises:
        ValueError: If a non-blank line does not have the expected shape.
    """
    header = ["blocknum"]
    rows = []
    for block in text.split('>')[1:]:
        lines = block.split('\n')
        row = [lines[0].strip()]
        # Column names are taken from the first block only.
        is_first_block = not rows
        for line in lines[1:]:
            if not line.strip():
                continue  # skip blank separator lines
            chrominfo, direction = line.split()
            # Split on the FIRST '.' and the LAST ':' so that fragment names
            # which themselves contain a dot (e.g. "scaffold_7.1") are kept
            # whole instead of being truncated.
            genome, location = chrominfo.split('.', 1)
            fragname, span = location.rsplit(':', 1)
            low, high = span.split('-')
            if is_first_block:
                header.extend([genome, 'low', 'high', 'direction'])
            row.extend([fragname, low, high, direction])
        rows.append(row)
    return header, rows


def convert_genome(directory, gname):
    """Convert one genome's segments file in *directory* to CSV.

    Reads ``rec_chrs.<gname>.segments.refined.txt`` and writes
    ``rec_chrs.<gname>.segments.refined.csv`` in the same directory.  The
    input is parsed completely before the output file is created.  When the
    input contains no blocks, the CSV is created empty (no header row).

    Args:
        directory: Directory holding the RACA output files.
        gname: Genome name as provided to RACA.

    Returns:
        The path of the CSV file that was written.
    """
    fileroot = os.path.join(directory, 'rec_chrs.' + gname + '.segments.refined')
    with open(fileroot + '.txt', 'r') as filein:
        header, rows = parse_segments(filein.read())
    csv_path = fileroot + '.csv'
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, 'w', newline='') as fileout:
        writerbot = csv.writer(fileout, delimiter=',')
        if rows:
            writerbot.writerow(header)
            writerbot.writerows(rows)
    return csv_path


def main():
    """Convert the segments file of every configured genome."""
    for gname in genomes.values():
        convert_genome(RACA_directory, gname)


if __name__ == "__main__":
    main()