-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwig2grp.py
executable file
·145 lines (122 loc) · 5.84 KB
/
wig2grp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
# wig2grp.py, convert wiggle format to artemis graphic user plot file (grp)
# Copyright (C) 2016 Shengwei Hou
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import argparse
import numpy as np
from Bio import SeqIO
from sys import stdout, stderr
def get_genome_size(input_file):
""" parse fasta records in input_file,
return the fastaName:genomeSize as a dictionary
"""
genomeSizeDict = {}
for fasta in SeqIO.parse(input_file, "fasta"):
name = fasta.name.strip()
length = len(fasta.seq)
genomeSizeDict[name] = length
return genomeSizeDict
def wig2grp(genomeSizeDict, wig):
""" convert wig to grp file
"""
chromArrayDict = {} # {chrom:Array}
for chrom in genomeSizeDict.keys():
chromArrayDict[chrom] = np.zeros(genomeSizeDict[chrom])
with open(wig, "r") as ih:
for line in ih:
if line.startswith("variableStep"):
chrom = line.strip().split("chrom=")[-1].strip()
if not chrom in genomeSizeDict:
stderr.write("ERROR: chromosome %s in your wig file was not found in your genome reference! "%chrom)
sys.exit(1)
else:
line = line.strip().split("\t")
index = int(line[0])-1
value = float(line[1])
if index >= len(chromArrayDict[chrom]):
stderr.write("ATTENTION: Mapped position at %d was out of genome length %d, on chromosome %s"%(index, genomeSizeDict[chrom], chrom))
continue
chromArrayDict[chrom][index] = value
return chromArrayDict
def write2grp(genomeSizeDict, fwd=None, rev=None, prefix=None, toInteger=True):
""" convert wig to grp file
"""
# only fwd or rev wig
if (fwd and (not rev)) or (rev and (not fwd)):
if fwd:
header = "# BASE fwd_grp\n# colour 0:255:0\n"
wig = fwd
else:
header = "# BASE rev_grp\n# colour 0:0:255\n"
wig = rev
chromArrayDict = wig2grp(genomeSizeDict, wig)
for chrom, value_array in chromArrayDict.iteritems():
pos_array = np.arange(genomeSizeDict[chrom]) + 1
if prefix:
outfile = prefix +"_"+ chrom + ".grp"
else:
outfile = os.path.basename(wig) +"_"+ chrom +".grp"
with open(os.path.join(os.getcwd(), outfile), "w") as oh:
oh.write(header)
if toInteger:
for pos, value in zip(pos_array, value_array):
oh.write("%d %d\n"%(pos, int(value)))
else:
for pos, value in zip(pos_array, value_array):
oh.write("%d %.2f\n"%(pos, value))
# both fwd and rev wig, integrate into one grp
else:
header = "# BASE fwd_grp rev_grp\n# colour 0:255:0 0:0:255\n"
fwd_chromArrayDict = wig2grp(genomeSizeDict, fwd)
rev_chromArrayDict = wig2grp(genomeSizeDict, rev)
for chrom, fwd_value_array in fwd_chromArrayDict.iteritems():
pos_array = np.arange(genomeSizeDict[chrom]) + 1
rev_value_array = rev_chromArrayDict[chrom]
if prefix:
outfile = prefix +"_"+ chrom + ".grp"
else:
if "Forward" in fwd:
outfile = os.path.basename(fwd).replace("Forward", "FwdRev") +"_"+ chrom +".grp"
else:
outfile = os.path.basename(fwd).rsplit(".", 1)[0] +"_FwdRev_"+ chrom +".grp"
with open(os.path.join(os.getcwd(), outfile), "w") as oh:
oh.write(header)
if toInteger:
for pos, fwd_value, rev_value in zip(pos_array, fwd_value_array, rev_value_array):
oh.write("%d %d %d\n"%(pos, int(fwd_value), int(rev_value)))
else:
for pos, fwd_value, rev_value in zip(pos_array, fwd_value_array, rev_value_array):
oh.write("%d %.2f %.2f\n"%(pos, fwd_value, rev_value))
def main():
desc="Convert variableStep forward and reverse wig files into fixedStep grp file, to visualize in Artemis"
parser = argparse.ArgumentParser(description=desc)
parser.add_argument("-g", "--genomeFasta", required=True, help="genome fasta file that used in the mapping")
parser.add_argument("-f", "--forward", required=False, help="forward wig file generated by RSeQC::bam2wig.py")
parser.add_argument("-r", "--reverse", required=False, help="reverse wig file generated by RSeQC::bam2wig.py")
parser.add_argument("-p", "--prefix", required=False, help="output prefix for grp file")
parser.add_argument("-t", "--toInteger", required=False, action="store_true", default=True, help="write out integer values")
args = parser.parse_args()
genomeSizeDict = get_genome_size(args.genomeFasta)
if not (args.forward or args.reverse):
stderr.write("\nERROR: At least one wig file need to be given!\n")
parser.print_help()
sys.exit(1)
else:
write2grp(genomeSizeDict, fwd=args.forward, rev=args.reverse, prefix=args.prefix, toInteger=args.toInteger)
print("---- DONE ----")
if __name__ == "__main__":
main()