-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbin.py
51 lines (46 loc) · 1.34 KB
/
bin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
import pyBigWig
import numpy as np
import json, glob
import sys
class Binner:
    """Map genomic coordinates onto one flat array of fixed-width bins.

    Every chromosome in ``chrom_sizes`` is cut into ``bin_size``-wide bins
    (the last bin of a chromosome may be partial).  The bins of all
    chromosomes are laid out back to back, in the iteration order of
    ``chrom_sizes``, so that a (chromosome, position) pair corresponds to a
    single global bin index.

    Attributes set by ``__init__``:
        chrom_sizes: the mapping passed in (chromosome name -> length).
        bin_size: width of one bin, in bases.
        num_bins: chromosome name -> number of bins for that chromosome.
        offset: chromosome name -> global index of its first bin.
        total_n_bins: total number of bins over all chromosomes.
    """

    def __init__(self, chrom_sizes, bin_size):
        """Precompute per-chromosome bin counts and global offsets.

        Args:
            chrom_sizes: mapping of chromosome name to its length in bases.
            bin_size: width of each bin, in bases.
        """
        self.chrom_sizes = chrom_sizes
        self.bin_size = bin_size
        self.num_bins = {}
        self.offset = {}
        self.total_n_bins = 0
        for chrom_name, n_bases in self.chrom_sizes.items():
            # Round up so a trailing partial bin still gets its own slot.
            n_bins = int(np.ceil(n_bases / bin_size))
            self.offset[chrom_name] = self.total_n_bins
            self.num_bins[chrom_name] = n_bins
            self.total_n_bins += n_bins

    def get_bin(self, chrom, position):
        """Return the global bin index holding ``position`` on ``chrom``.

        Raises:
            KeyError: if ``chrom`` is not one of the configured chromosomes.
        """
        off = position // self.bin_size
        return self.offset[chrom] + off

    def featurize(self, opened_file, keep=lambda x: True):
        """Build a per-bin feature vector from the entries of an open file.

        Each kept entry contributes a total weight of 1, split evenly over
        all bins it overlaps.

        Args:
            opened_file: object exposing ``entries(chrom, start, end)`` that
                returns an iterable of tuples whose first two items are the
                entry's start and end, or a falsy value when there are none.
                Intervals are assumed to be 0-based and half-open (``end``
                exclusive), as in BED/bigBed.
            keep: predicate called with each entry tuple; entries for which
                it returns a falsy value are ignored.

        Returns:
            1-D ``numpy`` float array of length ``total_n_bins``.
        """
        arr = np.zeros((self.total_n_bins,))
        for chrom, chrom_len in self.chrom_sizes.items():
            entries = opened_file.entries(chrom, 0, chrom_len)
            if not entries:
                continue
            for peak in entries:
                if not keep(peak):
                    continue
                begin, end = peak[0], peak[1]
                # ``end`` is exclusive, so the last covered base is end - 1.
                # Binning ``end`` itself would smear an interval that stops
                # exactly on a bin boundary into the following bin (or past
                # the end of the array at the end of the last chromosome).
                # max() keeps zero-length entries inside their start bin.
                last_base = max(begin, end - 1)
                first_bin = self.get_bin(chrom, begin)
                last_bin = self.get_bin(chrom, last_base)
                weight = 1 / (last_bin - first_bin + 1)
                for i in range(first_bin, last_bin + 1):
                    arr[i] += weight
        return arr
if __name__ == "__main__":
    # Script entry point: featurize every file under data/ and print the
    # resulting per-bin vector, skipping files that cannot be processed.
    with open('chrom_sizes.json', encoding='utf-8') as sizes_file:
        chrom_sizes = json.load(sizes_file)
    # NOTE(review): chrom_sizes is loaded but the Binner below is built from
    # a hard-coded single-chromosome mapping -- confirm whether it should be
    # Binner(chrom_sizes, ...) instead.
    bnr = Binner({'chr2': 100000}, 10000)
    for filename in glob.glob('data/*'):
        bw_file = None
        try:
            bw_file = pyBigWig.open(filename)
            print(bnr.featurize(bw_file))
        except Exception as err:
            # Best-effort batch run: report the failure and move on to the
            # next file instead of silently discarding the error.
            print(f"skipping {filename}: {err!r}", file=sys.stderr)
        finally:
            # Release the handle even when featurize() raised.
            if bw_file is not None:
                bw_file.close()