-
Notifications
You must be signed in to change notification settings - Fork 1
/
count_subsequences.py
executable file
·54 lines (42 loc) · 1.4 KB
/
count_subsequences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from __future__ import print_function
import csv
import sys
def progress(count, total, status=''):
    """Draw a single-line text progress bar on stdout.

    The line ends with a carriage return rather than a newline, so each
    call overwrites the bar written by the previous call.

    Args:
        count: Number of work items completed so far.
        total: Total number of work items. When this is zero the bar is
            drawn empty at 0.0% instead of raising ZeroDivisionError.
        status: Short message appended after the percentage.
    """
    bar_len = 60
    if total:
        scale = float(total)
        filled_len = int(round(bar_len * count / scale))
        percents = round(100.0 * count / scale, 1)
    else:
        # An empty workload has no meaningful ratio; report "nothing done"
        # rather than dividing by zero.
        filled_len = 0
        percents = 0.0

    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
def read_in():
    """Read every line from stdin with its line terminator removed.

    Only the trailing carriage-return / newline characters are stripped.
    Other leading or trailing whitespace (tabs in particular) is kept,
    because the caller splits each line on tabs and an empty first or
    last field must survive to keep the columns aligned.

    Returns:
        A list of strings, one per input line, in input order.
    """
    return [line.rstrip('\r\n') for line in sys.stdin]
def group_by_seed(sorted_rows, on_progress=None):
    """Collapse consecutive rows whose key starts with the current seed.

    The first row's key becomes the seed. Every following row whose key
    begins with that seed joins the group and its count is added to the
    group's total. The first row that does not match closes the group and
    becomes the seed of the next one.

    Args:
        sorted_rows: Sequence of rows already sorted by key, where
            ``row[0]`` is the key string and ``row[1]`` is a count that
            ``int()`` can parse.
        on_progress: Optional callable invoked as ``on_progress(index,
            row_count)`` after each row has been processed.

    Yields:
        ``[seed, member_keys, total]`` for every group, including the
        final one.
    """
    row_count = len(sorted_rows)
    seed = None
    members = []
    total = 0
    for index, row in enumerate(sorted_rows):
        key = row[0]
        if seed is not None and key.startswith(seed):
            members.append(key)
            total += int(row[1])
        else:
            if seed is not None:
                yield [seed, members, total]
            # Start a fresh group; a new list keeps already-yielded
            # groups from being mutated.
            seed, members, total = key, [key], int(row[1])
        if on_progress is not None:
            on_progress(index, row_count)
    # The last group has no following row to close it, so emit it here.
    if seed is not None:
        yield [seed, members, total]


def main():
    """Read TSV rows from stdin, group them by seed, write output.tsv.

    Each output row holds the seed key, the list of keys in the group and
    the summed count, separated by tabs. Empty input produces an empty
    output file.
    """
    print('Reading input file...', end='')
    all_data = [line.split('\t') for line in read_in()]
    print('done')

    print('Sorting rows...', end='')
    all_data.sort(key=lambda fields: fields[0])
    print('done')

    def report(index, row_count):
        progress(index, row_count, status='finding locations')

    # Open the file once instead of re-opening it in append mode for
    # every group.
    with open('output.tsv', 'w') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        for group in group_by_seed(all_data, on_progress=report):
            writer.writerow(group)


if __name__ == '__main__':
    main()