-
Notifications
You must be signed in to change notification settings - Fork 2
/
wga_bed_summary.py
executable file
·81 lines (56 loc) · 1.8 KB
/
wga_bed_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
from __future__ import print_function
import sys
import argparse
from wga_bed_indels import species_in_block
from collections import OrderedDict
def out_group_agreement(sites):
"""
returns true if outgroups agree
:param sites: list(str, ...)
:return: bool
"""
out_groups = sites[1:]
if len(set(out_groups)) == 1:
return True
else:
return False
def out_group_len_agreement(sites):
"""
returns true if outgroups lengths agree
:param sites: list(str, ...)
:return: bool
"""
out_groups = sites[1:]
out_lens = [len(x) for x in out_groups]
if len(set(out_lens)) == 1:
return True
else:
return False
def main():
# args
parser = argparse.ArgumentParser()
parser.add_argument('-callable',
help='Outputs number of sites that have all species covered and outgroup agreement',
default=False, action='store_true')
args = parser.parse_args()
out_dict = OrderedDict()
# process file
for orig_line in sys.stdin:
line = orig_line.rstrip('\n').split('\t')
chromo, start, end, strand, spp, all_chromo, all_pos, sites, score = line[0], line[1], line[2], line[3], \
line[4].split(','), line[5].split(','), line[6].split(','), line[7].split(','), line[8]
if chromo not in out_dict.keys():
out_dict[chromo] = 0
if args.callable:
# coverage check
if species_in_block(all_pos) < len(all_chromo):
continue
# ref specific?
if out_group_len_agreement(sites) is False:
continue
out_dict[chromo] += 1
for x in out_dict.items():
print(x[0], x[1], sep='\t')
if __name__ == '__main__':
main()