-
Notifications
You must be signed in to change notification settings - Fork 0
/
1_to_1_filter.py
41 lines (30 loc) · 1.1 KB
/
1_to_1_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import sys, argparse, re
def filter_file( file_path, num_species ):
    """Print the lines of a group file that form 1-to-1 groups.

    Each line is processed independently:

    1. Every "|m.<non-whitespace>" suffix is replaced by a space, so a
       token such as "abc|m.123" is reduced to "abc".
    2. The line is split on whitespace and the first token is dropped
       (presumably the group label -- confirm against the input format).
    3. The remaining tokens are the ids for that line.

    A line is printed unchanged (minus surrounding whitespace) when its
    ids are all distinct AND there are exactly ``num_species`` of them.

    For every line, matching or not, a four-line diagnostic block is
    written to stdout first: a row of 20 asterisks followed by the id
    count, the unique-id count and ``num_species``.

    Args:
        file_path: Path of the text file to read.
        num_species: Number of distinct ids a line must contain to be
            reported.
    """
    # Raw string: "\|", "\." and "\S" are regex escapes, not string escapes.
    regex = re.compile( r"\|m\.\S*" )
    with open( file_path ) as fh:
        for line in fh:
            line = line.strip()
            # [ 1 : ] skips the leading token of the line.
            ids = regex.sub( " ", line ).split()[ 1 : ]
            ids_unique = set( ids )

            # Single-argument print( ... ) calls behave the same on
            # Python 2 and Python 3.
            print( "*" * 20 )
            print( "ids:\t" + str( len( ids ) ) )
            print( "uniq:\t" + str( len( ids_unique ) ) )
            print( "spec:\t" + str( num_species ) )

            # No duplicates, and exactly the requested number of ids.
            if len( ids ) == len( ids_unique ) and len( ids ) == num_species:
                print( line )
def main( args ):
    """Run the filter using parsed command-line arguments.

    Args:
        args: Object exposing ``file_path`` and ``num_species``
            attributes; both are passed on to ``filter_file``.
    """
    path = args.file_path
    species_count = args.num_species
    filter_file( path, species_count )
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, in this order.
    cli = argparse.ArgumentParser( description = "Find 1-to-1 orthology groups." )

    # Converted to int by argparse.
    cli.add_argument(
        "num_species",
        type = int,
        help = "The number of species necessary for 1-to-1 groupings."
    )
    # Kept as a string and passed on as the input file path.
    cli.add_argument(
        "file_path",
        type = str,
        help = "Path to OrthoMCL output."
    )

    main( cli.parse_args() )