-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Sanyk28
committed
Oct 25, 2013
1 parent
4fdefbf
commit bc7efd0
Showing
6 changed files
with
86 additions
and
453 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,138 +1,57 @@ | ||
#!/usr/bin/python | ||
|
||
|
||
######################################################################### | ||
######################################################################################### | ||
# Author: | ||
# | ||
# | ||
# Sanyk28 ([email protected]) | ||
# | ||
# Date created: | ||
# | ||
# 20 Oct 2013 | ||
# | ||
# 25 June 2013 | ||
# | ||
# Rosalind problem: | ||
# | ||
# Given: A collection of n trees (n<=40) in Newick format, with each tree containing at | ||
# most 200 nodes; each tree Tk is followed by a pair of nodes xk and yk in Tk. | ||
# | ||
# Distances in Trees | ||
# | ||
# Given: A collection of n trees (n<=40) in Newick format, with each tree | ||
# containing at most 200 nodes; each tree Tk is followed by a pair | ||
# of nodes xk and yk in Tk. | ||
# | ||
# Return: A collection of n positive integers, for which the kth integer | ||
# represents the distance between xk and yk in Tk. | ||
# Return: A collection of n positive integers, for which the kth integer represents the | ||
# distance between xk and yk in Tk. | ||
# | ||
# Usage: | ||
# | ||
# python NWCK.py [Input File] | ||
# | ||
######################################################################## | ||
######################################################################################## | ||
|
||
import sys | ||
import re | ||
from ete2 import Tree | ||
|
||
def read_file(filename):
    r'''
    Given: path of an input file in plain text format.
    Return: the file contents as a list of lines, each line keeping its
            trailing newline character.
    Example:
        >>> read_file('test.txt')
        ['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n', 'dog cat\n']
    '''
    # The context manager closes the handle even when readlines() raises.
    with open(filename) as f:
        return f.readlines()
|
||
def parse_data(data):
    r'''
    Given: file content from read_file(filename), i.e. a list of lines.
    Return: a list of (tree, n1, n2) tuples, one per query line, where tree
            is the most recently seen Newick string (a line ending in ';')
            and n1, n2 are the two node names on the query line.
    Example:
        >>> parse_data(['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n',
        ...             'dog cat\n'])
        [('(cat)dog;', 'dog', 'cat'), ('(dog,cat);', 'dog', 'cat')]
    '''

    Trees, tree = [], ''
    for row in data:
        row = row.strip()
        if not row:
            # Blank separator line between two tree/query pairs.
            continue
        if row.endswith(';'):
            tree = row
        else:
            # split() with no argument tolerates tabs and repeated spaces,
            # which split(' ') would turn into empty names.
            n1, n2 = row.split()
            Trees.append((tree, n1, n2))
    return Trees
|
||
def count_pattern(string, pattern):
    '''
    Given: a string and a regular expression pattern.
    Return: the number of non-overlapping matches of pattern in string.
    Example:
        >>> count_pattern('((a)b)', r'\\(')
        2
    '''
    # re.subn returns (new_string, number_of_substitutions); only the count
    # is of interest here.
    return re.subn(pattern, '', string)[1]
|
||
def NWCK_distance(tree, nodes):
    '''
    Given: a Newick format tree and the two node names to compare, either as
           a two-item sequence (['dog', 'cat']) or as a single
           whitespace-separated string ('dog cat').
    Return: the number of edges on the path between the two nodes, or 0 when
            either name does not occur in the tree.
    Examples:
        >>> NWCK_distance('(dog,cat);', 'dog cat')
        2
        >>> NWCK_distance('(,,,,,,,,,,dog,,,,)cat', 'cat dog')
        1
        >>> NWCK_distance('(elephant,rabbit,cat,monkey,pig)dog;', 'dog cat')
        1
        >>> NWCK_distance('(dog)cat;', 'cat dog')
        1
        >>> NWCK_distance('((dog,cat),monkey);', 'dog cat')
        2
        >>> NWCK_distance('((dog,cat),monkey);', 'dog monkey')
        3
    '''

    # Strings expose split(); a list or tuple of names does not.
    if hasattr(nodes, 'split'):
        nodes = nodes.split()
    n1, n2 = nodes[0], nodes[1]
    wanted = (n1, n2)

    paths = {}          # node name -> ids of the nodes from the root down to it
    open_nodes = []     # ids of internal nodes whose '(' is not yet closed
    just_closed = None  # id of the node closed by the ')' right before a label
    next_id = 0

    # Split into structural characters and the label text between them, so
    # that names are compared whole (no substring or regex-metachar issues).
    for token in re.findall(r'[(),;]|[^(),;]+', tree):
        if token == '(':
            open_nodes.append(next_id)
            next_id += 1
            just_closed = None
        elif token == ')':
            # Tolerate an unbalanced ')' instead of raising.
            just_closed = open_nodes.pop() if open_nodes else None
        elif token == ',':
            just_closed = None
        elif token == ';':
            break
        else:
            # Drop an optional ':branch_length' suffix.
            name = token.split(':', 1)[0].strip()
            # The first occurrence of a name wins.
            if name in wanted and name not in paths:
                if just_closed is None:
                    # A label not preceded by ')' is a leaf: give it a new id.
                    own_id = next_id
                    next_id += 1
                else:
                    # A label right after ')' names the node just closed.
                    own_id = just_closed
                paths[name] = tuple(open_nodes) + (own_id,)
            just_closed = None

    if n1 not in paths or n2 not in paths:
        return 0

    path1, path2 = paths[n1], paths[n2]
    # Length of the shared prefix, i.e. the part of both root paths down to
    # and including the lowest common ancestor.
    shared = 0
    for a, b in zip(path1, path2):
        if a != b:
            break
        shared += 1
    return len(path1) + len(path2) - 2 * shared
|
||
def result(Trees):
    '''
    Given: a list of entries, each pairing a Newick format tree with the two
           nodes whose distance is wanted. An entry may be either
           (tree, [n1, n2]) or (tree, n1, n2).
    Return: a list with the distance between the two nodes of each entry,
            in input order, as computed by NWCK_distance.
    Example:
        >>> result([('(dog,cat);', ['dog', 'cat'])])
        [2]
    '''

    Distances = []
    for entry in Trees:
        tree, nodes = entry[0], entry[1:]
        if len(nodes) == 1:
            # (tree, [n1, n2]) shape: unwrap the inner pair.
            nodes = nodes[0]
        Distances.append(NWCK_distance(tree, nodes))
    return Distances
def distance(nw, n1, n2):
    '''
    Given: a Newick format tree string and the names of two of its nodes.
    Return: the value of get_distance() between the two named nodes of the
            parsed ete2 Tree (the caller in this file wraps it in int()).
    '''
    # format=1 is passed straight to the ete2 parser; presumably the Newick
    # flavour with internal node names -- confirm against the ete2 docs.
    parsed = Tree(nw, format=1)
    # 'tree & name' looks a node up by name in the parsed tree.
    first = parsed & n1
    second = parsed & n2
    return first.get_distance(second)
|
||
if __name__ == '__main__':

    # Without an argument sys.argv[-1] would be this script itself.
    if len(sys.argv) < 2:
        sys.exit('Usage: python NWCK.py [Input File]')

    raw_data = read_file(sys.argv[-1])
    Trees = parse_data(raw_data)
    # One space-separated line of integer distances; the parenthesised
    # single-argument print works on both Python 2 and Python 3.
    print(' '.join(str(int(distance(tree, n1, n2)))
                   for tree, n1, n2 in Trees))
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.