Skip to content

Commit

Permalink
complete NWCK/NWCK.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanyk28 committed Oct 25, 2013
1 parent 4fdefbf commit bc7efd0
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 453 deletions.
97 changes: 0 additions & 97 deletions NWCK/NWCK.px~

This file was deleted.

121 changes: 20 additions & 101 deletions NWCK/NWCK.py
Original file line number Diff line number Diff line change
@@ -1,138 +1,57 @@
#!/usr/bin/python


#########################################################################
#########################################################################################
# Author:
#
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 20 Oct 2013
#
# 25 June 2013
#
# Rosalind problem:
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree containing at
# most 200 nodes; each tree Tk is followed by a pair of nodes xk and yk in Tk.
#
# Distances in Trees
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree
# containing at most 200 nodes; each tree Tk is followed by a pair
# of nodes xk and yk in Tk.
#
# Return: A collection of n positive integers, for which the kth integer
# represents the distance between xk and yk in Tk.
# Return: A collection of n positive integers, for which the kth integer represents the
# distance between xk and yk in Tk.
#
# Usage:
#
# python NWCK.py [Input File]
#
########################################################################
########################################################################################

import sys
import re
from ete2 import Tree

def read_file(filename):
    r'''
    Read a plain-text input file and return its lines.

    Given: filename, the path of a plain-text input file.
    Return: a list with one string per line of the file; every string keeps
            its line ending, exactly as readlines() produces it.
    Example:
    >>> read_file('test.txt')
    ['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n', 'dog cat\n']
    '''
    # The with-statement closes the handle even when readlines() raises,
    # which the former open()/close() pair did not guarantee.
    with open(filename) as f:
        return f.readlines()

def parse_data(data):
    r'''
    Pair every Newick tree with the two node names listed after it.

    Given: file content from read_file(filename), i.e. a list of lines.
           A line ending in ';' is a tree; any other non-blank line holds
           two whitespace-separated node names that belong to the most
           recently seen tree. Blank lines are ignored.
    Return: a list of (tree, n1, n2) tuples, in input order.
    Raises: ValueError when a node line does not hold exactly two names.
    Example:
    >>> parse_data(['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n',
    ...             'dog cat\n'])
    [('(cat)dog;', 'dog', 'cat'), ('(dog,cat);', 'dog', 'cat')]
    '''

    Trees, tree = [], ''
    for row in data:
        row = row.strip()
        if not row:
            continue
        if row.endswith(';'):
            # Remember the tree until its node line shows up.
            tree = row
        else:
            # split() without an argument tolerates tabs and repeated spaces.
            n1, n2 = row.split()
            Trees.append((tree, n1, n2))
    return Trees

def count_pattern(string, pattern):
    '''
    Count how many times a regular expression matches inside a string.

    Given: string, the text to scan, and pattern, a regular expression.
    Return: the number of substitutions re.subn() performs when every
            match of pattern in string is replaced by the empty string.
    '''
    # Only the substitution count matters; the stripped text is discarded.
    _stripped, hits = re.subn(pattern, '', string)
    return hits

def NWCK_distance(tree, nodes):
    '''
    Count the edges between two named nodes of a Newick tree.

    Given: tree, a Newick format string, and nodes, a sequence whose first
           two items are the names of the nodes to compare.
    Return: the number of edges on the path between the two nodes, or 0
            when either name does not occur in the tree.

    The tree is walked once, left to right. Every named node is stored with
    the chain of node ids leading from the outermost level down to it; the
    distance is then the number of steps from each node up to the deepest
    ancestor the two chains share. Text after a ':' in a label (a branch
    length) is ignored, and when a name occurs more than once the first
    occurrence is used.

    Examples:
    >>> NWCK_distance('(dog,cat);', ['dog', 'cat'])
    2
    >>> NWCK_distance('(,,,,,,,,,,dog,,,,)cat;', ['cat', 'dog'])
    1
    >>> NWCK_distance('(elephant,rabbit,cat,monkey,pig)dog;', ['dog', 'cat'])
    1
    >>> NWCK_distance('(rabbit,cat,monkey)dog;', ['cat', 'dog'])
    1
    >>> NWCK_distance('(dog)cat;', ['cat', 'dog'])
    1
    >>> NWCK_distance('cat,(dog,monkey),elephant;', ['elephant', 'cat'])
    2
    >>> NWCK_distance('((dog,cat),monkey);', ['dog', 'cat'])
    2
    '''

    n1, n2 = nodes[0], nodes[1]

    paths = {}       # node name -> ids from the outermost level down to it
    stack = []       # ids of the internal nodes whose '(' is still open
    next_id = 0      # source of unique node ids
    closed = None    # internal node whose ')' was just read, awaiting a label

    # Tokens are the four structural characters or a run of anything else.
    for token in re.findall(r'[(),;]|[^(),;]+', tree):
        if token == '(':
            stack.append(next_id)
            next_id += 1
            closed = None
        elif token == ')':
            # Tolerate a stray ')' instead of failing on malformed input.
            closed = stack.pop() if stack else None
        elif token == ',' or token == ';':
            closed = None
        else:
            label = token.split(':', 1)[0].strip()
            if label:
                if closed is not None:
                    # A label right after ')' names that internal node.
                    path = stack + [closed]
                else:
                    # Any other label is a new leaf under the open node.
                    path = stack + [next_id]
                    next_id += 1
                paths.setdefault(label, path)
            closed = None

    if n1 not in paths or n2 not in paths:
        return 0

    p1, p2 = paths[n1], paths[n2]
    shared = 0
    for a, b in zip(p1, p2):
        if a != b:
            break
        shared += 1
    if shared == 0:
        # No enclosing parentheses in common: the nodes hang off an
        # implicit top level, one step away from each chain's first entry.
        return len(p1) + len(p2)
    return len(p1) + len(p2) - 2 * shared

def result(Trees):
    '''
    Compute the node distance for every tree in a collection.

    Given: Trees, an iterable of (tree, nodes) pairs, where tree is a Newick
           format string and nodes holds the two node names to compare.
    Return: a list with the NWCK_distance(tree, nodes) value of each pair,
            in the same order as the input.
    '''

    return [NWCK_distance(tree, nodes) for tree, nodes in Trees]
def distance(nw,n1,n2):
    '''
    Measure the distance between two named nodes using ete2.

    Given: nw, a Newick format string; n1 and n2, the names of two nodes.
    Return: whatever get_distance() reports for the two nodes.
    '''
    # NOTE(review): format=1 is presumably the ete2 Newick flavour that
    # accepts internal node names -- confirm against the ete2 docs.
    parsed = Tree(nw, format=1)
    # "tree & name" looks a node up by name in ete2 (assumed; verify).
    first = parsed & n1
    second = parsed & n2
    return first.get_distance(second)

if __name__ == '__main__':

    # The input file is the last command-line argument; without one,
    # sys.argv[-1] would be this script itself, so stop with a usage hint.
    if len(sys.argv) < 2:
        sys.exit('Usage: python NWCK.py [Input File]')

    raw_data = read_file(sys.argv[-1])
    Trees = parse_data(raw_data)

    # Each entry is expected to be a (tree, n1, n2) tuple. int() trims the
    # value returned by distance() to a whole number (presumably a float
    # coming from ete2). A single print call with one parenthesised
    # argument behaves the same under Python 2 and Python 3.
    print(' '.join(str(int(distance(tree, n1, n2)))
                   for tree, n1, n2 in Trees))
76 changes: 0 additions & 76 deletions NWCK/NWCK.py~

This file was deleted.

Loading

0 comments on commit bc7efd0

Please sign in to comment.