Skip to content

Commit

Permalink
add NWCK
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanyk28 committed Oct 24, 2013
1 parent 8b0c9b4 commit 4fdefbf
Show file tree
Hide file tree
Showing 9 changed files with 523 additions and 0 deletions.
Binary file added NWCK/.NWCK.py.swn
Binary file not shown.
Binary file added NWCK/.NWCK.py.swo
Binary file not shown.
Binary file added NWCK/.NWCK.py.swp
Binary file not shown.
97 changes: 97 additions & 0 deletions NWCK/NWCK.px~
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/python


#########################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 19 Oct 2013
#
# Rosalind problem:
#
# Distances in Trees
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree
# containing at most 200 nodes; each tree Tk is followed by a pair
# of nodes xk and yk in Tk.
#
# Return: A collection of n positive integers, for which the kth integer
# represents the distance between xk and yk in Tk.
#
# Usage:
#
# python NWCK.py [Input File]
#
########################################################################

import sys

def read_file(filename):
    r'''
    Read a plain-text input file and return its lines.

    Given: path of the input file.
    Return: list of the file's lines, each keeping its trailing newline.
    Raises: IOError/OSError when the file cannot be opened or read.
    Example (test.txt holds two trees, each followed by a node pair):
    >>> read_file('test.txt')
    ['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n', 'dog cat\n']
    '''
    # The context manager closes the handle even if reading fails part-way,
    # which the former open()/readlines()/close() sequence did not guarantee.
    with open(filename) as handle:
        return handle.readlines()

def parse_data(data):
    r'''
    Pair every Newick tree with the node line that follows it.

    Given: file content from read_file(filename), i.e. a list of lines.
    Return: a dictionary where Newick format trees (lines ending in ';')
            are the keys and the space-separated node names whose
            distance needs to be calculated are the values. A tree that
            is not followed by a node line maps to ''.
    Note: blank lines are ignored. Identical trees share one key, so only
          the last node line of a repeated tree is kept, and a node line
          that appears before any tree is stored under the '' key.
    Example:
    >>> parse_data(['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n',
    ...             'dog cat\n'])
    {'(dog,cat);': 'dog cat', '(cat)dog;': 'dog cat'}
    '''
    Trees = {}
    tree = ''
    for row in data:
        line = row.strip()  # strip once instead of once per test
        if not line:
            continue
        if line.endswith(';'):
            # A new tree starts; it has no nodes until the next line is read.
            tree = line
            Trees[tree] = ''
        else:
            Trees[tree] = line
    return Trees

def distance(tree, nodes):
    '''
    Report the distance between two nodes that are adjacent siblings.

    Given: a Newick format tree and a string holding two node names
           separated by whitespace.
    Return: 2 when the two names appear next to each other, separated by a
            single comma, in either order; 0 otherwise.
    Raises: IndexError when nodes holds fewer than two names.
    NOTE: this is still a partial implementation. Only the adjacent-sibling
          case is recognised, the check is a plain substring test (so
          'dog,cat' also matches inside 'hotdog,cat'), and 0 means "not
          determined", not a real distance.
    Examples:
    >>> distance('(dog,cat);', 'dog cat')
    2
    >>> distance('(cat)dog;', 'dog cat')
    0
    '''
    names = nodes.split()
    first, second = names[0], names[1]
    # Each membership test must be spelled out: the former
    # "a or b in tree" parsed as "a or (b in tree)" and, with a being a
    # non-empty string, was always true.
    if ','.join([first, second]) in tree or ','.join([second, first]) in tree:
        return 2
    return 0

'''
def result(data_dict):
for key, value in data_dict:
distance
return
'''

if __name__ == '__main__':

    # The last command-line argument is taken as the input file.
    raw_data = read_file(sys.argv[-1])
    Trees = parse_data(raw_data)  # parsed but not used by the check below yet
    # Hard-coded smoke test of distance(). The parenthesised single-argument
    # print runs unchanged on Python 2 and Python 3.
    print(distance('(dog,cat);', 'dog cat'))
138 changes: 138 additions & 0 deletions NWCK/NWCK.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/python


#########################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 20 Oct 2013
#
# Rosalind problem:
#
# Distances in Trees
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree
# containing at most 200 nodes; each tree Tk is followed by a pair
# of nodes xk and yk in Tk.
#
# Return: A collection of n positive integers, for which the kth integer
# represents the distance between xk and yk in Tk.
#
# Usage:
#
# python NWCK.py [Input File]
#
########################################################################

import sys
import re

def read_file(filename):
    r'''
    Read a plain-text input file and return its lines.

    Given: path of the input file.
    Return: list of the file's lines, each keeping its trailing newline.
    Raises: IOError/OSError when the file cannot be opened or read.
    Example (test.txt holds two trees, each followed by a node pair):
    >>> read_file('test.txt')
    ['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n', 'dog cat\n']
    '''
    # The context manager closes the handle even if reading fails part-way,
    # which the former open()/readlines()/close() sequence did not guarantee.
    with open(filename) as handle:
        return handle.readlines()

def parse_data(data):
    r'''
    Pair every Newick tree with the node names listed after it.

    Given: file content from read_file(filename), i.e. a list of lines.
    Return: a list of (tree, nodes) tuples in file order, where tree is a
            Newick format line (ending in ';') and nodes is the list of
            node names, whose distance needs to be calculated, taken from
            a following line.
    Note: blank lines are ignored. A tree contributes one tuple per node
          line that follows it, so a tree without a node line contributes
          nothing. A node line that appears before any tree is paired
          with ''.
    Example:
    >>> parse_data(['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n',
    ...             'dog cat\n'])
    [('(cat)dog;', ['dog', 'cat']), ('(dog,cat);', ['dog', 'cat'])]
    '''
    Trees, tree = [], ''
    for row in data:
        line = row.strip()  # strip once instead of once per test
        if not line:
            continue
        if line.endswith(';'):
            tree = line
        else:
            # split() without an argument tolerates tabs and repeated
            # spaces; split(' ') produced empty node names for them.
            Trees.append((tree, line.split()))
    return Trees

def count_pattern(string, pattern):
    '''
    Count how often a regular expression matches inside a string.

    Given: the text to search and a regular expression pattern.
    Return: the number of substitutions re.subn performs when every match
            of pattern in string is replaced by the empty string.
    '''
    _, hits = re.subn(pattern, '', string)
    return hits

def _newick_parents(tree):
    '''
    Scan a Newick string once and record which node hangs under which.

    Return: (parents, ids). parents[i] is the index of node i's parent
            (None for node 0) and ids maps every label to the index of
            the first node carrying it.
    '''
    # Node 0 is a virtual root above everything in the string, so input
    # without an outer pair of parentheses still forms a single tree.
    parents = [None]
    ids = {}
    open_nodes = [0]    # internal nodes whose ')' has not been seen yet
    just_closed = None  # node closed by the latest ')', still unnamed
    for token in re.findall(r'[(),;]|[^(),;]+', tree):
        if token == ';':
            break
        if token == '(':
            parents.append(open_nodes[-1])
            open_nodes.append(len(parents) - 1)
            just_closed = None
        elif token == ')':
            # A stray ')' must never pop the virtual root.
            just_closed = open_nodes.pop() if len(open_nodes) > 1 else None
        elif token == ',':
            just_closed = None
        else:
            label = token.split(':')[0].strip()  # drop any ':length' suffix
            if not label:
                continue
            if just_closed is None:
                # A label right after '(' or ',' introduces a new leaf.
                parents.append(open_nodes[-1])
                node = len(parents) - 1
            else:
                # A label right after ')' names the group just closed.
                node = just_closed
                just_closed = None
            ids.setdefault(label, node)
    return parents, ids


def NWCK_distance(tree, nodes):
    '''
    Count the edges on the path between two named nodes of a Newick tree.

    Given: a Newick format tree and the two node names, either as a
           sequence (as produced by parse_data) or as one
           whitespace-separated string.
    Return: the distance between the nodes, or 0 when both names are the
            same node or when a name does not occur in the tree.
    Raises: IndexError when fewer than two names are supplied.
    Examples:
    >>> NWCK_distance('(dog,cat);', ['dog', 'cat'])
    2
    >>> NWCK_distance('(,,,,,,,,,,dog,,,,)cat', ['cat', 'dog'])
    1
    >>> NWCK_distance('(elephant,rabbit,cat,monkey,pig)dog;', 'dog cat')
    1
    >>> NWCK_distance('(dog)cat;', ['cat', 'dog'])
    1
    >>> NWCK_distance('cat,(dog,monkey),elephant;', ['elephant', 'cat'])
    2
    >>> NWCK_distance('((a,b),(c,d));', ['a', 'b'])
    2
    '''
    if isinstance(nodes, str):
        nodes = nodes.split()
    first, second = nodes[0], nodes[1]

    # Whole labels are looked up in the parsed tree; searching the raw text
    # would also hit names that merely contain the label ('cat' in 'wildcat').
    parents, ids = _newick_parents(tree)
    if first not in ids or second not in ids:
        return 0

    # Edges from the first node up to each of its ancestors.
    climb = {}
    node, steps = ids[first], 0
    while node is not None:
        climb[node] = steps
        node, steps = parents[node], steps + 1

    # Walk up from the second node until the two paths meet. The loop ends
    # because every path reaches the virtual root, which is in climb.
    node, steps = ids[second], 0
    while node not in climb:
        node, steps = parents[node], steps + 1
    return climb[node] + steps

def result(Trees):
    '''
    Compute the requested distance for every parsed tree.

    Given: a list of (tree, nodes) pairs, where tree is a Newick format
           string and nodes holds the two node names whose distance needs
           to be calculated.
    Return: a list with the distance between the two nodes in the
            corresponding Newick format tree, in the same order as Trees.
    Example:
    >>> result([])
    []
    '''
    return [NWCK_distance(newick, pair) for newick, pair in Trees]

if __name__ == '__main__':

    # Without an argument sys.argv[-1] is the script's own path, which
    # would feed this source file to the parser; stop with the usage line.
    if len(sys.argv) < 2:
        sys.exit('Usage: python NWCK.py [Input File]')
    raw_data = read_file(sys.argv[-1])
    Trees = parse_data(raw_data)
    # One distance per tree, space-separated. The parenthesised
    # single-argument print runs unchanged on Python 2 and Python 3.
    print(' '.join(map(str, result(Trees))))
76 changes: 76 additions & 0 deletions NWCK/NWCK.py~
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/python


#########################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 19 Oct 2013
#
# Rosalind problem:
#
# Distances in Trees
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree
# containing at most 200 nodes; each tree Tk is followed by a pair
# of nodes xk and yk in Tk.
#
# Return: A collection of n positive integers, for which the kth integer
# represents the distance between xk and yk in Tk.
#
# Usage:
#
# python NWCK.py [Input File]
#
########################################################################

import sys

def read_file(filename):
    r'''
    Read a plain-text input file and return its lines.

    Given: path of the input file.
    Return: list of the file's lines, each keeping its trailing newline.
    Raises: IOError/OSError when the file cannot be opened or read.
    Example (test.txt holds two trees, each followed by a node pair):
    >>> read_file('test.txt')
    ['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n', 'dog cat\n']
    '''
    # The context manager closes the handle even if reading fails part-way,
    # which the former open()/readlines()/close() sequence did not guarantee.
    with open(filename) as handle:
        return handle.readlines()

def parse_data(data):
    r'''
    Pair every Newick tree with the node line that follows it.

    Given: file content from read_file(filename), i.e. a list of lines.
    Return: a dictionary where Newick format trees (lines ending in ';')
            are the keys and the space-separated node names whose
            distance needs to be calculated are the values. A tree that
            is not followed by a node line maps to ''.
    Note: blank lines are ignored. Identical trees share one key, so only
          the last node line of a repeated tree is kept, and a node line
          that appears before any tree is stored under the '' key.
    Examples:
    >>> parse_data(['(cat)dog;\n', 'dog cat\n', '\n', '(dog,cat);\n',
    ...             'dog cat\n'])
    {'(dog,cat);': 'dog cat', '(cat)dog;': 'dog cat'}
    '''
    Trees = {}
    tree = ''
    for row in data:
        line = row.strip()  # strip once instead of once per test
        if not line:
            continue
        if line.endswith(';'):
            # A new tree starts; it has no nodes until the next line is read.
            tree = line
            Trees[tree] = ''
        else:
            Trees[tree] = line
    return Trees



if __name__ == '__main__':

    # The last command-line argument is taken as the input file.
    raw_data = read_file(sys.argv[-1])
    # Show the parsed tree -> nodes mapping. The parenthesised
    # single-argument print runs unchanged on Python 2 and Python 3.
    print(parse_data(raw_data))
Loading

0 comments on commit 4fdefbf

Please sign in to comment.