Skip to content

Commit

Permalink
complete NKEW/NKEW.py & INDC/INDC.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanyk28 committed Oct 25, 2013
1 parent bc7efd0 commit c9445cd
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# Vim swap files
*.swn
*.swo
*.swp
47 changes: 47 additions & 0 deletions CTBL/CTBL.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/python

#########################################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 25 June 2013
#
# Rosalind problem:
#
# Creating a Character Table
#
# Given: An unrooted binary tree T in Newick format for at most 200 species taxa.
#
# Return: A character table having the same splits as the edge splits of T. The columns
# of the character table should encode the taxa ordered lexicographically; the
# rows of the character table may be given in any order. Also, for any given
# character, the particular subset of taxa to which 1s are assigned is arbitrary.
#
# Usage:
#
# python CTBL.py [Input File]
#
########################################################################################

import sys
from ete2 import Tree

def read_file(filename):
    """Return the first line of *filename*.

    Only the first line is read (the trailing newline, if any, is kept);
    an empty file yields an empty string.
    """
    # The context manager closes the handle even when readline() raises.
    with open(filename) as f:
        return f.readline()

def distance(nw, n1, n2):
    """Return the distance between the nodes named *n1* and *n2* in tree *nw*.

    *nw* is a Newick string handed to ``Tree`` with ``format=1``.
    NOTE(review): ``tree & name`` is presumably ete2's look-up-by-name
    operator -- confirm against the ete2 documentation.
    """
    parsed = Tree(nw, format=1)
    start = parsed & n1
    end = parsed & n2
    return start.get_distance(end)

if __name__ == '__main__':

    # The input path is taken from the last command-line argument.
    newick = read_file(sys.argv[-1])

    # NOTE: this only parses the tree and prints it; the character table
    # described in the file header is not computed here.
    tree = Tree(newick)
    print(tree)
1 change: 1 addition & 0 deletions CTBL/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(dog,((elephant,mouse),robot),cat);
58 changes: 58 additions & 0 deletions INDC/INDC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/python

#########################################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 25 June 2013
#
# Rosalind problem:
#
# Independent Segregation of Chromosomes
#
# Given: A positive integer n<=50
#
# Return: An array A of length 2n in which A[k] represents the common logarithm of the
# probability that two diploid siblings share at least k of their 2n chromosomes
# (we do not consider recombination for now).
#
# Usage:
#
# python INDC.py [Input File]
#
########################################################################################

import sys
import math
from scipy.stats.distributions import binom

def read_file(filename):
    """Return the first line of *filename*.

    Only the first line is read (the trailing newline, if any, is kept);
    an empty file yields an empty string.
    """
    # The context manager closes the handle even when readline() raises.
    with open(filename) as f:
        return f.readline()

def INDC(n, k, p):
    """Return the base-10 logarithm of the binomial CDF at *k*.

    Evaluates ``binom.cdf(k, n, p)``, i.e. P(X <= k) for
    X ~ Binomial(n, p), and takes its logarithm to base 10.
    """
    return math.log(binom.cdf(k, n, p), 10)

def result(n):
    """Return the 2n log-probabilities for *n*, largest first.

    Evaluates ``INDC(2*n, k, 0.5)`` for k = 0 .. 2n-1 and sorts the values
    in descending order.  With p = 0.5 the binomial is symmetric
    (P(X <= j) == P(X >= 2n - j)), so the sorted list reads as
    log10 P(X >= k) for k = 1 .. 2n.
    """
    trials = 2 * n
    log_probs = [INDC(trials, k, 0.5) for k in range(trials)]
    log_probs.sort(reverse=True)
    return log_probs

if __name__ == '__main__':

    # The input path is taken from the last command-line argument.
    n = int(read_file(sys.argv[-1]))

    # The context manager guarantees output.txt is flushed and closed even
    # if computing or writing a value fails part-way through.
    with open('output.txt', 'w') as fw:
        for value in result(n):
            text = '%.3f' % value
            # Tiny negative values round to "-0.000"; emit plain zero instead.
            if text == '-0.000':
                text = '0.000'
            fw.write(text + ' ')
1 change: 1 addition & 0 deletions INDC/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 -0.001 -0.002 -0.003 -0.006 -0.010 -0.016 -0.025 -0.039 -0.057 -0.082 -0.114 -0.154 -0.205 -0.266 -0.339 -0.425 -0.524 -0.638 -0.766 -0.910 -1.070 -1.247 -1.440 -1.652 -1.881 -2.129 -2.395 -2.681 -2.987 -3.313 -3.660 -4.027 -4.417 -4.829 -5.264 -5.722 -6.204 -6.712 -7.246 -7.806 -8.394 -9.011 -9.659 -10.338 -11.051 -11.799 -12.585 -13.410 -14.279 -15.194 -16.160 -17.182 -18.267 -19.424 -20.665 -22.008 -23.480 -25.134 -27.093
1 change: 1 addition & 0 deletions INDC/rosalind_indc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
45
1 change: 1 addition & 0 deletions INDC/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5
60 changes: 60 additions & 0 deletions NKEW/NKEW.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/python

#########################################################################################
# Author:
#
# Sanyk28 ([email protected])
#
# Date created:
#
# 25 June 2013
#
# Rosalind problem:
#
# Newick Format with Edge Weights
#
# Given: A collection of n weighted trees (n<=40) in Newick format, with each tree
# containing at most 200 nodes; each tree Tk is followed by a pair of nodes
# xk and yk in Tk.
#
# Return: A collection of n numbers, for which the kth number represents the distance
# between xk and yk in Tk
#
# Usage:
#
# python NKEW.py [Input File]
#
########################################################################################

import sys
from ete2 import Tree

def read_file(filename):
    """Return every line of *filename* as a list of strings.

    Line endings are kept; an empty file yields an empty list.
    """
    # The context manager closes the handle even when readlines() raises.
    with open(filename) as f:
        return f.readlines()

def parse_data(data):
    """Group input lines into ``(tree, node1, node2)`` tuples.

    *data* is an iterable of text lines.  Blank lines are skipped, a line
    ending in ``;`` is remembered as the current Newick tree, and every
    other line must hold exactly two node names, which are paired with the
    most recently seen tree.

    Raises ``ValueError`` if a node line does not contain exactly two names.
    """
    Trees, tree = [], ''
    for row in data:
        line = row.strip()
        if not line:
            continue
        if line.endswith(';'):
            tree = line
        else:
            # split() with no argument accepts tabs and repeated spaces;
            # split(' ') produced empty fields and broke the unpacking.
            n1, n2 = line.split()
            Trees.append((tree, n1, n2))
    return Trees

def distance(nw, n1, n2):
    """Return the distance between the nodes named *n1* and *n2* in tree *nw*.

    *nw* is a Newick string handed to ``Tree`` with ``format=1``.
    NOTE(review): ``tree & name`` is presumably ete2's look-up-by-name
    operator -- confirm against the ete2 documentation.
    """
    parsed = Tree(nw, format=1)
    source_node, target_node = parsed & n1, parsed & n2
    return source_node.get_distance(target_node)

if __name__ == '__main__':

raw_data = read_file(sys.argv[-1])
Trees = parse_data(raw_data)
for tree,n1,n2 in Trees:
print int(distance(tree,n1,n2)),
113 changes: 113 additions & 0 deletions NKEW/rosalind_nkew.txt

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions NKEW/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(dog:42,cat:33);
cat dog

((dog:4,cat:3):74,robot:98,elephant:58);
dog elephant
2 changes: 2 additions & 0 deletions NWCK/NWCK.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# 25 June 2013
#
# Rosalind problem:
#
# Distances in Trees
#
# Given: A collection of n trees (n<=40) in Newick format, with each tree containing at
# most 200 nodes; each tree Tk is followed by a pair of nodes xk and yk in Tk.
Expand Down

0 comments on commit c9445cd

Please sign in to comment.