-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDiscrSeq
145 lines (111 loc) · 4.68 KB
/
DiscrSeq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
Created on Sun Jan 25 22:18:46 2015
@author: Jasmine Brown
"""
#Assignment Two
#1. Write a function that multiplies all consecutively decreasing numbers
#max and min are supplied as arguments
from functools import reduce
#reduce allows me to apply a function of two arguments and the values are reduced to a single value
import operator
#lets me use standard operations as function ex.operator.add(x,y)=x+y
def multiple (min,max):
#I am creating a function to interate over a range of values and return one value
x= reduce(operator.mul, range(min,max+1))
#I am using reduce to give me one value in this range. operator.mul will multiply
#the values in the range. +1 is added to max to make sure the max number is included
return x
example=multiple(2,10)
#here is an example of how to input values and call my "multiple" function
print example
#2. write a function that calculates the binomial coefficient 2 ways using the function from #1
#2a. use the n!/(n-k)!k! OR n(n-1)...(n-k+1)/k!
def BiCoefficient(n,k):
if k>n:
return 0
else:
#note that n should be your max value and k the min. you want to see how many sets are in the larger
return multiple(n, 1)/(multiple(n-k, 1)*multiple(k, 1))
example2a=BiCoefficient(10,2)
print example2a
#2b Now lets use the other equation to calulate the binomial coefficient
#n*n-1/k
#Note: I think I may have something wrong with the equation above...the value is 4 off from the 2nd equation
#I also tried this equation (n*n-1)/k and it's 4 off not sure why
def BiCoefficient2(n,k):
if k>n:
return 0
else:
return multiple(n, n-k+1)/multiple(k,1)
example2b=BiCoefficient2 (10,2)
print example2b
#3. Now I'm going to use both functions on large values and see what happens
#I tried both of these and the first equation took longer than the 2nd
#4. Use (2b) to write a function that calculates the probability of
#k successes in n Bernoulli trials with probability p.
#This is called the Binomial(n,p) distribution.
def Bernoulli(k,n,p):
ProbMassFunction= BiCoefficient2(n, k)*pow(p,k)*pow((1-p),(n-k))
return ProbMassFunction
#5 Now write a function to sample from an arbitrary discrete distribution.
#This function should take two arguments.
#The first is a list of arbitrarily labeled events and the second is a list of
#probabilities associated with these events.
def DiscreteDistr(events,probabilities):
#you need to import random to get arbritrary random numbers generated
import random
x= random.choice(events)
#this pulls a random number from a list
#the lists must be the same length and I should use the lists index to match
#events with its proper probability
index=events.index(x)
#this indexes the random events
probability=probabilities[index]
#this is the probabilities of the events given
return "this is the event", x, "and this is the probability", probability
eventlist=["j","a","s","m","i","n","e"]
problist=[.4,.5,.6,.7,.3,.2,.1]
print DiscreteDistr(eventlist,problist)
"""
This next section focuses on "sampling sites from an alignment"
Imagine that you have a multiple sequence alignment with two kinds of sites.
One type of site pattern supports the monophyly of taxon A and taxon B.
The second type supports the monophyly of taxon A and taxon C.
"""
#6 For an alignment of 400 sites,
#with 200 sites of type 1 and 200 of type 2,
#sample a new alignment (a new set of site pattern counts) with replacement
#from the original using your function from (5). Print out the counts of the two types.
def sites(eventslist,probability,size):
count1=[]
count2 =[]
p=0
l=0
for events in range(size):
#I have 200 of each type of site. Now I need to use my function from part 5 for sampling
if DiscreteDistr(eventlist,problist,size)[1] =="a":
#the index 1 refers to the print statement from the DiscrDistr function.[1] is the value from x
p=p +1
else:
l=l +1
#these variables are keeping count
count1.append(p)
count2.append(l)
return "This is value of p",p,"this is the value of l",l
eventslist=[("j"),("a")]
probability=[.5,.5]
size=400
a=sites(eventslist,probability,size)
b=sites(eventslist,probability,size)
print a
print b
#7
a=[]
x = 0
while x < 100:
times = sites(eventslist, probability, 400)
a.append(times)
x = x+1
print a
#8. ...#summarize how oftern I saw each proportion...Will summarize once #6 is fixed :/ but as of now everything that is being drawn is type2
#9 -11. Will go back and summarize and run a larger sample once 6 is fixed to consider both variables in the list