-
Notifications
You must be signed in to change notification settings - Fork 0
/
Tree.py
112 lines (93 loc) · 3.36 KB
/
Tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from Node import Node
from Operation import Operation as Op
import random as r
import copy
import math
class Tree(object):
def __init__(self, cells, mutations):
self.cells = cells
self.mutations = mutations
self.losses_list = []
self.k_losses_list = [0] * mutations
self.best_sigma = [0] * cells
self.likelihood = float("-inf")
self.phylogeny = None
self.operation = None
self.debug = False
def calculate_losses_list(self, k):
losses_list = []
k_losses_list = [0] * self.mutations
for n in self.phylogeny.traverse():
if n.loss:
losses_list.append(n)
k_losses_list[n.mutation_id] += 1
return losses_list, k_losses_list
def copy(self):
"Copies everything in this tree"
t = Tree(self.cells, self.mutations)
t.likelihood = self.likelihood
t.phylogeny = self.phylogeny.copy()
for n in t.phylogeny.traverse():
if n.loss:
t.losses_list.append(n)
t.k_losses_list[n.mutation_id] += 1
if self.operation == None:
t.operation = None
else:
t.operation = Op(self.operation.type, self.operation.node_name_1, self.operation.node_name_2, self.operation.node_name_3)
return t
@classmethod
def random(cls, cells, mutations, mutation_names):
t = Tree(cells, mutations)
random_tree = cls._generate_random_btree(mutations, mutation_names)
t.phylogeny = random_tree
return t
@classmethod
def germline_node(cls):
return Node("germline", None, -1, 0)
@classmethod
def _generate_random_btree(cls, mutations, mutation_names):
""" Generates a random binary tree """
root = cls.germline_node()
rantree = [i for i in range(mutations)]
r.shuffle(rantree, r.random)
nodes = [root]
append_node = 0
i = 0
while i < mutations:
nodes.append(
Node(mutation_names[rantree[i]], nodes[append_node], rantree[i])
)
i += 1
if i < mutations:
nodes.append(
Node(mutation_names[rantree[i]], nodes[append_node], rantree[i])
)
append_node += 1
i += 1
return root
@classmethod
def greedy_loglikelihood(cls, helper, tree, data=None):
"Gets maximum likelihood of a tree"
nodes_list = tree.phylogeny.get_cached_content()
node_genotypes = [
[0 for j in range(helper.mutations)]
for i in range(len(nodes_list))
]
for i, n in enumerate(nodes_list):
n.get_genotype_profile(node_genotypes[i])
maximum_likelihood = 0
for i in range(helper.cells):
best_sigma = -1
best_lh = float("-inf")
for n in range(len(nodes_list)):
lh = 0
for j in range(helper.mutations):
p = Op.prob(helper.matrix[i][j], node_genotypes[n][j], node_genotypes, helper, tree, data)
lh += math.log(p)
if lh > best_lh:
best_sigma = n
best_lh = lh
tree.best_sigma[i] = best_sigma
maximum_likelihood += best_lh
return maximum_likelihood