-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
277 lines (201 loc) · 7.22 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
"""
Node in tree:
- Value
- Children
value:
"*" - times
"+" - plus
"-" - minus
[number] - number
"x" - x variable
"""
import numpy as np
operators = ["*", "+", "-"]
variables = ["x"]
seed = 1231
rng = np.random.default_rng(seed)
targetTree = [
"*", [
["*",
[
["x", None],
["+", [
["x", None],
[1, None]
]]
]
],
["-",
[
["x", None],
["4", None]
]
],
]
]
def performAction(action, x, a, b):
if a == "x":
a = x
if b == "x":
b = x
a, b = float(a), float(b)
if action == "*":
return a * b
elif action == "+":
return a + b
elif action == "-":
return a - b
elif action == "**":
return a ** b
def calculateNode(node, x, first_time=True):
action, children = node[0], node[1]
if first_time and children == None:
return x if action == "x" else action
left, right = children
left_value = left[0] if left[1] is None else calculateNode(
left, x, first_time=False)
right_value = right[0] if right[1] is None else calculateNode(
right, x, first_time=False)
return performAction(action, x, left_value, right_value)
def getRandomNodeValue(no_children=False):
nodeValueChoices = [float(rng.integers(low=-1000, high=1001) / 10),
np.random.choice(variables)]
if not no_children:
nodeValueChoices = np.append(nodeValueChoices,
np.random.choice(operators))
choice = np.random.choice(nodeValueChoices)
return choice, choice in operators
def generateRandomTree(depth=0, max_depth=4):
no_children = False
if depth == max_depth:
no_children = True
node_value, has_children = getRandomNodeValue(no_children)
return [node_value, [generateRandomTree(depth+1), generateRandomTree(depth+1)] if has_children else None]
def calculateLoss(X, answers, predictionModel):
predictions = []
loss = 0
for i, _ in enumerate(answers):
predictions.append(calculateNode(predictionModel, X[i]))
for set in zip(answers, predictions):
answer, prediction = set
loss += (float(answer) - float(prediction)) ** 2
return loss / len(predictions)
def listTreeNodeCoords(tree):
def listTreeNodesHelper(tree, treeList=[[]], depth=0):
if tree[1] == None:
return treeList
left, right = tree[1]
return np.append(
listTreeNodesHelper(left, treeList=treeList +
[treeList[-1] + [0]], depth=depth+1),
listTreeNodesHelper(right, treeList=treeList +
[treeList[-1] + [1]], depth=depth+1),
)
coords = listTreeNodesHelper(tree)
if len(coords) == 1 and len(coords[0]) == 0:
return []
return np.delete(np.unique(coords), 0)
def getNodeFromCoords(tree, coords):
for index in coords:
tree = tree[1][index]
return tree
def replaceNodeOnTree(tree, node, coords):
if not coords:
return node
value, children = tree
new_children = children.copy() if children else None
if children:
direction = coords[0]
new_children[direction] = replaceNodeOnTree(
children[direction], node, coords[1:])
return [value, new_children]
def getRandomCoordsOfTree(tree):
nodeCoordsList = listTreeNodeCoords(tree)
if len(nodeCoordsList) == 0:
return []
treeCoords = np.random.choice(nodeCoordsList)
return treeCoords
def getRandomNodeFromTree(tree):
nodeCoordsList = listTreeNodeCoords(tree)
if len(nodeCoordsList) == 0:
return tree
treeCoords = np.random.choice(nodeCoordsList)
return getNodeFromCoords(tree, treeCoords)
# Given models sorted from fittest to least fit
# Returns a list of new population of (largely crossed and mutated) models
def crossAndMutate(modelPopulation):
population = []
probabilities = np.linspace(1, 0, 100)
probabilities /= probabilities.sum()
n = len(modelPopulation)
for i in range(n):
# cross
(_, donating), (_, receiving) = [np.random.choice(
modelPopulation[:100], p=probabilities) for i in range(0, 2)]
receiving = replaceNodeOnTree(
receiving, getRandomNodeFromTree(donating), getRandomCoordsOfTree(receiving))
# TODO: the max depth is not respected. depth=2 set arbitrarily
# mutate
receiving = replaceNodeOnTree(
receiving, generateRandomTree(
depth=2), getRandomCoordsOfTree(receiving)
)
# add to population
population.append(receiving)
return population
def tree_to_equation(tree):
"""
Converts a tree model into a human-readable infix mathematical expression.
"""
if tree is None or not tree:
return ""
# Handling basic number or variable case
if not isinstance(tree, list) or len(tree) == 2 and tree[1] is None:
return str(tree[0])
# Extracting the operator and its operands
operator, operands = tree
# Converting each operand recursively and adding brackets for clarity
converted_operands = [
f"({tree_to_equation(operand)})" for operand in operands]
# Formatting expressions based on the operator
if operator == '+':
return ' + '.join(converted_operands)
elif operator == '-':
return ' - '.join(converted_operands)
elif operator == '*':
return ' * '.join(converted_operands)
else:
# For unsupported operators or formats
return "Unsupported Format"
# Parameters
X = np.arange(-100, 100, 0.5)
initial_population = 1000
num_generations = 100 # Specify the number of generations here
# Calculate the answers for each value in X using the target tree
answers = np.array([calculateNode(targetTree, x) for x in X])
def run_generation(models, X, answers):
# Calculate losses for the models
losses = [calculateLoss(X, answers, model) for model in models]
# Zip and sort models based on loss
modelLosses = np.array(list(zip(losses, models)), dtype=[
("loss", float), ("model", object)])
modelLossesSorted = np.sort(modelLosses, order='loss')
# Generate next generation
next_gen = crossAndMutate(modelLossesSorted)
return next_gen, modelLossesSorted
# Initialize first generation of models
current_generation = [generateRandomTree() for _ in range(initial_population)]
# Run the simulation for specified number of generations
print(f"Target equation: {tree_to_equation(targetTree)}")
for gen in range(num_generations):
current_generation, sorted_models = run_generation(
current_generation, X, answers)
print(
f"Generation {gen + 1} - Top model loss: {sorted_models[0]['loss']:.4f}")
if sorted_models[0]['loss'] == 0:
break
# Display final generation's top models
print(f"Total models in the final generation: {len(current_generation)}")
print("Top models from the final generation based on loss:")
for i, (loss, model) in enumerate(sorted_models[:5]): # Display top 5 models
print(f"Rank {i+1}, Loss: {loss:.4f}, Model: {tree_to_equation(model)}")