genetic_algoritm.py
import random
from statistics import mean
import math
class GA:
    def __init__(self):
        self.phase = "GENETIC ALGORITHM"
        # Individuals
        self.n_size = 50 # number of genes (not used directly; len(self.grid) is what the algorithm relies on)
        self.grid = [[0,1]]*50 # list of lists of length self.n_size; self.grid[i] defines the list of possible values for the i-th gene
        # Requirement: each element of self.grid must contain at least 2 elements
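        # Illustrative example (not used here): self.grid = [[0, 1], [0, 1, 2], [-1, 1]]
        # would describe 3-gene individuals such as (1, 2, -1)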
        # Population of individuals
        self.pop_size = 200 # population size (at the end of each iteration, the population size is self.pop_size)
        self.population = set() # population (updated during every iteration)
        self.population_history = dict() # individual:fitness dictionary; self.population_history.keys() lists all individuals already encountered
        self.new_individuals = set() # new individuals added to the population for the next iteration (updated at every iteration)
        self.best_individual = None
        self.individuals_iter = dict()
        self.num_selected = 0 # number of new individuals selected
        # Fitness
        self.min_fitness = 0.01 # in order to define a strictly positive evaluation
        self.average_fitness = 0
        self.best_average_fitness = 0
        self.max_fitness = 0
        # Algorithm
        self.iterations = 50 # stopping criterion (number of iterations)
        self.early_stopping = 5 # stopping criterion (number of iterations without increasing the average fitness of the population)
        self.stop_early_counter = 0
        # Mutation
        self.mutation_prob = 0.05 # probability for a gene to be mutated during the mutation phase
    def reset(self):
        self.population = set()
        self.population_history = dict()
        self.new_individuals = set()
        self.best_individual = None
        self.individuals_iter = dict()
        self.average_fitness = 0
        self.best_average_fitness = 0
        self.max_fitness = 0
        self.stop_early_counter = 0
    def generate_initial_population(self):
        while len(self.population) < self.pop_size:
            tmp = tuple([random.choice(j) for j in self.grid])
            self.population.add(tmp)
            self.new_individuals.add(tmp)
    def evaluate_new_individuals(self):
        f = self.fitness()
        for t in self.new_individuals:
            self.population_history[t] = f[t]
        self.new_individuals = set()
        fitnesses = [self.population_history[t] for t in list(self.population)] # don't compute again what has already been computed
        self.average_fitness = mean(fitnesses)
        self.max_fitness = max(fitnesses)
        # update the counter for early stopping
        if self.average_fitness > self.best_average_fitness:
            self.stop_early_counter = 0
            self.best_average_fitness = self.average_fitness
        else:
            self.stop_early_counter += 1
        return None
    def fitness(self): # to be redefined when using it for another application (see the example subclass after the class definition)
        f = dict()
        fitness = lambda t: sum([((-1)**(i+1))*(i+1)*t[i] for i in range(len(t))])
        for t in self.new_individuals:
            f[t] = max(fitness(t), self.min_fitness) # clamp to self.min_fitness so the evaluation stays strictly positive, as required by the selection in update_pop
        return f
    def select_best(self):
        # Rule: select the best individuals of the population, namely those whose fitness
        # is higher than the average fitness computed over the whole population
        # (capped at half of the individuals of the population)
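        # Illustrative example: with population fitnesses [1, 2, 3, 4] the average is 2.5,
        # so only the two individuals scoring 3 and 4 would be kept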
        pop = list(self.population)
        fitnesses = [self.population_history[t] for t in pop] # don't compute again what has already been computed
        selected = sorted([(a,b) for a,b in zip(fitnesses, pop) if a > self.average_fitness], reverse = True)
        # We need to keep at least 2 individuals (even if they don't beat the average)
        if len(selected) <= 1:
            selected = sorted([(a,b) for a,b in zip(fitnesses, pop)], reverse = True)[:2]
        if len(selected) >= math.ceil(0.5*self.pop_size):
            selected = selected[:math.ceil(0.5*self.pop_size)]
        # Update the population
        self.population = set([a for _,a in selected])
        self.num_selected = len(self.population)
        return None
    def crossover(self, t1, t2): # can be improved
        # select 2 pivots where to cut the individuals
        n_size = len(t1)
        tmp = [i for i in range(n_size)]
        random.shuffle(tmp)
        cut1, cut2 = min(tmp[:2]), max(tmp[:2])
        # do a crossover between t1 and t2
        t1_, t2_ = [], []
        for i in range(n_size):
            if i < cut1 or i >= cut2:
                t1_.append(t1[i])
                t2_.append(t2[i])
            else:
                t1_.append(t2[i])
                t2_.append(t1[i])
        return (tuple(t1_), tuple(t2_))
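    # Illustrative example of the two-point crossover above (not executed): with
    # cut1 = 1 and cut2 = 3, crossover((0,0,0,0,0), (1,1,1,1,1)) returns
    # ((0,1,1,0,0), (1,0,0,1,1)) -- the genes between the two cut points are swapped.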
    def mutate(self, t):
        t_ = []
        for i in range(len(t)):
            if random.random() < self.mutation_prob:
                new_val = random.choice([elt for elt in self.grid[i] if elt != t[i]])
                t_.append(new_val)
            else:
                t_.append(t[i])
        return tuple(t_)
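    # Note on the mutation rate (illustrative arithmetic for the default settings):
    # with mutation_prob = 0.05 and 50 genes, a child carries on average 50 * 0.05 = 2.5 mutated genes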
    def update_pop(self):
        pop_size_before = len(self.population)
        fitnesses_pop = [(self.population_history[t], t) for t in self.population] # list of (fitness, individual) couples
        fitnesses = [f for f, _ in fitnesses_pop]
        pop_selected = [p for _, p in fitnesses_pop]
        # Defines a distribution over the individuals in pop_selected
        # In the selection phase of the individuals to be crossed, the individuals with higher
        # fitness have a greater chance of being selected than the individuals with low fitness
        # Reminder: fitness is a strictly positive evaluation function (f(t) > 0 for all t)
        fit_prob = [i/sum(fitnesses) for i in fitnesses]
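        # Illustrative example: fitnesses [1, 3, 6] give fit_prob [0.1, 0.3, 0.6],
        # i.e. a classic fitness-proportionate (roulette-wheel) selection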
        while len(self.population) < self.pop_size:
            # Select 2 parents
            r1 = random.random()
            r2 = random.random()
            cum = 0
            parent1, parent2 = None, None
            for i in range(pop_size_before):
                cum += fit_prob[i]
                if r1 < cum and parent1 is None:
                    parent1 = pop_selected[i]
                if r2 < cum and parent2 is None:
                    parent2 = pop_selected[i]
                if parent1 is not None and parent2 is not None:
                    break
            # Make them reproduce (don't forget to add some genetic diversity
            # via the gene mutation principle)
            individuals_encountered = set(self.population_history.keys())
            if parent1 != parent2:
                t1_, t2_ = self.crossover(parent1, parent2)
                t1_ = self.mutate(t1_)
                t2_ = self.mutate(t2_)
                # Add individuals to the population if they are not carbon
                # copies of individuals already encountered in the past
                if t1_ not in individuals_encountered:
                    self.population.add(t1_)
                    self.new_individuals.add(t1_)
                if len(self.population) != self.pop_size and t2_ not in individuals_encountered:
                    self.population.add(t2_)
                    self.new_individuals.add(t2_)
        return None
    def update_individuals_iter(self, i):
        for t in self.new_individuals:
            self.individuals_iter[t] = i
    def print_100_best(self):
        # Print the 100 best individuals
        # If there are fewer than 100 individuals in the population history, print all of them
        r = sorted([(self.population_history[t], t) for t in self.population_history.keys()], reverse=True)
        for i in range(min(len(r), 100)):
            print(f'First best individuals -- rank: {i}, fitness: {r[i][0]}, individual: {r[i][1]}')
    def print_all(self):
        # Print every individual encountered, best first, together with the iteration at which it appeared
        r = sorted([(self.population_history[t], self.individuals_iter[t], t) for t in self.population_history.keys()], reverse=True)
        for i in range(len(r)):
            print(f'{self.phase} -- First best individuals -- rank: {i}, fitness: {r[i][0]}, iteration: {r[i][1]}, individual: {r[i][2]}')
    def algo(self):
        self.generate_initial_population()
        self.update_individuals_iter(0)
        self.evaluate_new_individuals()
        for i in range(self.iterations):
            self.select_best() # reduce the size of the population to the best elements
            self.update_pop() # add new individuals to the population (via crossover and mutation)
            self.update_individuals_iter(i+1)
            self.evaluate_new_individuals()
            if self.stop_early_counter >= self.early_stopping:
                break # early stopping: the average fitness has not improved for self.early_stopping iterations
        r = sorted([(self.population_history[t], t) for t in self.population], reverse=True)
        self.best_individual = r[0][1]
        self.max_fitness = self.population_history[self.best_individual]
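# Example sketch: GA.fitness() is meant to be redefined when the algorithm is used
# for another application. The subclass below shows one possible way to do that on
# a toy "OneMax" objective (maximise the number of ones); the class name, grid and
# parameter values are illustrative choices, not part of the original algorithm.
class OneMaxGA(GA):
    def __init__(self):
        super().__init__()
        self.grid = [[0, 1] for _ in range(20)] # 20 binary genes
        self.pop_size = 50
        self.iterations = 20
    def fitness(self):
        # Same contract as GA.fitness: map every individual in self.new_individuals
        # to a strictly positive score (here, the number of ones, clamped by min_fitness)
        return {t: max(sum(t), self.min_fitness) for t in self.new_individuals}
# Usage would mirror the default class, e.g.: ga = OneMaxGA(); ga.algo(); print(ga.max_fitness)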
if __name__ == "__main__":
    # The default genetic algorithm tries to find the maximum of a simple function
    # f(x1, x2, ..., x50) = sum_i [(-1)^i * xi * i]
    # with xi in {0, 1} for all i
    # The best and unique solution is (x1, x2, x3, x4, ..., x47, x48, x49, x50) = (0, 1, 0, 1, ..., 0, 1, 0, 1)
    # and the maximum is 650
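    # Quick sanity check of the optimum stated above: the alternating pattern
    # (0, 1, 0, 1, ...) collects the positive coefficients 2 + 4 + ... + 50 = 650
    assert sum(((-1) ** (i + 1)) * (i + 1) * ((0, 1) * 25)[i] for i in range(50)) == 650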
    random.seed(42)
    ga = GA()
    search_space_size = math.prod([len(i) for i in ga.grid])
    print(f"Search space size: {search_space_size} individuals")
    ga.algo()
    print(f"Individuals evaluated: {len(ga.population_history)}")
    print(f'Maximum found: {ga.max_fitness}')
    print(f'Best individual: {ga.best_individual}')