-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathothello_ai.py
253 lines (226 loc) · 11.4 KB
/
othello_ai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""
All AI algorithms are handled in this file
"""
from copy import deepcopy
from random import choice
from time import sleep
from pickle import load
from os.path import join
from othello import Othello
# Board score based on position used for heuristic function
val1 = [[10, -3, 2, 1, 1, 2, -3, 10],
[-3, -5, 0, 0, 0, 0, -5, -3],
[2, 0, 0, 0, 0, 0, 0, 2],
[1, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 1],
[2, 0, 0, 0, 0, 0, 0, 2],
[-3, -5, 0, 0, 0, 0, -5, -3],
[10, -3, 2, 1, 1, 2, -3, 10]]
val2 = [[10, -3, 2, 1, 1, 2, -3, 10],
[-3, -5, -1, -1, -1, -1, -5, -3],
[2, -1, 2, 0, 0, 2, -1, 2],
[1, -1, 0, 0, 0, 0, -1, 1],
[1, -1, 0, 0, 0, 0, -1, 1],
[2, -1, 2, 0, 0, 2, -1, 2],
[-3, -5, -1, -1, -1, -1, -5, -3],
[10, -3, 2, 1, 1, 2, -3, 10]]
val3 = [[30, -9, 2, 1, 1, 2, -9, 30],
[-9, -5, 0, -1, -1, 0, -5, -9],
[3, 0, 2, 0, 0, 2, 0, 3],
[1, -1, 0, 0, 0, 0, -1, 1],
[1, -1, 0, 0, 0, 0, -1, 1],
[3, 0, 2, 0, 0, 2, 0, 3],
[-9, -5, 0, -1, -1, 0, -5, -9],
[30, -9, 2, 1, 1, 2, -9, 30]]
# Given an othello object and ai level, return the move made by the ai
def AI_move(othello, level, delay):
if level == 1: return level1(othello, delay)
if level == 2: return level2(othello, delay)
if level == 3: return level3(othello)
if level == 4: return level4(othello)
if level == 5: return level5(othello)
if level == 6: return level6(othello)
# Level 1 AI: make completely random moves
def level1(ot, delay):
moves = ot.get_possible_moves()
if delay: sleep(0.5)
return choice(moves)
# Level 2 AI: tries to capture corners and prevent capturing buffer pieces
def level2(ot, delay):
moves = ot.get_possible_moves()
score_move = [-99999, []]
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
new_score = negamax(temp_ot, 0, -99999, 99999, 1) * (1 if temp_ot.turn == 2 else -1)
if new_score > score_move[0]: score_move = [new_score, [move]]
elif new_score == score_move[0]: score_move[1].append(move)
if delay: sleep(0.5)
return choice(score_move[1])
# Level 3 AI: looks 2 moves ahead, evaluate a board state based on a trained model
def level3(ot):
moves = ot.get_possible_moves()
score_move = [-99999, []]
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
new_score = negamax(temp_ot, 1, -99999, 99999, 2) * (1 if temp_ot.turn == 2 else -1)
if new_score > score_move[0]: score_move = [new_score, [move]]
elif new_score == score_move[0]: score_move[1].append(move)
return choice(score_move[1])
# Level 4 AI: looks 3 moves ahead based on model, looks 4 moves ahead based on piece heuristic in endgame
def level4(ot):
moves = ot.get_possible_moves()
score_move = [-99999, []]
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
if ot.move_no < 53: new_score = negamax(temp_ot, 2, -99999, 99999, 2) * (1 if temp_ot.turn == 2 else -1)
else: new_score = negamax(temp_ot, 3, -99999, 99999, 4) * (1 if temp_ot.turn == 2 else -1)
if new_score > score_move[0]: score_move = [new_score, [move]]
elif new_score == score_move[0]: score_move[1].append(move)
return choice(score_move[1])
# Level 5 AI: TODO
def level5(ot):
moves = ot.get_possible_moves()
score_move = [-99999, []]
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
if ot.move_no < 50: new_score = negamax(temp_ot, 2, -99999, 99999, 2) * (1 if temp_ot.turn == 2 else -1)
else: new_score = negamax(temp_ot, 6, -99999, 99999, 4) * (1 if temp_ot.turn == 2 else -1)
if new_score > score_move[0]: score_move = [new_score, [move]]
elif new_score == score_move[0]: score_move[1].append(move)
return choice(score_move[1])
# Level 6 AI: TODO
def level6(ot):
moves = ot.get_possible_moves()
score_move = [-99999, []]
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
if ot.move_no < 50: new_score = negamax(temp_ot, 0, -99999, 99999, 3) * (1 if temp_ot.turn == 2 else -1)
else: new_score = negamax(temp_ot, 10, -99999, 99999, 4) * (1 if temp_ot.turn == 2 else -1)
new_score = negamax(temp_ot, 2, -99999, 99999, 3) * (1 if temp_ot.turn == 2 else -1)
if new_score > score_move[0]: score_move = [new_score, [move]]
elif new_score == score_move[0]: score_move[1].append(move)
return choice(score_move[1])
# Perform negamax algorithm with alpha beta pruning, with the required depth and heuristic evaluator type
def negamax(ot, depth, alpha, beta, type):
if depth == 0 or ot.check_victory() != 0:
if type == 1: return heuristic_eval1(ot.board)
if type == 2: return heuristic_eval2(ot.board, ot.turn, ot.move_no)
if type == 3: return heuristic_eval3(ot.board, ot.turn, ot.move_no)
if type == 4: return heur_pieces(ot.board)
moves = ot.get_possible_moves()
ot.check_no_move(moves)
score = -99999
for move in moves:
temp_ot = deepcopy(ot)
temp_ot.make_move(move)
new_score = negamax(temp_ot, depth - 1, -beta, -alpha, type) * (1 if ot.turn == 1 else -1)
score = max(score, new_score)
alpha = max(alpha, score)
if beta <= alpha: break
return score * (1 if ot.turn == 1 else -1)
# Evaluate a board state based on some predetermined tile score, roughly gained through learning process
def heuristic_eval1(state):
return sum(val1[i][j] * (1 if state[i][j] == 1 else -1 if state[i][j] == 2 else 0) for i in range(8) for j in range(8))
# Evaluate a board state based on some trained model by regression supervised learning
def heuristic_eval2(state, turn, move_no):
heuristics = heuristics_all(state, turn)
if move_no >= 50: model = load(open(join("learning_data", "model1.sav"), 'rb'))
elif 41 <= move_no <= 50: model = load(open(join("learning_data", "model2.sav"), 'rb'))
elif 21 <= move_no <= 40: model = load(open(join("learning_data", "model3.sav"), 'rb'))
elif 1 <= move_no <= 20: model = load(open(join("learning_data", "model4.sav"), 'rb'))
return round(model.predict([heuristics])[0])
def heuristic_eval3(state, turn, move_no):
# heuristics = heuristics_all2(state, turn)
# if move_no >= 50: model = load(open(join("learning_data", "model1.sav"), 'rb'))
# elif 41 <= move_no <= 50: model = load(open(join("learning_data", "model2.sav"), 'rb'))
# elif 21 <= move_no <= 40: model = load(open(join("learning_data", "model3.sav"), 'rb'))
# elif 1 <= move_no <= 20: model = load(open(join("learning_data", "model4.sav"), 'rb'))
# return round(model.predict([heuristics])[0])
return sum(val1[i][j] * (1 if state[i][j] == 1 else -1 if state[i][j] == 2 else 0) for i in range(8) for j in range(8))
# if move_no >= 41:
# a, b, c, d, e = 1, 1, 1, 5, 4
# elif 21 <= move_no <= 40:
# a, b, c, d, e = 0.5, 1.5, 1, 3, 7
# elif move_no <= 20:
# a, b, c, d, e = 0, 2, 2, 1, 10
# score = heur_pieces(state) * a + heur_weight2(state) * b + heur_mobility(state) * c + heur_parity(state, turn) * d + heur_stablility(state) * e
# #score = heur_weight2(state)
# return score
# Returns a list of all the heuristic values ordered as the following
def heuristics_all(board, turn):
return [heur_pieces(board), heur_weight1(board), heur_mobility(board), heur_parity(board, turn), heur_stablility(board)]
def heuristics_all2(board, turn):
return [heur_pieces(board), heur_weight2(board), heur_mobility(board), heur_parity(board, turn), heur_stablility(board)]
# Returns the difference between the black pieces and white pieces
def heur_pieces(board):
black_tiles, white_tiles = count_tiles(board)
return black_tiles - white_tiles
# Returns a weighted score of a board state based on different tile weights
def heur_weight1(board):
return sum(val2[i][j] * (1 if board[i][j] == 1 else -1 if board[i][j] == 2 else 0) for i in range(8) for j in range(8))
def heur_weight2(board):
return sum(val3[i][j] * (1 if board[i][j] == 1 else -1 if board[i][j] == 2 else 0) for i in range(8) for j in range(8))
# Returns the difference between the possible move that can be made by black and white on a certain state
def heur_mobility(board):
temp_ot = Othello(8, 8)
temp_ot.board = board
temp_ot.turn = 1
moves_black = len(temp_ot.get_possible_moves())
temp_ot.turn = 2
moves_white = len(temp_ot.get_possible_moves())
return moves_black - moves_white
# Returns 1 if black is expected to make the last move in the game, otherwise returns -1
def heur_parity(board, turn):
black_tiles, white_tiles = count_tiles(board)
empty_tiles = 64 - black_tiles - white_tiles
return (-1 * (1 if turn == 1 else -1) if empty_tiles % 2 == 0 else 1 * (1 if turn == 1 else -1))
# Returns the difference of lower bound of stable black and white tiles on the sides only
def heur_stablility(board):
sides = [[board[0][i] for i in range(8)], [board[7][i] for i in range(8)], [board[i][0] for i in range(8)], [board[i][7] for i in range(8)]]
score = sum(check_stable_side(side) for side in sides)
score += sum((1 if board[x[0]][x[1]] == 1 else - 1) if board[x[0]][x[1]] != 0 else 0 for x in [(0, 0), (0, 7), (7, 0), (7, 7)])
return score
# Returns the difference of stable black and white tiles on a side excluding the corner
def check_stable_side(tiles):
if all(tile != 0 for tile in tiles):
return sum((1 if tiles[i] == 1 else -1) for i in range(1, 7, 1))
score = 0
if tiles[0] != 0:
for i in range(6):
if tiles[1 + i] == tiles[0]: score += (1 if tiles[0] == 1 else -1)
else: break
if tiles[7] != 0:
for i in range(6):
if tiles[6 - i] == tiles[7]: score += (1 if tiles[7] == 1 else -1)
else: break
return score
# Given a board, returns the number of black tiles, number of white tiles
def count_tiles(board):
black_tiles, white_tiles = 0, 0
for i in range(8):
for j in range(8):
if board[i][j] == 1: black_tiles += 1
if board[i][j] == 2: white_tiles += 1
return black_tiles, white_tiles
# Used for training purpose to gain a rough estimate value for different tiles
def get_values(board, turn):
a, b, c, d, e, f, g, h, i, j = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
for p in range(8):
for q in range(8):
mul = (1 if board[p][q] == 1 else -1 if board[p][q] == 2 else 0)
if (p, q) in [(0, 0), (0, 7), (7, 0), (7, 7)]: a += 1 * mul
if (p, q) in [(0, 1), (1, 0), (0, 6), (1, 7), (6, 0), (7, 1), (6, 7), (7, 6)]: b += 1 * mul
if (p, q) in [(0, 2), (2, 0), (0, 5), (2, 7), (5, 0), (7, 2), (5, 7), (7, 5)]: c += 1 * mul
if (p, q) in [(0, 3), (3, 0), (0, 4), (3, 7), (4, 0), (7, 3), (4, 7), (7, 4)]: d += 1 * mul
if (p, q) in [(1, 1), (1, 6), (6, 1), (6, 6)]: e += 1 * mul
if (p, q) in [(1, 2), (2, 1), (1, 5), (2, 6), (5, 1), (6, 2), (5, 6), (6, 5)]: f += 1 * mul
if (p, q) in [(1, 3), (3, 1), (1, 4), (3, 6), (4, 1), (6, 3), (4, 6), (6, 4)]: g += 1 * mul
if (p, q) in [(2, 2), (2, 5), (5, 2), (5, 5)]: h += 1 * mul
if (p, q) in [(2, 3), (2, 4), (3, 2), (3, 5), (4, 2), (4, 5), (5, 3), (5, 4)]: i += 1 * mul
if (p, q) in [(3, 3), (4, 4), (4, 4), (3, 3)]: j += 1 * mul
return [a, b, c, d, e, f, g, h, i, j, turn]