-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathucb.py
146 lines (131 loc) · 4.18 KB
/
ucb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Cmput 455 sample code
# UCB algorithm
# Written by Martin Mueller
from math import log,sqrt
from board_util import GoBoardUtil
from pattern_util import PatternUtil
import sys
#from gtp_connection import point_to_coord, format_point
# Larger than any finite score: ucb() returns it for arms with zero pulls,
# and its negation seeds the running maximum in findBest() and bestArm().
INFINITY = float("inf")

# Integer codes 0..3. They are not referenced by the code visible in this
# file; presumably board point states mirrored from the board module --
# TODO confirm against board_util.
EMPTY, BLACK, WHITE, BORDER = 0, 1, 2, 3

# A pass move is represented by None (see point_to_coord / format_point).
PASS = None

# Bound used by format_point when range-checking a (row, col) pair.
MAXSIZE = 25
def mean(stats, i):
    """
    Return the empirical win rate of arm i.

    stats[i] is a [wins, pulls] pair; the result is wins / pulls.
    Raises ZeroDivisionError when the arm has not been pulled yet, so
    callers must check the pull count first (ucb() does).
    """
    arm = stats[i]
    return arm[0] / arm[1]
def ucb(stats, C, i, n):
    """
    Return the upper-confidence-bound score of arm i.

    stats: list of [wins, pulls] pairs, one per arm.
    C:     exploration constant scaling the confidence term.
    i:     index of the arm to score.
    n:     total number of simulations run so far.

    An arm with zero pulls scores INFINITY, so it is tried before any arm
    that already has statistics. Otherwise the score is
    mean + C * sqrt(log(n) / pulls).
    """
    pulls = stats[i][1]
    if pulls == 0:
        # Also guards the divisions below against a zero denominator.
        return INFINITY
    exploitation = mean(stats, i)
    exploration = C * sqrt(log(n) / pulls)
    return exploitation + exploration
def findBest(stats, C, n):
    """
    Return the index of the arm with the highest UCB score.

    Ties go to the lowest index because only a strictly greater score
    replaces the current leader. Fails an assertion if no arm scores
    above -INFINITY (in particular when stats is empty).
    """
    top_index, top_score = -1, -INFINITY
    for arm in range(len(stats)):
        candidate = ucb(stats, C, arm, n)
        if candidate > top_score:
            top_index, top_score = arm, candidate
    assert top_index != -1
    return top_index
def bestArm(stats):
    """
    Return the index of the most-pulled arm.

    stats is a list of [wins, pulls] pairs. Ties go to the lowest index.
    Fails an assertion when stats is empty.
    """
    top_index, top_pulls = -1, -INFINITY
    for arm, entry in enumerate(stats):
        if entry[1] > top_pulls:
            top_index, top_pulls = arm, entry[1]
    assert top_index != -1
    return top_index
# Sort key for move records laid out as (move, percentage, ...).
def byPercentage(tuple):
    """
    Return the percentage field (index 1) of a move record.
    """
    percentage = tuple[1]
    return percentage
# Sort key for move records laid out as (move, percentage, wins, pulls).
def byPulls(tuple):
    """
    Return the pulls field (index 3) of a move record.

    Requires a record with at least four fields; the (label, rate) pairs
    that writeMoves builds in this file are too short for this key.
    """
    pulls = tuple[3]
    return pulls
def writeMoves(board, moves, stats):
    """
    Turn UCB statistics into move labels and normalised win-rate shares.

    Parameters:
        board: object whose `size` attribute is passed to point_to_coord.
        moves: candidate moves as board point indices; PASS (None) allowed.
        stats: [wins, pulls] pairs, parallel to moves.

    Returns:
        (points, probs): two parallel lists ordered by win rate, highest
        first. points holds the format_point label of each move; probs
        holds each move's win rate divided by the sum of all win rates,
        rounded to 3 decimals. When every win rate is zero there is
        nothing to normalise by, so every share is reported as 0.0.
    """
    gtp_moves = []
    for move, arm in zip(moves, stats):
        wins, pulls = arm[0], arm[1]
        # point_to_coord and format_point both pass PASS through, so a pass
        # move is labelled "pass" instead of reusing the previous label.
        pointString = format_point(point_to_coord(move, board.size))
        # An arm that was never pulled has no win rate; report it as 0.0.
        winrate = wins / pulls if pulls != 0 else 0.0
        gtp_moves.append((pointString, winrate))

    # Sort in place: sorted() returns a new list, which was being discarded.
    gtp_moves.sort(key=byPercentage, reverse=True)

    points = [label for label, _ in gtp_moves]
    probs = [rate for _, rate in gtp_moves]
    total = sum(probs)
    if total == 0:
        # No wins recorded anywhere (or no moves): avoid dividing by zero.
        return points, [0.0 for _ in probs]
    return points, [round(rate / total, 3) for rate in probs]
def simulate(board, move, toplay, *, random_simulation=True, use_pattern=False):
    """
    Run one simulated game that starts with `toplay` playing `move`.

    The board is copied first, so the caller's board is not modified by
    the play_move call made here. After the move, the playout continues
    with the opponent to move.

    Parameters:
        board:  position to start from; must provide copy() and play_move().
        move:   the first move of the simulation.
        toplay: the colour making `move`.
        random_simulation, use_pattern: keyword-only playout policy flags
            forwarded to PatternUtil.playGame. The defaults reproduce the
            previously hard-coded values.

    Returns:
        Whatever PatternUtil.playGame returns; runUcb compares it to
        `toplay` to decide whether the simulation counts as a win.
    """
    cboard = board.copy()
    cboard.play_move(move, toplay)
    opp = GoBoardUtil.opponent(toplay)
    return PatternUtil.playGame(
        cboard,
        opp,
        komi=0,
        limit=100,
        random_simulation=random_simulation,
        use_pattern=use_pattern,
        check_selfatari=False,
    )
def runUcb(board, C, moves, toplay, sim_num, get_best):
    """
    Run UCB over the candidate moves and report the outcome.

    Performs len(moves) * sim_num simulations. Each round picks the arm
    with the highest UCB score, simulates it once, and updates that arm's
    [wins, pulls] pair; a simulation counts as a win when its result
    equals `toplay`.

    Returns the most-pulled move when get_best is true, otherwise the
    (points, probs) pair produced by writeMoves.
    """
    stats = [[0, 0] for _ in moves]
    for n in range(len(moves) * sim_num):
        chosen = findBest(stats, C, n)
        winner = simulate(board, moves[chosen], toplay)
        arm = stats[chosen]
        if winner == toplay:
            arm[0] += 1  # win
        arm[1] += 1
    if not get_best:
        return writeMoves(board, moves, stats)
    return moves[bestArm(stats)]
def point_to_coord(point, boardsize):
    """
    Convert a board array index into a (row, col) pair.

    The index is split with divmod using a row stride of boardsize + 1.
    PASS is returned unchanged.
    """
    if point == PASS:
        return PASS
    stride = boardsize + 1
    return divmod(point, stride)
def format_point(move):
    """
    Return move coordinates as a string such as 'A1', or 'pass'.

    Parameters:
        move: PASS, or a (row, col) pair with both values in 1..MAXSIZE.

    Returns:
        "pass" for PASS; otherwise the column letter followed by the row
        number. The letter 'I' is not used, so column 9 is 'J'.

    Raises:
        ValueError: if row or col lies outside 1..MAXSIZE.
    """
    column_letters = "ABCDEFGHJKLMNOPQRSTUVWXYZ"
    if move == PASS:
        return "pass"
    row, col = move
    # Coordinates are 1-based. Accepting 0 would make column_letters[col - 1]
    # wrap around to 'Z', and rejecting MAXSIZE would leave the last
    # row/column of a MAXSIZE board unformattable.
    if not (1 <= row <= MAXSIZE and 1 <= col <= MAXSIZE):
        raise ValueError(
            "coordinate out of range: ({}, {})".format(row, col)
        )
    return column_letters[col - 1] + str(row)