"""
a system that learns the dice game Farkle.
Takeaways:
1. large batch size is critical when there is high variance in outcomes;
otherwise what a system absorbs is randomized by the scores between batches
2. setting learning rate is hard; this is why ugly addition-based approaches
work well and are the standard method.
3. corollary: regularization sucks but is a reasonable way to keep weights
in a reasonable size range
"""
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
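

# A minimal sketch (hypothetical helper, not used anywhere below) of
# takeaway 1: the mean of a high-variance score distribution is itself a
# noisy estimate, shrinking only as 1/sqrt(batch size), so small batches
# mostly compare luck rather than policies.
def _batch_noise_demo(batch_sizes=(100, 1000, 8000), trials=50):
    rng = np.random.default_rng(0)
    # crude stand-in for Farkle outcomes: many zeros, occasional big scores
    outcomes = np.array([0, 0, 0, 500, 2000])
    for b in batch_sizes:
        means = [rng.choice(outcomes, size=b).mean() for _ in range(trials)]
        print('batch size {:>5}: std of mean estimate = {:.1f}'
              .format(b, np.std(means)))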


class Farkle:
    def __init__(self, verbosity=0):
        self.current_score = 0
        # weights for the features in decide(); the last entry is the
        # decision threshold
        # self.theta = [20.0, -1, -.05, .05, 20]
        self.theta = np.random.rand(4)
        self.score_history = []
        self.v = verbosity

    @staticmethod
    def roll(n):
        # roll n dice (static because it does not affect game state)
        return np.random.randint(1, 7, size=n)
    @staticmethod
    def score(dice, greedy=0):
        # take a roll; return (score, number of leftover dice)
        # (static because scoring does not affect game state)
        n = len(dice)
        if n > 6:
            raise ValueError('invalid roll: more than 6 dice')
        c = Counter(dice)
        # 6 of a kind
        if max(c.values()) == 6:
            return 3000, 0
        # straight
        elif max(c.values()) == 1 and n == 6:
            return 1500, 0
        # 5 of a kind
        elif max(c.values()) == 5:
            if c[1] == 1:
                return 2100, 0
            elif c[5] == 1:
                return 2050, 0
            else:
                return 2000, n - 5
        # 4 of a kind (or full house)
        elif max(c.values()) == 4:
            # full house
            if min(c.values()) == 2:
                return 1500, 0
            if c[1] == 1 and c[5] == 1:
                return 1150, 0
            # fewer than 6 dice used
            if greedy:
                if c[1] == 1:
                    return 1100, n - 5
                elif c[5] == 1:
                    return 1050, n - 5
                else:
                    return 1000, n - 4
            else:
                # bank only the four of a kind
                return 1000, n - 4
        # 3 of a kind
        elif max(c.values()) == 3:
            # double triple
            if list(c.values()) == [3, 3]:
                return 2500, 0
            # single three-of-a-kind
            else:
                # score the three of a kind by its face value
                face = c.most_common(1)[0][0]
                del c[face]
                if face == 1:
                    temp_score = 300
                else:
                    temp_score = face * 100
                used = 3
                # score the remainder
                if greedy or set(c.keys()) <= {1, 5}:
                    # use all leftovers if every one scores, or if greedy
                    temp_score += c[1] * 100
                    used += c[1]
                    temp_score += c[5] * 50
                    used += c[5]
                    return temp_score, n - used
                else:  # use only the 3 of a kind otherwise
                    return temp_score, n - 3
        # 3 pairs
        elif list(c.values()) == [2, 2, 2]:
            return 1500, 0
        # no combination; only loose 1s and 5s can score
        else:
            if greedy or set(c.keys()) <= {1, 5}:
                temp_score = c[1] * 100
                used = c[1]
                temp_score += c[5] * 50
                used += c[5]
                return temp_score, n - used
            else:
                # keep a single scoring die and re-roll the rest
                if c[1]:
                    return 100, n - 1
                elif c[5]:
                    return 50, n - 1
                else:
                    return 0, n
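
    # Example, hand-checked against score() above: for the roll
    # [2, 2, 2, 1, 5, 6] the triple of 2s is worth 200; non-greedy banks
    # only the triple -> (200, 3), while greedy also banks the loose 1
    # and 5 -> (350, 1).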

    def play(self):
        self.current_score = 0
        self._play(6)

    def _play(self, num_dice):
        dice = self.roll(num_dice)
        if self.v:
            print('rolled {}:'.format(num_dice), dice)
        temp_score, remaining_dice = self.score(dice)
        if temp_score:
            if self.v:
                print('scored', temp_score)
            self.current_score += temp_score
            # "hot dice": all six scored, so all six may be rolled again
            if not remaining_dice:
                remaining_dice = 6
            decision = self.decide(remaining_dice)
            if decision:
                return self._play(remaining_dice)
            else:
                if self.v:
                    print('quit while ahead w/ {}'.format(self.current_score))
                self.score_history.append(self.current_score)
                return self.current_score
        # farkle: no scoring dice, so the whole turn is worth nothing
        else:
            if self.v:
                if self.current_score:
                    print('you lost {} points by being greedy'
                          .format(self.current_score))
                else:
                    print('unlucky, score = 0')
            self.score_history.append(0)
            return 0

    def decide(self, num_dice):
        # return True to keep rolling or False to stop
        score = self.current_score
        t = self.theta
        activation = sum([num_dice / 6 * t[0],
                          score / 3000 * t[1],
                          score / num_dice / 500 * t[2]])
        # np.log10(score + .000001) * t[1],
        # num_dice / (score + 1) * self.theta[3]])
        return activation > t[-1]
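
    # Worked example (weights chosen for illustration, not from a trained
    # run): with theta = [20, -1, -0.05, 5], 6 dice, and a banked score of
    # 600, the activation is 6/6*20 + 600/3000*(-1) + 600/6/500*(-0.05)
    # = 19.79 > t[-1] = 5, so the policy keeps rolling.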

    def display(self):
        # decision boundary: which (dice remaining, score) states keep rolling
        plt.figure()
        for dice in range(1, 7):
            for i in range(31):
                self.current_score = 100 * i
                choice = self.decide(dice)
                if choice:
                    plt.scatter(dice, self.current_score, c='green',
                                label='continue')
                else:
                    plt.scatter(dice, self.current_score, c='red',
                                label='stop')
        plt.title('decision boundary; red=stop')
        plt.xlabel('dice remaining')
        plt.ylabel('score')
        plt.show()
        # histogram of final scores over all recorded games
        plt.figure()
        plt.title('score distribution')
        plt.xlabel('score')
        plt.ylabel('frequency')
        plt.hist(self.score_history, bins=40)
        plt.show()


if __name__ == '__main__':
    f = Farkle()
    best_score = 0
    best_theta = f.theta.copy()
    games = 8000
    f.score_history = []
    # baseline performance with the random initial theta
    for game in range(games):
        f.play()
    # f.display()
    last = np.mean(f.score_history)
    best_time = 0
    delta = 100
    print('running {} games per epoch for each weight'.format(games))
    print('initial score = {}, theta={}'.format(last, f.theta))
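    # What follows is brute-force hill climbing: nudge each weight by
    # +/-delta, replay a batch of games, and step toward whichever direction
    # scored better; best_theta remembers the best setting seen, and delta
    # shrinks whenever progress stalls.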
    for step in range(50):
        if last == 0:
            last = 1  # avoid dividing by zero when normalizing below
        # finite-difference gradient: score(theta + delta) - score(theta - delta)
        gradient = np.zeros(len(f.theta))
        if step - best_time > 4:
            # no improvement for several steps: shrink the perturbation
            delta /= 4
            best_time = step
            print('delta =', delta)
        if delta < 10:
            # stop once the changes are too small to matter
            f.theta = best_theta.copy()
            break
        # compute partials by brute force: one batch of games per direction
        for w in range(len(f.theta)):
            # save the old weight
            temp = f.theta[w]
            # try increasing theta[w]
            f.theta[w] += delta
            f.score_history = []
            for _ in range(games):
                f.play()
            plus = np.mean(f.score_history) / last
            # try decreasing theta[w]
            f.theta[w] = temp - delta
            f.score_history = []
            for _ in range(games):
                f.play()
            minus = np.mean(f.score_history) / last
            gradient[w] = plus - minus
            # restore the weight
            f.theta[w] = temp
        # step along the gradient, clipped so no weight moves more than delta/10
        f.theta += np.clip(gradient * delta, -delta / 10, delta / 10)
        print('for step=', step)
        print('gradient =', gradient)
        print('theta set =', f.theta)
        # test the new position
        f.score_history = []
        for _ in range(games):
            f.play()
        current = np.mean(f.score_history)
        print('new score =', current)
        if current > best_score:
            best_time = step
            best_theta = f.theta.copy()
            best_score = current
        last = current

    # finish with one verbose game and the plots, using the best weights
    f.theta = best_theta.copy()
    print(f.theta)
    f.v = 1
    f.play()
    f.display()