-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMonteCarlo.py
138 lines (118 loc) · 3.94 KB
/
MonteCarlo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import math
import random
from abc import ABC, abstractmethod
def indices(lst, element):
result = []
offset = -1
while True:
try:
offset = lst.index(element, offset+1)
except ValueError:
return result
result.append(offset)
class Node:
def __init__(self, tree, color, state, moves, parent=None):
self.tree = tree
self.color = color # True or False
self.parent = parent
self.wins = 0
self.games = 0
self.state = state
self.moves = moves
self.children = [None for i in moves]
self.winner = self.tree.getWinner(self.state, self.color)
def isLeaf(self):
return self.children == []
def isTerminal(self):
return self.winner != None
def isExpanded(self):
for i in self.children:
if i == None:
return False
return True
def expand(self):
index = random.choice(indices(self.children, None))
newState = self.tree.stateTransition(self.state, self.moves[index], not self.color)
newMoves = self.tree.getMoves(newState, not self.color)
newNode = Node(self.tree, not self.color, newState, newMoves, self)
self.children[index] = newNode
return newNode
class MonteCarloTree(ABC):
def __init__(self, initState = []):
if initState == []:
initState = self.getInitialState()
self.root = Node(self, True, initState, self.getMoves(initState, True))
@staticmethod
@abstractmethod
def getInitialState():
pass
@staticmethod
@abstractmethod
def getMoves(state, moveColor):
pass
@staticmethod
@abstractmethod
def stateTransition(state, move, moveColor):
pass
@staticmethod
@abstractmethod
def getWinner(state, moveColor):
pass
def backPropagate(self, n):
winner = n.winner
while n != None:
n.games += 1
if winner == "draw":
n.wins += .5
elif n.color == winner:
n.wins += 1
n = n.parent
def playout(self, n):
while not n.isTerminal():
if not n.isExpanded():
n = n.expand()
else:
print(n.isTerminal())
print(n.isExpanded())
print(n.children)
for row in n.state:
print(row)
print()
n = random.choice(n.children)
self.backPropagate(n)
def getScore(self, n):
return (n.wins/n.games)+math.sqrt(2)*math.sqrt(math.log(self.root.games)/n.games)
def treePolicy(self):
currentNode = self.root
while not currentNode.isTerminal():
if currentNode.isExpanded():
scores = [self.getScore(n) for n in currentNode.children]
currentNode = currentNode.children[scores.index(max(scores))]
else:
return currentNode.expand()
return currentNode
def iteration(self):
selectedNode = self.treePolicy()
self.playout(selectedNode)
def chooseMove(self):
bestChild = self.root.children[0]
for child in self.root.children[1:]:
if child.games > bestChild.games:
bestChild = child
self.root = bestChild
return self.root.isTerminal()
def decide(self, numIterations=10000):
for _ in range(numIterations):
self.iteration()
gameIsOver = self.chooseMove()
return self.root.state, gameIsOver
@abstractmethod
def printBoard(self):
for row in self.root.state:
print(row)
print()
def __str__(self):
rep = str(self.root.wins) + "/" + str(self.root.games) + "\n"
for child in self.root.children:
rep += str(child.wins) + "/" + str(child.games) + " "
return rep