-
Notifications
You must be signed in to change notification settings - Fork 0
/
gamecomponents.py
82 lines (56 loc) · 2.09 KB
/
gamecomponents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from typing import List, Dict, NamedTuple, Optional
import math
import random
import statistics
## object used to represent the actions
class Action(object):
    """A discrete action identified by an integer index.

    Instances hash to their index and compare by index, so two ``Action``
    objects with the same index are interchangeable as dict keys.
    """

    # RandomAction draws indices from 0 to max_ind - 1 inclusive.
    max_ind = 2

    def __init__(self, index: int):
        """Store the integer ``index`` that identifies this action."""
        self.index = index

    def __hash__(self):
        """Return the index itself as the hash value."""
        return self.index

    def __eq__(self, other):
        """Return True when ``other`` carries the same index.

        Objects without an ``index`` attribute (``None``, plain ints, ...)
        yield ``NotImplemented`` so Python falls back to its default
        comparison instead of raising ``AttributeError``.  This matters
        because an ``Action`` hashes like its integer index, so dict/set
        probing may compare it against unrelated keys.
        """
        try:
            return self.index == other.index
        except AttributeError:
            return NotImplemented

    def __gt__(self, other):
        """Return True when this action's index is greater than ``other``'s.

        Returns ``NotImplemented`` for objects without an ``index`` so the
        interpreter raises its standard ``TypeError`` for unsupported
        orderings.
        """
        try:
            return self.index > other.index
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        """Return a debugging representation such as ``Action(1)``."""
        return "Action({!r})".format(self.index)
def RandomAction() -> Action:
    """Return an Action whose index is picked at random from 0 to Action.max_ind - 1."""
    highest_index = Action.max_ind - 1
    # random.randint includes both endpoints, hence the "- 1" above.
    chosen_index = random.randint(0, highest_index)
    return Action(chosen_index)
## Policy object containing policy information and providing basic calculations
# (NB Not present in the original pseudocode)
class Policy(object):
    """Per-action policy values plus helpers to view them in several forms.

    ``content`` holds one value per entry of the class-level ``action_list``,
    in the same order.
    """

    # Shared by every instance; the default content and all pairing helpers
    # iterate over this list.
    action_list = []

    def __init__(self, vals: Optional[List[float]] = None):
        """Store ``vals`` as the policy content.

        When ``vals`` is None, every action in ``Policy.action_list`` gets
        the value 0.5.  A supplied list is stored by reference, not copied.
        """
        if vals is None:
            self.content = [0.5 for _ in Policy.action_list]
        else:
            self.content = vals

    def as_list(self):
        """Return the stored content object itself."""
        return self.content

    def as_dict(self):
        """Return a dict mapping each action to its stored value.

        Each value is looked up at the position given by the action's
        ``__hash__()``.  Batch-shaped content (``[[v0, v1, ...]]``), which
        the original indexing assumed, is tried first so existing callers
        keep working; flat content (the annotated ``List[float]``) used to
        raise ``TypeError`` and is now indexed directly.
        """
        def value_for(action):
            position = action.__hash__()
            try:
                return self.content[0][position]
            except TypeError:
                # content[0] is a plain number, i.e. content is flat.
                return self.content[position]

        return {action: value_for(action) for action in Policy.action_list}

    def items(self):
        """Return an iterator of ``(action, value)`` pairs."""
        return zip(Policy.action_list, self.content)

    def as_probabilities(self):
        """Return an iterator of ``(action, probability)`` pairs (softmax).

        The values are shifted by their maximum before exponentiation.
        Softmax is invariant to a constant shift, and shifting by the max
        keeps every exponent <= 0 so ``math.exp`` cannot overflow even when
        the values are far apart.  Empty content yields no pairs.
        """
        if len(self.content) == 0:
            return zip(Policy.action_list, [])
        peak = max(self.content)
        weights = [math.exp(value - peak) for value in self.content]
        total = sum(weights)
        probabilities = [weight / total for weight in weights]
        return zip(Policy.action_list, probabilities)
## Not relevant for cartpole
class Player(object):
    """Placeholder player type with no state or behaviour of its own."""
## Container for history of actions over a game instance
class ActionHistory(object):
    """Ordered record of the actions taken so far, plus the action-space size."""

    def __init__(self, history: List[Action], action_space_size: int):
        """Copy ``history`` into a new list and remember ``action_space_size``."""
        self.action_space_size = action_space_size
        # Shallow copy: later appends never touch the caller's sequence.
        self.history = [*history]

    def clone(self):
        """Return a new ActionHistory with its own copy of the action list."""
        return ActionHistory(
            history=self.history,
            action_space_size=self.action_space_size,
        )

    def add_action(self, action: Action):
        """Append ``action`` to the end of the history."""
        recorded = self.history
        recorded.append(action)

    def last_action(self) -> Action:
        """Return the most recently added action (IndexError if none)."""
        most_recent = self.history[-1]
        return most_recent

    def action_space(self) -> List[Action]:
        """Return one Action per index in ``range(action_space_size)``."""
        return list(map(Action, range(self.action_space_size)))

    def to_play(self) -> Player:
        """Return a freshly constructed Player."""
        current_player = Player()
        return current_player