-
Notifications
You must be signed in to change notification settings - Fork 37
/
payoff_matrix.py
228 lines (190 loc) · 9.59 KB
/
payoff_matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
__author__ = 'elubin'
import numpy
import logging
class PayoffMatrix(object):
"""
A class that encapsulates the notion of a set of payoff matrices for a game (one for each player), and provides
convenience methods for getting the payoff for each player given a strategy set, as well as calculating the
expected payoff given a distribution of players playing each strategy.
"""
def __init__(self, num_players, payoff_matrices):
self.num_player_types = num_players
self.payoff_matrices = payoff_matrices
self.num_strats = []
root = self.payoff_matrices[0]
for i in range(self.num_player_types):
self.num_strats.append(len(root))
root = root[0]
self.verify_payoff_matrix_dimensions()
self.compute_dominated_strategies()
def verify_payoff_matrix_dimensions(self):
"""
Verify that "depth" of each payoff matrix matches number of elements in player_dist
"""
for m in self.payoff_matrices:
self._verify_dimensions(m, self.num_strats[:])
def _verify_dimensions(self, m, num_strats):
"""
Recursive helper function to verify the dimensions of the payoff matrix
"""
if len(num_strats) == 0:
assert isinstance(m, (int, float))
return
n = num_strats.pop(0)
assert n == len(m)
for i in m:
self._verify_dimensions(i, num_strats[:])
def get_payoff(self, recipient, *strats):
"""
Get the payoff for the player index recipient, by specifying the strategies that everyone plays in increasing
player order.
@param recipient: the index of the player for which to get the payoff, 0-indexed
@type recipient: int
@param strats: the iterable of strategies played by each player, in the order of their indices
@type strats: list(int)
@return: the payoff that the recipient gets from all playres playing the given strategy
@rtype: float
"""
matrix = self.payoff_matrices[recipient]
for idx in strats:
matrix = matrix[idx]
return matrix
def get_expected_payoff(self, player_idx, strategy, current_state):
"""
Get the expected payoff if the player at idx player_idx plays indexed by strategy given the current state.
@param player_idx: the index of the player for which to get the expected payoff
@type player_idx: int
@param strategy: the index that the player will play
@type strategy: int
@param current_state: The state of the population(s). Each entry in the parent array refers to a player type, each entry in each sublist refers to the number or
frequency of players playing that strategy.
@type current_state: list
@return: the expected payoff
@rtype: float
"""
return self._iterate_through_players(player_idx, 0, {player_idx: strategy}, 1.0, current_state)
def _iterate_through_players(self, target_player_idx, current_player_idx, other_player_stategies, probability, current_state):
if len(other_player_stategies) == self.num_player_types:
strats = [0] * self.num_player_types
for i in range(self.num_player_types):
strats[i] = other_player_stategies[i]
payoff = self.get_payoff(target_player_idx, *strats)
return payoff * probability
elif current_player_idx in other_player_stategies:
# skip it, we already picked the strategy
return self._iterate_through_players(target_player_idx, current_player_idx + 1, other_player_stategies, probability, current_state)
else:
# iterate over the current player idx dimension, recursively calling yourself on every iteration'
payoff = 0
for strat in range(self.num_strats[current_player_idx]):
n = current_state[current_player_idx][strat]
p = float(n) / current_state[current_player_idx].sum()
dict_copy = other_player_stategies.copy()
#dict_copy = dict(other_player_stategies.items())
dict_copy[current_player_idx] = strat
payoff += self._iterate_through_players(target_player_idx, current_player_idx + 1, dict_copy, probability * p, current_state)
return payoff
def get_all_strategy_tuples(self):
"""
@return: a generator of all strategy tuples representing non-mixed strategies for all players
@rtype: generator
"""
return self._strategy_tuple_helper(0, ())
def _strategy_tuple_helper(self, p, s):
if p == self.num_player_types:
yield s
return
for s_i in range(self.num_strats[p]):
for r in self._strategy_tuple_helper(p + 1, s + (s_i, )):
yield r
def compute_dominated_strategies(self):
# for every strategy for every player, iterate through all strategies for all other players and see if there are
# any strategies that are completely dominated by other strategies
# dominated is a dictionary of sets
# we have a loop to simulate the iterated elimination of dominated strategies
continue_iterating = True
dominated_strategies = set()
while continue_iterating:
continue_iterating = False
for p_i in range(self.num_player_types):
payoffs = []
for s_i in range(self.num_strats[p_i]):
payoffs.append(numpy.array(self._get_all_payoffs(p_i, s_i, dominated_strategies)))
for s_1 in range(self.num_strats[p_i]):
if (p_i, s_1) in dominated_strategies:
continue
# consider s_1 as a dominated strategy
for s_2 in range(self.num_strats[p_i]):
# if s_2 is dominated, we can ignore it. can't both be be dominated and dominate another one
if (p_i, s_2) in dominated_strategies:
continue
if (payoffs[s_2] > payoffs[s_1]).all():
dominated_strategies.add((p_i, s_1))
continue_iterating = True
break
# (player index, strategy) index set of tuples of dominated strategies
self.dominated_strategies = set()
def _get_all_payoffs(self, p, s, dominated):
# get a list of all possible payoffs a given player can get for playing a given strategy
# the list is computed in order, by iterating through all the other strategy pairs for all the other players
# TODO need to ignore payoffs for dominated strategies
return list(self._get_all_payoffs_helper(p, s, 0, (), dominated))
def _get_all_payoffs_helper(self, p, s, cur_p, cur_s, dominated):
if cur_p == self.num_player_types:
yield self.get_payoff(p, *cur_s)
return
elif cur_p == p:
for r in self._get_all_payoffs_helper(p, s, cur_p + 1, cur_s + (s, ), dominated):
yield r
return
else:
for s_i in range(self.num_strats[cur_p]):
if (cur_p, s_i) in dominated:
continue
for r in self._get_all_payoffs_helper(p, s, cur_p + 1, cur_s + (s_i, ), dominated):
yield r
def is_pure_equilibrium(self, s):
assert self.num_player_types == len(s)
strategies = list(s)
for n_i in range(self.num_player_types):
best_payoff = self.get_payoff(n_i, *s)
for s_i in range(self.num_strats[n_i]):
if s_i == s[n_i]:
continue
strategies[n_i] = s_i
p = self.get_payoff(n_i, *strategies)
if p > best_payoff:
return False, n_i, s_i # profitable deviation for player n_i to play s_i instead of s[n_i]
strategies[n_i] = s[n_i]
return True
def is_mixed_equilibrium(self, s):
assert self.num_player_types == len(s)
logging.debug("testing %s", s)
for n_i in range(self.num_player_types):
logging.debug("player %d", n_i)
payoffs = []
for i, s_i in enumerate(s[n_i]):
if s_i > 0:
# get expected payoff of mixing this strategy
payoffs.append((i, self.get_expected_payoff(n_i, i, s)))
logging.debug("payoffs %s", payoffs)
if len(payoffs) > 1:
for i, (idx_i, p) in enumerate(payoffs):
for j, (idx_j, q) in enumerate(payoffs[i:]):
if abs(q - p) > (1e-08 + 1e-05 * abs(p)):
return True, n_i, ((idx_i, p), (idx_j, q))
else:
# only one strategy, this is pure equilibrium
# check to make sure there's no incentive of switching strategies
# get index of pure strategy
s_idx = [i for i, x in enumerate(s[n_i]) if s[n_i][i] > 0][0]
best_payoff = self.get_expected_payoff(n_i, s_idx, s)
logging.debug("Best payoff %f", best_payoff)
for s_i in range(self.num_strats[n_i]):
if s_i == s_idx:
continue
p = self.get_expected_payoff(n_i, s_i, s)
logging.debug("Strategy %d payoff %f", s_i, p)
if p > best_payoff:
return False, n_i, s_i # profitable deviation for player n_i to play s_i instead of s[n_i]
return True