Skip to content

Commit

Permalink
Working OBL!
Browse files (browse the repository at this point in the history)
  • Loading branch information
lukearcus committed Aug 31, 2022
1 parent 1918c4e commit c201022
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
2 changes: 1 addition & 1 deletion agents/players.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def action(self):
class OBL(RL):
belief = 0

def __init__(self, learner, player_id, fict_game, belief_iters = 1000):
def __init__(self, learner, player_id, fict_game, belief_iters = 10000):
self.belief_iters = belief_iters
super().__init__(learner, player_id)
self.fict_game = fict_game
Expand Down
16 changes: 13 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
log = logging.getLogger(__name__)

def main():
logging.basicConfig(level=logging.DEBUG, format='%(relativeCreated)6d %(threadName)s %(message)s')
if len(sys.argv) > 1:
if '--lvls' in sys.argv:
level_ind = sys.argv.index('--lvls')
Expand Down Expand Up @@ -57,6 +56,13 @@ def main():
averaged_bel ='--avg_bel' in sys.argv or '-ab' in sys.argv
averaged_pol ='--avg_pol' in sys.argv or '-ap' in sys.argv
learn_with_avg = '--avg_learn' in sys.argv or '-al' in sys.argv
if '--debug' in sys.argv:
logging.basicConfig(level=logging.DEBUG,\
format='%(relativeCreated)6d %(threadName)s %(message)s')
elif '-v' in sys.argv or '--verbose' in sys.argv:
logging.basicConfig(level=logging.INFO,\
format='%(relativeCreated)6d %(threadName)s %(message)s')

games_per_lvl=100000
exploit_freq= 1

Expand Down Expand Up @@ -96,6 +102,10 @@ def main():
for p in players:
pols.append(p.opt_pol)
if p.belief is not None:
if learn_with_avg:
for p_id, other_p in enumerate(p.other_players):
if other_p != "me":
other_p.opt_pol = players[p_id].opt_pol
p.update_belief()
bels.append(np.copy(p.belief))
else:
Expand Down Expand Up @@ -144,7 +154,7 @@ def main():
p.other_players[other_p_id].opt_pol = other_pol
for p in players:
p.reset()
play_to_convergence(players, game, tol=1e-7)
play_to_convergence(players, game, tol=1e-5)
#for i in range(games_per_lvl):
# reward_hist[lvl][i] = float(play_game(players, game))
times.append(time.perf_counter()-tic)
Expand Down Expand Up @@ -194,7 +204,7 @@ def main():
else:
bel_plot = belief_hist
plot_everything(pol_plot, bel_plot, "kuhn", reward_hist[-1], exploitability)
filename="results/OBL"
filename="results/OBL_all_average"
np.savez(filename, pols=pol_plot, bels=bel_plot, explot=exploitability, rewards=reward_hist)
return 0

Expand Down

0 comments on commit c201022

Please sign in to comment.