Skip to content

Commit

Permalink
Added command-line arguments to main and improved the averaged-belief computation
Browse files Browse the repository at this point in the history
  • Loading branch information
lukearcus committed Aug 16, 2022
1 parent 4ea5a2f commit ca8208f
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 60 deletions.
47 changes: 32 additions & 15 deletions UI/plot_funcs.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage.filters import gaussian_filter1d
from matplotlib import cm
from matplotlib.colors import Normalize

LABEL_SETS = {
'kuhn_policy': {'x':("bet","check"),'y': ("1 low", "2 low", "3 low", "1 high", "2 high", "3 high")},
'kuhn_belief': {'x':("1","2","3"),'y':("1 low", "2 low", "3 low", "1 high", "2 high", "3 high")},
'kuhn_policy': {'x_ticks':("bet","check"),'y_ticks': ("1, no bets", "2, no bets", "3, no bets", "1, bet", "2, bet", "3, bet"),'x_label':"action",'y_label':"state"},
'kuhn_belief': {'x_ticks':("1","2","3"),'y_ticks':("1, no bets", "2, no bets", "3, no bets", "1, bet", "2, bet", "3, bet"),'x_label':"opponent card",'y_label':"state"},
}

def plot_everything(pols, bels, game, reward):
fig = plt.figure()
fig = plt.figure(figsize=[16, 12])
if bels is not None:
subfigs = fig.subfigures(1,2)
multiple_heatmaps(pols, subfigs[0], game + "_policy")
subfigs = fig.subfigures(1,3, width_ratios=[3,3,1])
multiple_heatmaps(pols, subfigs[0], game + "_policy", labels=True)
subfigs[0].suptitle('Policies', fontsize=32)
multiple_heatmaps(bels, subfigs[1], game+ "_belief")
subfigs[1].suptitle('Beliefs', fontsize = 32)
cbar_ax = subfigs[2].add_axes([0.15, 0.15, 0.05, 0.7])
subfigs[2].colorbar(cm.ScalarMappable(norm=Normalize(vmin=0,vmax=1)), cax=cbar_ax)
#fig.subplots_adjust(right=0.8)
#cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
#fig.colorbar(cm.ScalarMappable(norm=Normalize(vmin=0,vmax=1)), cax=cbar_ax)
fig2 = plt.figure()
ax = fig2.subplots()
reward_smoothed(reward, fig2)
reward_smoothed(reward, ax)
else:
multiple_heatmaps(pols, fig, game + "_policy")
fig.suptitle('Policies', fontsize=32)
Expand All @@ -28,36 +35,46 @@ def reward_smoothed(reward, ax):
ax.plot(gaussian_filter1d(reward,1000))


def multiple_heatmaps(im_list, fig, label_name, labels=False, overlay_vals=False):
    """Draw one row of two heatmaps (agent 1 / agent 2) per entry of im_list.

    Parameters
    ----------
    im_list : sequence of pairs of 2-D arrays; im_list[i][p] is the image
        for level i, agent p (p in {0, 1}).
    fig : matplotlib Figure or SubFigure to draw into.
    label_name : key into LABEL_SETS, forwarded to plot_heatmap.
    labels : when True and there are multiple rows, add an invisible
        background axis per row carrying a "Level <row>" y-label.
    overlay_vals : forwarded to plot_heatmap to print cell values.
    """
    num_ims = len(im_list)
    axs = fig.subplots(num_ims, 2)
    if num_ims > 1:
        if labels:
            # One full-width background axis per row, used only for its label.
            big_axes = fig.subplots(nrows=num_ims, ncols=1, sharey=True)
            for row, big_ax in enumerate(big_axes):
                big_ax.set_ylabel("Level %s" % row, fontsize=16)
                big_ax.set_facecolor('0.85')
                # tick_params side arguments expect booleans; the previous
                # code passed the string 'off', which is truthy and therefore
                # did NOT hide the ticks. Use False to actually disable them.
                big_ax.tick_params(colors=(1., 1., 1., 0.0), top=False,
                                   bottom=False, left=False, right=False)
                # Public API instead of poking the private _frameon attribute.
                big_ax.set_frame_on(False)

        plt.subplots_adjust(hspace=0.4)
        for i, im in enumerate(im_list):
            plot_heatmap(im[0], axs[i, 0], label_name, overlay_vals)
            plot_heatmap(im[1], axs[i, 1], label_name, overlay_vals)
        axs[0, 0].set_title("agent 1")
        axs[0, 1].set_title("agent 2")
    else:
        for i, im in enumerate(im_list):
            plot_heatmap(im[0], axs[0], label_name, overlay_vals)
            plot_heatmap(im[1], axs[1], label_name, overlay_vals)
        axs[0].set_title("agent 1")
        axs[1].set_title("agent 2")

    fig.set_facecolor('w')


def plot_heatmap(im, ax, label_name, overlay_vals=False):
ax.imshow(im, vmin=0,vmax=1)
x_label_list = LABEL_SETS[label_name]["x"]
y_label_list = LABEL_SETS[label_name]["y"]
x_label_list = LABEL_SETS[label_name]["x_ticks"]
y_label_list = LABEL_SETS[label_name]["y_ticks"]
ax.set_xticks(np.arange(len(x_label_list)),labels=x_label_list)
ax.set_yticks(np.arange(len(y_label_list)),labels=y_label_list)
ax.set_xlabel(LABEL_SETS[label_name]["x_label"])
ax.set_ylabel(LABEL_SETS[label_name]["y_label"])
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
if overlay_vals:
for (j,i),label in np.ndenumerate(im):
Expand Down
127 changes: 86 additions & 41 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,72 @@
import agents.learners as learners
from UI.plot_funcs import plot_everything
from functions import play_game
import numpy as np
import sys

game = Kuhn_Poker_int_io()
num_lvls = 1
games_per_lvl=100000
num_players = 2
RL_learners = [learners.actor_critic(learners.softmax, learners.value_advantage, 2, 6, extra_samples = 0)\
for p in range(num_players)]
fict_game = Fict_Kuhn_int()

#players = [RL(RL_learners[p],p) for p in range(num_players)]
players = [OBL(RL_learners[p], p, fict_game) for p in range(num_players)]

for p in range(num_players):
curr_player = players.pop(p)
if curr_player.belief is not None:
curr_player.set_other_players(players)
players.insert(p, curr_player)

reward_hist = [[0 for i in range(games_per_lvl)] for lvl in range(num_lvls)]
pol_hist = []
belief_hist = []
for lvl in range(num_lvls):
def main():
game = Kuhn_Poker_int_io()
if len(sys.argv) > 1:
if '--lvls' in sys.argv:
level_ind = sys.argv.index('--lvls')
if len(sys.argv) > level_ind:
try:
num_lvls = int(sys.argv[level_ind+1])
except TypeError:
print("Please enter a numerical value for number of levels")
return -1
else:
print("Please enter number of levels")
return(-1)
else:
num_lvls = 10
averaged ='--avg' in sys.argv or '-a' in sys.argv
games_per_lvl=100000

num_players = 2
RL_learners = [learners.actor_critic(learners.softmax, learners.value_advantage, 2, 6, extra_samples = 0)\
for p in range(num_players)]
fict_game = Fict_Kuhn_int()

#players = [RL(RL_learners[p],p) for p in range(num_players)]
players = [OBL(RL_learners[p], p, fict_game) for p in range(num_players)]

for p in range(num_players):
curr_player = players.pop(p)
if curr_player.belief is not None:
curr_player.set_other_players(players)
players.insert(p, curr_player)

reward_hist = [[0 for i in range(games_per_lvl)] for lvl in range(num_lvls)]
pol_hist = []
belief_hist = []
avg_bels = []
for lvl in range(num_lvls):
pols = []
bels = []
for p in players:
pols.append(p.opt_pol)
if p.belief is not None:
p.update_belief()
bels.append(np.copy(p.belief))
else:
bels.append(np.zeros((1,1)))
pol_hist.append(pols)
belief_hist.append(bels)
if averaged:
new_avg_bels = []
for p_id, p in enumerate(players):
total_bel = np.zeros_like(belief_hist[0][p_id])
for i in range(lvl+1):
total_bel += belief_hist[i][p_id]
avg_bel = total_bel / (lvl+1)
p.belief = np.copy(avg_bel)
new_avg_bels.append(avg_bel)
avg_bels.append(new_avg_bels)
for p in players:
p.reset()
for i in range(games_per_lvl):
reward_hist[lvl][i] = float(play_game(players, game))
pols = []
bels = []
for p in players:
Expand All @@ -36,25 +80,26 @@
bels.append(np.zeros((1,1)))
pol_hist.append(pols)
belief_hist.append(bels)
for p in players:
p.reset()
for i in range(games_per_lvl):
reward_hist[lvl][i] = float(play_game(players, game))
pols = []
bels = []
for p in players:
pols.append(p.opt_pol)
if p.belief is not None:
p.update_belief()
bels.append(p.belief)

if averaged:
new_avg_bels = []
for p_id, p in enumerate(players):
total_bel = np.zeros_like(belief_hist[0][p_id])
for i in range(lvl+1):
total_bel += belief_hist[i][p_id]
avg_bel = total_bel / (lvl+1)
new_avg_bels.append(avg_bel)
avg_bels.append(new_avg_bels)
#pol_hist = pol_hist[-5:]
#belief_hist = belief_hist[-5:]

if averaged:
plot_everything(pol_hist, avg_bels, "kuhn", reward_hist[-1])
else:
bels.append(np.zeros((1,1)))
pol_hist.append(pols)
belief_hist.append(bels)

#pol_hist = pol_hist[-5:]
#belief_hist = belief_hist[-5:]

plot_everything(pol_hist, belief_hist, "kuhn", reward)
plot_everything(pol_hist, belief_hist, "kuhn", reward_hist[-1])

import pdb; pdb.set_trace()
return 0

import pdb; pdb.set_trace()
if __name__=="__main__":
main()
18 changes: 14 additions & 4 deletions test_BR.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from UI.plot_funcs import reward_smoothed
from functions import play_game
import matplotlib.pyplot as plt
import numpy as np

game = Kuhn_Poker_int_io()
games_per_lvl=100000
Expand All @@ -28,31 +29,40 @@

reward_hist = []

change = [[], []]
for i in range(games_per_lvl):
old_pol = np.copy(players[0].opt_pol)
reward_hist.append(float(play_game(players, game)))
change[0].append(np.linalg.norm(players[0].opt_pol-old_pol))

R = reward_hist[-100:]
pols = []
pols.append(players[0].opt_pol)
V_1 = players[0].learner.advantage_func.V
#V_1 = players[0].learner.advantage_func.V

players = [fixed_pol(pol[0]), RL(RL_learners[1],1)]

for i in range(games_per_lvl):
old_pol = np.copy(players[1].opt_pol)
reward_hist.append(-float(play_game(players, game)))
change[1].append(np.linalg.norm(players[1].opt_pol-old_pol))

R += reward_hist[-100:]
pols.append(players[1].opt_pol)
V_2 = players[1].learner.advantage_func.V
#V_2 = players[1].learner.advantage_func.V

print(sum(R)/200)
print(pols[0])
print(pols[1])
print(V_1)
print(V_2)
#print(V_1)
#print(V_2)
fig = plt.figure()
ax = fig.subplots()
reward_smoothed(reward_hist, ax)

plt.plot(change[0])
plt.plot(change[1])
plt.show()


import pdb; pdb.set_trace()

0 comments on commit ca8208f

Please sign in to comment.