Skip to content

Commit

Permalink
Problem: can't learn optimal policy well (when other player plays NE policy, we don't learn NE policy)
Browse files Browse the repository at this point in the history
  • Loading branch information
lukearcus committed Aug 11, 2022
1 parent 8fa28cf commit 9b30055
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
6 changes: 3 additions & 3 deletions main_FSP.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

#test
extras = 0
num_BR = 10000
num_mixed = 1000
num_BR = 5000
num_mixed = 5000
iters = 100000
time = 60
time = 300

KP_game = game.Kuhn_Poker_int_io()

Expand Down
6 changes: 6 additions & 0 deletions test_BR.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
#pol = [np.array([[0.75,0.25],[0.75,0.25],[0.75,0.25],[0.5,0.5],[0.5,0.5],[0.5,0.5]]),\
# np.array([[0.67,0.33],[0.69,0.31],[0.71,0.29],[0.19,0.81],[0.77,0.23],[0.79,0.21]])]
pol = [np.array([[2/3, 1/3],[2/3,1/3],[2/3,1/3],[1/3,2/3],[2/3,1/3],[2/3,1/3]]) for i in range(2)]
pol = [np.array([[0.816, 0.184],[0.811,0.189],[0.811,0.189],[0.375,0.625],[0.625,0.375],[0.625,0.376]]),\
np.array([[0.53, 0.47],[0.771,0.229],[0.775,0.225],[0.159,0.841],[0.842,0.158],[0.838,0.162]])]

alpha=0.0
pol = [np.array([[alpha, 1-alpha],[0,1],[3*alpha,1-3*alpha],[0,1],[alpha+1/3,2/3-alpha],[1,0]]),\
np.array([[1/3, 2/3],[0,1],[1,0],[0,1],[1/3,2/3],[1,0]])]

players = [RL(RL_learners[0],0), fixed_pol(pol[1])]

Expand Down

0 comments on commit 9b30055

Please sign in to comment.