Problem: can't learn the optimal policy well (when the other player plays the NE policy, we don't learn the NE policy)
lukearcus · Aug 11, 2022 · 9b30055 · 9b30055
1 parent 8fa28cf
commit 9b30055
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 3 deletions.
diff --git a/main_FSP.py b/main_FSP.py
@@ -15,10 +15,10 @@
 
 #test
 extras = 0
-num_BR = 10000
-num_mixed = 1000
+num_BR = 5000
+num_mixed = 5000
 iters = 100000
-time = 60
+time = 300
 
 KP_game = game.Kuhn_Poker_int_io()
 

diff --git a/test_BR.py b/test_BR.py
@@ -16,6 +16,12 @@
 #pol = [np.array([[0.75,0.25],[0.75,0.25],[0.75,0.25],[0.5,0.5],[0.5,0.5],[0.5,0.5]]),\
 #       np.array([[0.67,0.33],[0.69,0.31],[0.71,0.29],[0.19,0.81],[0.77,0.23],[0.79,0.21]])]
 pol = [np.array([[2/3, 1/3],[2/3,1/3],[2/3,1/3],[1/3,2/3],[2/3,1/3],[2/3,1/3]]) for i in range(2)]
+pol = [np.array([[0.816, 0.184],[0.811,0.189],[0.811,0.189],[0.375,0.625],[0.625,0.375],[0.625,0.376]]),\
+       np.array([[0.53, 0.47],[0.771,0.229],[0.775,0.225],[0.159,0.841],[0.842,0.158],[0.838,0.162]])]
+
+alpha=0.0
+pol = [np.array([[alpha, 1-alpha],[0,1],[3*alpha,1-3*alpha],[0,1],[alpha+1/3,2/3-alpha],[1,0]]),\
+       np.array([[1/3, 2/3],[0,1],[1,0],[0,1],[1/3,2/3],[1,0]])]
 
 players = [RL(RL_learners[0],0), fixed_pol(pol[1])]