From 9b3005516dd3ac196474a05db2e59bd77df2c9a4 Mon Sep 17 00:00:00 2001
From: lukearcus <rickard@robots.ox.ac.uk>
Date: Thu, 11 Aug 2022 11:40:49 +0100
Subject: [PATCH] Problem: can't learn the optimal policy well (when the other
 player plays the NE policy, we don't learn the NE policy)

---
 main_FSP.py | 6 +++---
 test_BR.py  | 6 ++++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/main_FSP.py b/main_FSP.py
index 287e623..5ecb706 100644
--- a/main_FSP.py
+++ b/main_FSP.py
@@ -15,10 +15,10 @@
 
 #test
 extras = 0
-num_BR = 10000
-num_mixed = 1000
+num_BR = 5000
+num_mixed = 5000
 iters = 100000
-time = 60
+time = 300
 
 KP_game = game.Kuhn_Poker_int_io()
 
diff --git a/test_BR.py b/test_BR.py
index de7d3b8..7312897 100644
--- a/test_BR.py
+++ b/test_BR.py
@@ -16,6 +16,12 @@
 #pol = [np.array([[0.75,0.25],[0.75,0.25],[0.75,0.25],[0.5,0.5],[0.5,0.5],[0.5,0.5]]),\
 #       np.array([[0.67,0.33],[0.69,0.31],[0.71,0.29],[0.19,0.81],[0.77,0.23],[0.79,0.21]])]
 pol = [np.array([[2/3, 1/3],[2/3,1/3],[2/3,1/3],[1/3,2/3],[2/3,1/3],[2/3,1/3]]) for i in range(2)]
+pol = [np.array([[0.816, 0.184],[0.811,0.189],[0.811,0.189],[0.375,0.625],[0.625,0.375],[0.625,0.376]]),\
+       np.array([[0.53, 0.47],[0.771,0.229],[0.775,0.225],[0.159,0.841],[0.842,0.158],[0.838,0.162]])]
+
+alpha=0.0
+pol = [np.array([[alpha, 1-alpha],[0,1],[3*alpha,1-3*alpha],[0,1],[alpha+1/3,2/3-alpha],[1,0]]),\
+       np.array([[1/3, 2/3],[0,1],[1,0],[0,1],[1/3,2/3],[1,0]])]
 
 players = [RL(RL_learners[0],0), fixed_pol(pol[1])]