From 9b3005516dd3ac196474a05db2e59bd77df2c9a4 Mon Sep 17 00:00:00 2001 From: lukearcus Date: Thu, 11 Aug 2022 11:40:49 +0100 Subject: [PATCH] Problem: can't learn optimal policy well (when other player plays NE pol we don't learn NE pol) --- main_FSP.py | 6 +++--- test_BR.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/main_FSP.py b/main_FSP.py index 287e623..5ecb706 100644 --- a/main_FSP.py +++ b/main_FSP.py @@ -15,10 +15,10 @@ #test extras = 0 -num_BR = 10000 -num_mixed = 1000 +num_BR = 5000 +num_mixed = 5000 iters = 100000 -time = 60 +time = 300 KP_game = game.Kuhn_Poker_int_io() diff --git a/test_BR.py b/test_BR.py index de7d3b8..7312897 100644 --- a/test_BR.py +++ b/test_BR.py @@ -16,6 +16,12 @@ #pol = [np.array([[0.75,0.25],[0.75,0.25],[0.75,0.25],[0.5,0.5],[0.5,0.5],[0.5,0.5]]),\ # np.array([[0.67,0.33],[0.69,0.31],[0.71,0.29],[0.19,0.81],[0.77,0.23],[0.79,0.21]])] pol = [np.array([[2/3, 1/3],[2/3,1/3],[2/3,1/3],[1/3,2/3],[2/3,1/3],[2/3,1/3]]) for i in range(2)] +pol = [np.array([[0.816, 0.184],[0.811,0.189],[0.811,0.189],[0.375,0.625],[0.625,0.375],[0.625,0.376]]),\ + np.array([[0.53, 0.47],[0.771,0.229],[0.775,0.225],[0.159,0.841],[0.842,0.158],[0.838,0.162]])] + +alpha=0.0 +pol = [np.array([[alpha, 1-alpha],[0,1],[3*alpha,1-3*alpha],[0,1],[alpha+1/3,2/3-alpha],[1,0]]),\ + np.array([[1/3, 2/3],[0,1],[1,0],[0,1],[1/3,2/3],[1,0]])] players = [RL(RL_learners[0],0), fixed_pol(pol[1])]