diff --git a/FSP.py b/FSP.py
index 19c2171..1874aac 100644
--- a/FSP.py
+++ b/FSP.py
@@ -24,7 +24,7 @@ def gen_data(self, pi, beta, eta):
         #import pdb; pdb.set_trace()
         sigma = []
         for p in range(self.num_players):
-            sigma.append((1-eta)*pi[p]+eta*beta[p])
+            sigma.append((1-eta)*pi[p]+eta*beta[p]) # this step might be wrong
         D = [[] for i in range(self.num_players)]
         for i in range(self.n):
             res = self.play_game(sigma)
diff --git a/README.md b/README.md
index 3379781..af42dc4 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,25 @@
 # General Sum Off-Belief Learning
+
+Run main.py for RL, OBL, or OT-RL. Run main\_FSP.py for fictitious self-play. The following options can be used for either script (although some have no effect on FSP).
+
+Options:
+  **--lvls** LEVELS
+        Select the number of OBL/OT-RL levels to run through; defaults to 10.
+  **--game** kuhn/leduc
+        Choose either Kuhn poker or Leduc hold 'em.
+  **-ab, --avg_bel**
+        Generate an averaged belief (over levels) and use it in OBL.
+  **-ap, --avg_pol**
+        Generate the averaged policy across levels and use it when evaluating.
+  **-al, --avg_learn**
+        When carrying out OBL, use the opponent's averaged policy to find their action.
+  **-a, --all_avg**
+        Use the averaged belief, policy, and learning together.
+  **--debug**
+        Print debugging information.
+  **-v**
+        Print some information about progress.
+  **--obl**
+        Use OBL to learn.
+  **--ot_rl**
+        Use OT-RL to learn; lower-level policies are updated based on the distribution induced at higher levels.
diff --git a/main.py b/main.py
index 1797e53..3a0a164 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@ import logging
 from multiprocessing import Pool
 
 log = logging.getLogger(__name__)
-NUM_LOOPS=10
+NUM_LOOPS=1
 
 def run(options, games_per_lvl=100000, exploit_freq= 1):
     num_lvls = options["num_lvls"]
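
As a usage sketch (not part of the patch above), assuming both scripts are invoked directly with Python and accept the flags documented in the README hunk, typical runs might look like:

```sh
# Hypothetical invocations based on the README options above.
# OBL on Kuhn poker with 5 levels and verbose progress output:
python main.py --game kuhn --obl --lvls 5 -v

# OT-RL on Leduc hold 'em with averaged belief, policy, and learning:
python main.py --game leduc --ot_rl -a

# Fictitious self-play; main_FSP.py takes the same flags, though some have no effect on FSP:
python main_FSP.py --game kuhn
```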