diff --git a/FSP.py b/FSP.py
index 19c2171..1874aac 100644
--- a/FSP.py
+++ b/FSP.py
@@ -24,7 +24,7 @@ def gen_data(self, pi, beta, eta):
         #import pdb; pdb.set_trace()
         sigma = []
         for p in range(self.num_players):
-            sigma.append((1-eta)*pi[p]+eta*beta[p])
+            sigma.append((1-eta)*pi[p]+eta*beta[p]) # this step might be wrong
         D = [[] for i in range(self.num_players)]
         for i in range(self.n):
             res = self.play_game(sigma)
diff --git a/README.md b/README.md
index 3379781..af42dc4 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,25 @@
 # General Sum Off-Belief Learning
+
+Run main.py for RL, OBL, or OT-RL. Run main\_FSP.py for fictitious self-play. The following options can be used for either script (although some have no effect on FSP).
+
+Options:
+  **--lvls** LEVELS
+        Select the number of OBL/OT-RL levels to run through; defaults to 10.
+  **--game** kuhn/leduc
+        Choose either Kuhn poker or Leduc hold 'em.
+  **-ab, --avg_bel**
+        Generate an averaged belief (over levels) and use it in OBL.
+  **-ap, --avg_pol**
+        Generate the averaged policy across levels and use it when evaluating.
+  **-al, --avg_learn**
+        When carrying out OBL, use the opponent's averaged policy to find their action.
+  **-a, --all_avg**
+        Use the averaged belief, policy, and learning together.
+  **--debug**
+        Print debugging information.
+  **-v**
+        Print some information about progress.
+  **--obl**
+        Use OBL to learn.
+  **--ot_rl**
+        Use OT-RL to learn; lower-level policies are updated based on the distribution induced at higher levels.
diff --git a/main.py b/main.py
index 1797e53..3a0a164 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@ import logging
 from multiprocessing import Pool
 
 log = logging.getLogger(__name__)
-NUM_LOOPS=10
+NUM_LOOPS=1
 
 def run(options, games_per_lvl=100000, exploit_freq= 1):
     num_lvls = options["num_lvls"]
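
As a usage sketch (not part of the patch above), assuming both scripts are invoked directly with Python and accept the flags documented in the README hunk, typical runs might look like:

```sh
# Hypothetical invocations based on the README options above.
# OBL on Kuhn poker with 5 levels and verbose progress output:
python main.py --game kuhn --obl --lvls 5 -v

# OT-RL on Leduc hold 'em with averaged belief, policy, and learning:
python main.py --game leduc --ot_rl -a

# Fictitious self-play; main_FSP.py takes the same flags, though some have no effect on FSP:
python main_FSP.py --game kuhn
```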