joseph-jnl · joseph-jnl · Oct 19, 2024 · Oct 19, 2024 · Oct 19, 2024 · Oct 19, 2024
diff --git a/.gitignore b/.gitignore
@@ -115,4 +115,9 @@ ENV/
 .hydra/
 
 #experiment local outputs
-experiments/ch2_bandits/outputs/
+experiments/ch2_bandits/outputs/
+experiments/ch2_bandits/wandb/
+
+# profiler artifacts
+*.svg
+*.perf
diff --git a/experiments/ch2_bandits/notebooks/RL-greedy_epsilon_bandit.ipynb b/experiments/ch2_bandits/notebooks/RL-greedy_epsilon_bandit.ipynb
diff --git a/experiments/ch2_bandits/notebooks/prototype_bandits.ipynb b/experiments/ch2_bandits/notebooks/prototype_bandits.ipynb
diff --git a/experiments/ch2_bandits/run.py b/experiments/ch2_bandits/run.py
@@ -118,7 +118,7 @@ def main(cfg: DictConfig):
     bandit_type = cfg.bandit._target_.split(".")[-1]
     # Q_init = cfg.Q_init._target_.split(".")[-1]
     hp = {
-        (bandit_type if k == "_target_" else k): v
+        ("class" if k == "_target_" else k): (bandit_type if k == "_target_" else v)
         for k, v in OmegaConf.to_container(cfg.bandit).items()
     }
     hp["n_cpus"] = cfg.run.n_jobs
@@ -149,8 +149,8 @@ def main(cfg: DictConfig):
     )
 
     if cfg.experiment.upload:
-        tag = "debug" if HydraConfig.get().verbose else cfg.experiment["tag"]
-        wandb.init(project="rlbook", group="bandits", config=hp, tags=[tag])
+        hp["tag"] = "debug" if HydraConfig.get().verbose else cfg.experiment["tag"]
+        wandb.init(project="rlbook", group="bandits", config=hp, tags=[hp["tag"]])
         wandb.define_metric("reward", summary="last")
         wandb.define_metric("optimal_action_percent", summary="last")
         df_avg_ar = average_runs(df_ar)

diff --git a/src/rlbook/__init__.py b/src/rlbook/__init__.py
@@ -1,2 +1,2 @@
 def hello() -> str:
-    return "Hello from rlbook!"
+    return "Hello from rlbook!"
diff --git a/src/rlbook/bandits/algorithms.py b/src/rlbook/bandits/algorithms.py
@@ -4,7 +4,7 @@
 from concurrent.futures import ProcessPoolExecutor
 from copy import deepcopy
 from itertools import repeat
-from math import log, sqrt
+from math import ceil, log, sqrt
 from multiprocessing import cpu_count
 from typing import Dict
 
@@ -119,6 +119,7 @@ def _multirun(self, testbed, steps, n_runs, n_jobs=4):
                 repeat(testbed, n_runs),
                 [steps for n in range(n_runs)],
                 list(range(n_runs)),
+                chunksize=ceil(n_runs / n_jobs),
             )
         return np.squeeze(np.stack(list(action_values), axis=2))
 

diff --git a/tests/test_bandits.py b/tests/test_bandits.py
@@ -20,6 +20,7 @@
 def testbed_fixed():
     return NormalTestbed(EXPECTED_VALUES, p_drift=0)
 
+
 @pytest.fixture
 def egreedy_bandit(testbed_fixed):
     return EpsilonGreedy(init_constant(testbed_fixed, q_val=10), epsilon=0.2)
@@ -30,16 +31,16 @@ def test_multirun_bandit_randomness(egreedy_bandit, testbed_fixed):
 
     egreedy_bandit.run(testbed_fixed, 20, n_runs=20, n_jobs=4)
     df = egreedy_bandit.output_df()
-    
+
     # Pivot results:
     # run   0 1 2 3
     # step
     #  0    a a a a
     #  1    a a a a
     #  2    a a a a
     # where a = action taken
-    actions_by_run = df[["run", "step", "action"]].pivot(index="step", columns=["run"], values="action")
+    actions_by_run = df[["run", "step", "action"]].pivot(
+        index="step", columns=["run"], values="action"
+    )
 
     assert not all(actions_by_run[0].eq(actions_by_run[1]))
-
-