Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc PR #4

Merged
merged 4 commits into from
Oct 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -115,4 +115,9 @@ ENV/
.hydra/

# experiment local outputs
experiments/ch2_bandits/outputs/
experiments/ch2_bandits/outputs/
experiments/ch2_bandits/wandb/

# profiler artifacts
*.svg
*.perf
1,295 changes: 0 additions & 1,295 deletions experiments/ch2_bandits/notebooks/RL-greedy_epsilon_bandit.ipynb

This file was deleted.

242 changes: 0 additions & 242 deletions experiments/ch2_bandits/notebooks/prototype_bandits.ipynb

This file was deleted.

6 changes: 3 additions & 3 deletions experiments/ch2_bandits/run.py
Original file line number Diff line number Diff line change
@@ -118,7 +118,7 @@ def main(cfg: DictConfig):
bandit_type = cfg.bandit._target_.split(".")[-1]
# Q_init = cfg.Q_init._target_.split(".")[-1]
hp = {
(bandit_type if k == "_target_" else k): v
("class" if k == "_target_" else k): (bandit_type if k == "_target_" else v)
for k, v in OmegaConf.to_container(cfg.bandit).items()
}
hp["n_cpus"] = cfg.run.n_jobs
@@ -149,8 +149,8 @@ def main(cfg: DictConfig):
)

if cfg.experiment.upload:
tag = "debug" if HydraConfig.get().verbose else cfg.experiment["tag"]
wandb.init(project="rlbook", group="bandits", config=hp, tags=[tag])
hp["tag"] = "debug" if HydraConfig.get().verbose else cfg.experiment["tag"]
wandb.init(project="rlbook", group="bandits", config=hp, tags=[hp["tag"]])
wandb.define_metric("reward", summary="last")
wandb.define_metric("optimal_action_percent", summary="last")
df_avg_ar = average_runs(df_ar)
2 changes: 1 addition & 1 deletion src/rlbook/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def hello() -> str:
return "Hello from rlbook!"
return "Hello from rlbook!"
3 changes: 2 additions & 1 deletion src/rlbook/bandits/algorithms.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@
from concurrent.futures import ProcessPoolExecutor
from copy import deepcopy
from itertools import repeat
from math import log, sqrt
from math import ceil, log, sqrt
from multiprocessing import cpu_count
from typing import Dict

@@ -119,6 +119,7 @@ def _multirun(self, testbed, steps, n_runs, n_jobs=4):
repeat(testbed, n_runs),
[steps for n in range(n_runs)],
list(range(n_runs)),
chunksize=ceil(n_runs / n_jobs),
)
return np.squeeze(np.stack(list(action_values), axis=2))

9 changes: 5 additions & 4 deletions tests/test_bandits.py
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@
def testbed_fixed():
return NormalTestbed(EXPECTED_VALUES, p_drift=0)


@pytest.fixture
def egreedy_bandit(testbed_fixed):
return EpsilonGreedy(init_constant(testbed_fixed, q_val=10), epsilon=0.2)
@@ -30,16 +31,16 @@ def test_multirun_bandit_randomness(egreedy_bandit, testbed_fixed):

egreedy_bandit.run(testbed_fixed, 20, n_runs=20, n_jobs=4)
df = egreedy_bandit.output_df()

# Pivot results:
# run 0 1 2 3
# step
# 0 a a a a
# 1 a a a a
# 2 a a a a
# where a = action taken
actions_by_run = df[["run", "step", "action"]].pivot(index="step", columns=["run"], values="action")
actions_by_run = df[["run", "step", "action"]].pivot(
index="step", columns=["run"], values="action"
)

assert not all(actions_by_run[0].eq(actions_by_run[1]))