Add multi-armed bandit sampler #155

Merged: 6 commits, merged Oct 8, 2024
21 changes: 21 additions & 0 deletions package/samplers/mab_epsilon_greedy/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Ryota Nishijima

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
25 changes: 25 additions & 0 deletions package/samplers/mab_epsilon_greedy/README.md
@@ -0,0 +1,25 @@
---
author: Ryota Nishijima
title: MAB Epsilon-Greedy Sampler
Contributor comment ([nit]):

Suggested change
title: MAB Epsilon-Greedy Sampler
title: A Sampler Based on Epsilon-Greedy Multi-Armed Bandit Algorithm

description: Sampler based on multi-armed bandit algorithm with epsilon-greedy arm selection.
tags: [sampler, multi-armed bandit]
optuna_versions: [4.0.0]
license: MIT License
---

## Class or Function Names

- MABEpsilonGreedySampler

## Example

```python
import optunahub

mod = optunahub.load_module("samplers/mab_epsilon_greedy")
sampler = mod.MABEpsilonGreedySampler()
```

See [`example.py`](https://github.com/optuna/optunahub-registry/blob/main/package/samplers/mab_epsilon_greedy/example.py) for more details.

## Others

This package provides a sampler based on the multi-armed bandit algorithm with epsilon-greedy arm selection.
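
As a quick orientation (not part of the PR's files), here is a minimal sketch of how the constructor arguments defined in `mab_epsilon_greedy.py` (`epsilon` and `seed`) can be passed; the values `epsilon=0.3` and `seed=42`, the parameter name, and the objective are illustrative assumptions only:

```python
import optuna
import optunahub

mod = optunahub.load_module("samplers/mab_epsilon_greedy")
# epsilon is the probability of exploring a random arm (default 0.7 in this PR);
# seed feeds the underlying random number generator.
sampler = mod.MABEpsilonGreedySampler(epsilon=0.3, seed=42)


def objective(trial: optuna.Trial) -> float:
    # Each categorical parameter is treated as its own set of bandit arms.
    arm = trial.suggest_categorical("arm", [1, 2, 3])
    return float(arm)


study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=30)
```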
4 changes: 4 additions & 0 deletions package/samplers/mab_epsilon_greedy/__init__.py
@@ -0,0 +1,4 @@
from .mab_epsilon_greedy import MABEpsilonGreedySampler


__all__ = ["MABEpsilonGreedySampler"]
20 changes: 20 additions & 0 deletions package/samplers/mab_epsilon_greedy/example.py
@@ -0,0 +1,20 @@
import optuna
import optunahub


if __name__ == "__main__":
    module = optunahub.load_module(
        package="samplers/mab_epsilon_greedy",
    )
    sampler = module.MABEpsilonGreedySampler()

    def objective(trial: optuna.Trial) -> float:
        x = trial.suggest_categorical("arm_1", [1, 2, 3])
        y = trial.suggest_categorical("arm_2", [1, 2])

        return x + y

    study = optuna.create_study(sampler=sampler)
    study.optimize(objective, n_trials=20)

    print(study.best_trial.value, study.best_trial.params)
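
When running the example locally, one quick way to observe the bandit behaviour is to tally how often each arm was suggested; a small follow-up sketch (not part of this PR) using only standard study attributes:

```python
from collections import Counter

# With epsilon-greedy, the better-performing arms should be suggested more often
# as trials accumulate (subject to the epsilon share of random exploration).
print(Counter(t.params["arm_1"] for t in study.trials))
```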
70 changes: 70 additions & 0 deletions package/samplers/mab_epsilon_greedy/mab_epsilon_greedy.py
@@ -0,0 +1,70 @@
from collections import defaultdict
from typing import Any
from typing import Optional

from optuna.distributions import BaseDistribution
from optuna.samplers import RandomSampler
from optuna.study import Study
from optuna.study._study_direction import StudyDirection
from optuna.trial import FrozenTrial
from optuna.trial import TrialState


class MABEpsilonGreedySampler(RandomSampler):
"""Sampler based on Multi-armed Bandit Algorithm.

Args:
epsilon (float):
Params for epsolon-greedy algorithm.
epsilon is probability of selecting arm randomly.
seed (int | None):
Seed for random number generator and arm selection.

"""

    def __init__(
        self,
        epsilon: float = 0.7,
        seed: Optional[int] = None,
    ) -> None:
        super().__init__(seed)
        self._epsilon = epsilon

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        states = (TrialState.COMPLETE, TrialState.PRUNED)
        trials = study._get_trials(deepcopy=False, states=states, use_cache=True)

        rewards_by_choice: defaultdict = defaultdict(float)
        cnt_by_choice: defaultdict = defaultdict(int)
        for t in trials:
            rewards_by_choice[t.params[param_name]] += t.value
            cnt_by_choice[t.params[param_name]] += 1

        # Use never-selected arms for initialization, as in the UCB1 algorithm.
        # ref. https://github.com/optuna/optunahub-registry/pull/155#discussion_r1780446062
        never_selected = [
            arm for arm in param_distribution.choices if arm not in rewards_by_choice
        ]
        if never_selected:
            return self._rng.rng.choice(never_selected)

        # If all arms have been selected at least once, select an arm by epsilon-greedy.
        if self._rng.rng.rand() < self._epsilon:
            return self._rng.rng.choice(param_distribution.choices)
        else:
            if study.direction == StudyDirection.MINIMIZE:
                return min(
                    param_distribution.choices,
                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
Contributor comment ([nit]):
Now, thanks to the last modification, we do not have to use the min/max operator here!

Suggested change
                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
                    key=lambda x: rewards_by_choice[x] / cnt_by_choice[x],

                )
            else:
                return max(
                    param_distribution.choices,
                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
Contributor comment ([nit]):
Same here :)

Suggested change
                    key=lambda x: rewards_by_choice[x] / max(cnt_by_choice[x], 1),
                    key=lambda x: rewards_by_choice[x] / cnt_by_choice[x],

                )
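
For reference, a minimal self-contained sketch of the epsilon-greedy rule implemented above, outside of Optuna (the function name and the `history` format are illustrative assumptions): never-selected arms are tried first, then with probability `epsilon` an arm is picked at random, and otherwise the arm with the best mean reward so far is returned.

```python
import random
from collections import defaultdict


def epsilon_greedy_pick(choices, history, epsilon=0.7, minimize=True):
    """history: list of (arm, reward) pairs from finished trials."""
    rewards = defaultdict(float)
    counts = defaultdict(int)
    for arm, reward in history:
        rewards[arm] += reward
        counts[arm] += 1

    # Try every arm at least once before exploiting (UCB1-style initialization).
    never_selected = [arm for arm in choices if arm not in counts]
    if never_selected:
        return random.choice(never_selected)

    # Explore with probability epsilon, otherwise exploit the best mean reward.
    if random.random() < epsilon:
        return random.choice(choices)
    best = min if minimize else max
    return best(choices, key=lambda arm: rewards[arm] / counts[arm])
```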