diff --git a/baybe/kernels/priors/basic.py b/baybe/kernels/priors/basic.py
index f7b79fe29..ec744de77 100644
--- a/baybe/kernels/priors/basic.py
+++ b/baybe/kernels/priors/basic.py
@@ -112,4 +112,4 @@ def to_gpytorch(self, *args, **kwargs):  # noqa: D102
 
     def numpy(self) -> np.ndarray:
         """Return alpha and beta as a numpy ndarray."""
-        return np.array([self.alpha, self.beta])
+        return np.array([self.alpha, self.beta]).reshape(-1, 1)
diff --git a/baybe/surrogates/multi_armed_bandit.py b/baybe/surrogates/multi_armed_bandit.py
index 57cb46eaa..1100e67ef 100644
--- a/baybe/surrogates/multi_armed_bandit.py
+++ b/baybe/surrogates/multi_armed_bandit.py
@@ -51,12 +51,12 @@ def _posterior_beta_parameters(self) -> np.ndarray[float]:
     @property
     def means(self) -> np.ndarray[float]:
         """Posterior means of the bandit arms."""
-        return beta(*self._posterior_beta_parameters.T).mean()
+        return beta(*self._posterior_beta_parameters).mean()
 
     @property
     def variances(self) -> np.ndarray[float]:
         """Posterior variances of the bandit arms."""
-        return beta(*self._posterior_beta_parameters.T).var()
+        return beta(*self._posterior_beta_parameters).var()
 
     def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]:
         # See base class.
diff --git a/examples/Multi_Armed_Bandit/Multi_Armed_Bandit_Header.md b/examples/Multi_Armed_Bandit/Multi_Armed_Bandit_Header.md
new file mode 100644
index 000000000..2accce4ff
--- /dev/null
+++ b/examples/Multi_Armed_Bandit/Multi_Armed_Bandit_Header.md
@@ -0,0 +1,4 @@
+# Multi-Armed Bandit
+
+These examples demonstrate BayBE's
+{doc}`Multi-Armed Bandit Capabilities `.
\ No newline at end of file
diff --git a/examples/Multi_Armed_Bandit/find_maximizing_arm.py b/examples/Multi_Armed_Bandit/find_maximizing_arm.py
index 75ae5ff35..b5175aace 100644
--- a/examples/Multi_Armed_Bandit/find_maximizing_arm.py
+++ b/examples/Multi_Armed_Bandit/find_maximizing_arm.py
@@ -1,7 +1,13 @@
-import click
+## Example for a Multi-Armed Bandit
+
+# This example shows how to use the Bernoulli multi-armed bandit surrogate.
+
+from collections.abc import Iterable
+from typing import Union
+
 import numpy as np
-from multi_armed_bandit import MultiArmedBandit
-from scipy.stats import bernoulli
+from attrs import define
+from scipy.stats import bernoulli, rv_continuous, rv_discrete
 
 from baybe import Campaign
 from baybe.acquisition import ProbabilityOfImprovement
@@ -16,64 +22,83 @@
 from baybe.surrogates import BernoulliMultiArmedBanditSurrogate
 from baybe.targets import BinaryTarget
 
+### Setup
+# We are using a 5-armed bandit in this example. Each arm is assigned a random win rate.
+N_ARMS = 5
+N_ITERATIONS = 300
 np.random.seed(0)
 
 
-@click.command()
-@click.option("--n_arms", default=2, help="Number of arms")
-@click.option("--n_iter", default=1, help="Number of iterations")
-@click.option(
-    "--log_each_n_steps", default=100, help="Number of iteratins between prints."
+@define
+class MultiArmedBanditModel:
+    """Representation of a multi-armed bandit."""
+
+    real_distributions: list[Union[rv_discrete, rv_continuous]]
+    """List of the reward distribution per arm."""
+
+    def sample(self, arm_idxs: Iterable[int]):
+        """Draw reward samples from the arms indexed in arm_idxs."""
+        return [self.real_distributions[arm_idx].rvs() for arm_idx in arm_idxs]
+
+    @property
+    def means(self):
+        """Return the real means of the reward distributions."""
+        return [dist.stats(moments="m") for dist in self.real_distributions]
+
+
+mab = MultiArmedBanditModel(
+    real_distributions=[bernoulli(np.random.rand()) for _ in range(N_ARMS)]
 )
-def run_experiment(n_arms, n_iter, log_each_n_steps):
-    """Search for the arm with maximum win rate."""
-    mab = MultiArmedBandit(
-        real_distributions=[bernoulli(np.random.rand()) for _ in range(n_arms)]
-    )
-    print("real means", mab.means)
-    target = BinaryTarget(name="win_rate")
-    objective = SingleTargetObjective(target=target)
-    parameters = [
-        CategoricalParameter(
-            name="arm",
-            values=[str(i) for i in range(n_arms)],
-        )
-    ]
-    searchspace = SearchSpace.from_product(parameters)
-    mabs = BernoulliMultiArmedBanditSurrogate()
-    recommender = TwoPhaseMetaRecommender(
-        initial_recommender=FPSRecommender(
-            allow_repeated_recommendations=True,
-            allow_recommending_already_measured=True,
-        ),
-        recommender=SequentialGreedyRecommender(
-            surrogate_model=mabs,
-            allow_repeated_recommendations=True,
-            allow_recommending_already_measured=True,
-            acquisition_function=ProbabilityOfImprovement(),
-        ),
+print("real means", mab.means)
+
+
+### Campaign
+# We are using the BinaryTarget as we are modeling a Bernoulli reward.
+# The search space has one categorical parameter to model the arms of the bandit.
+# The probability of improvement acquisition function is not ideal in this setting,
+# as it assumes a normally distributed win rate.
+
+target = BinaryTarget(name="win_rate")
+objective = SingleTargetObjective(target=target)
+parameters = [
+    CategoricalParameter(
+        name="arm",
+        values=[str(i) for i in range(N_ARMS)],
     )
-    campaign = Campaign(searchspace, objective, recommender)
-
-    total_reward = 0
-    for i in range(n_iter):
-        df = campaign.recommend(batch_size=1)
-        reward = mab.sample(df.index.tolist())
-        total_reward += sum(reward)
-        df["win_rate"] = reward
-        campaign.add_measurements(df)
-
-        if (i + 1) % log_each_n_steps == 0:
-            print("iter", i + 1)
-            print("estimated means", mabs.means)
-            print("-" * 5)
-
-    real_means = mab.means
-    print("real means", real_means)
-    print("optimal expected reward", max(real_means) * n_iter)
-    print("total_reward", total_reward)
-    print("mean reward", total_reward / n_iter)
-
-
-if __name__ == "__main__":
-    run_experiment()
+]
+searchspace = SearchSpace.from_product(parameters)
+mabs = BernoulliMultiArmedBanditSurrogate()
+recommender = TwoPhaseMetaRecommender(
+    initial_recommender=FPSRecommender(
+        allow_repeated_recommendations=True,
+        allow_recommending_already_measured=True,
+    ),
+    recommender=SequentialGreedyRecommender(
+        surrogate_model=mabs,
+        allow_repeated_recommendations=True,
+        allow_recommending_already_measured=True,
+        acquisition_function=ProbabilityOfImprovement(),
+    ),
+)
+campaign = Campaign(searchspace, objective, recommender)
+
+
+### Optimization Loop
+total_reward = 0
+for i in range(N_ITERATIONS):
+    df = campaign.recommend(batch_size=1)
+    reward = mab.sample(df.index.tolist())
+    total_reward += sum(reward)
+    df["win_rate"] = reward
+    campaign.add_measurements(df)
+
+    if (i + 1) % 50 == 0:
+        print("iter", i + 1)
+        print("estimated means", mabs.means)
+        print("-" * 5)
+
+real_means = mab.means
+print("real means", real_means)
+print("optimal expected reward", max(real_means) * N_ITERATIONS)
+print("total_reward", total_reward)
+print("mean reward", total_reward / N_ITERATIONS)
diff --git a/examples/Multi_Armed_Bandit/multi_armed_bandit.py b/examples/Multi_Armed_Bandit/multi_armed_bandit.py
deleted file mode 100644
index 1a4223ebb..000000000
--- a/examples/Multi_Armed_Bandit/multi_armed_bandit.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from collections.abc import Iterable
-
-from attr import define
-from scipy.stats import rv_continuous, rv_discrete
-
-
-@define
-class MultiArmedBandit:
-    """Representation of a multi armed bandit."""
-
-    real_distributions: list[rv_discrete | rv_continuous]
-    """List of the reward distribution per arm."""
-
-    def sample(self, arm_idxs: Iterable[int]):
-        """Draw reward samples from the arms indexed in arm_idxs."""
-        return [self.real_distributions[arm_idx].rvs() for arm_idx in arm_idxs]
-
-    @property
-    def means(self):
-        """Return the real means of the reward distributions."""
-        return [dist.stats(moments="m") for dist in self.real_distributions]
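For context on the surrogate changes above, here is a minimal sketch of the parameter layout the edited properties appear to assume: the prior's numpy() now returns a (2, 1) column, and _posterior_beta_parameters is treated as a (2, n_arms) array whose two rows are the per-arm alpha and beta values. Unpacking the rows directly into scipy.stats.beta then yields per-arm statistics, which is why the .T was dropped. The prior values and win/loss counts below are hypothetical and only illustrate the broadcasting; they are not taken from the BayBE code.

import numpy as np
from scipy.stats import beta

# Hypothetical Beta(1, 1) prior; the (2, 1) shape mirrors the reshape(-1, 1) change.
prior = np.array([1.0, 1.0]).reshape(-1, 1)
# Hypothetical observed wins (row 0) and losses (row 1) per arm, shape (2, n_arms).
counts = np.array([[3.0, 0.0, 5.0],
                   [1.0, 4.0, 2.0]])
posterior = prior + counts  # broadcasts to shape (2, n_arms)

# Row 0 supplies all alpha values, row 1 all beta values, so no transpose is needed.
print(beta(*posterior).mean())  # per-arm posterior means
print(beta(*posterior).var())   # per-arm posterior variances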