Move outcome_names from BenchmarkRunner to BenchmarkTestFunction (facebook#3021)

Summary:

**Context**: This will enable constructing the `BenchmarkRunner` based on the `BenchmarkProblem` and `BenchmarkMethod` rather than asking the user to provide it. In addition to making things simpler (it's weird that a runner is part of a problem!), that will enable the Runner to be aware of aspects of the method, such as parallelism.

This will also enable us to return metrics in a dict format (`{outcome_name: value}`) if we choose to do so in the future. That may be simpler since the data already gets processed into dicts by the runner.

Note that for problems based on BoTorch problems, names are usually already set programmatically, so that logic moves to the test problem.

**This diff**:
* Requires `outcome_names` on `BenchmarkTestFunction`
* Removes `outcome_names` as an argument from `BenchmarkRunner`
* Sets outcome names automatically on `BoTorchTestFunction` when they are not provided, following the convention used elsewhere (see the sketch below).
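
A minimal sketch of that naming convention (illustrative only, not part of this diff), assuming the BoTorch `Hartmann` and `ConstrainedHartmann` test problems:

```python
from ax.benchmark.benchmark_test_functions.botorch_test import BoTorchTestFunction
from botorch.test_functions.synthetic import ConstrainedHartmann, Hartmann

# With no outcome_names given, a single-objective problem gets "objective_0" ...
single = BoTorchTestFunction(botorch_problem=Hartmann(dim=6))
print(single.outcome_names)  # ["objective_0"]

# ... and a constrained problem additionally gets a "constraint" outcome.
constrained = BoTorchTestFunction(botorch_problem=ConstrainedHartmann(dim=6))
print(constrained.outcome_names)  # ["objective_0", "constraint"]
```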

Update usages:
* Remove `outcome_names` from calls to `BenchmarkRunner`
* Add `outcome_names` to calls to `BenchmarkTestFunction` where needed; they are generally already present on surrogate test functions and can be constructed automatically for BoTorch-based problems (see the sketch below).
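
As a rough before/after sketch of a typical call-site update (illustrative only; `Branin` stands in for any BoTorch test problem):

```python
from ax.benchmark.benchmark_runner import BenchmarkRunner
from ax.benchmark.benchmark_test_functions.botorch_test import BoTorchTestFunction
from botorch.test_functions.synthetic import Branin

# Before: BenchmarkRunner(test_function=..., outcome_names=["branin"])
# After: the names live on the test function, and the runner exposes them
# through its read-only `outcome_names` property.
runner = BenchmarkRunner(
    test_function=BoTorchTestFunction(botorch_problem=Branin(), outcome_names=["branin"])
)
assert runner.outcome_names == ["branin"]
```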

Differential Revision: D65497700
esantorella authored and facebook-github-bot committed Nov 5, 2024
1 parent 93c236e commit 1719c67
Showing 10 changed files with 44 additions and 24 deletions.
5 changes: 3 additions & 2 deletions ax/benchmark/benchmark_problem.py
@@ -378,8 +378,9 @@ def create_problem_from_botorch(
         search_space=search_space,
         optimization_config=optimization_config,
         runner=BenchmarkRunner(
-            test_function=BoTorchTestFunction(botorch_problem=test_problem),
-            outcome_names=outcome_names,
+            test_function=BoTorchTestFunction(
+                botorch_problem=test_problem, outcome_names=outcome_names
+            ),
             search_space_digest=extract_search_space_digest(
                 search_space=search_space,
                 param_names=list(search_space.parameters.keys()),
7 changes: 5 additions & 2 deletions ax/benchmark/benchmark_runner.py
@@ -47,15 +47,13 @@ class BenchmarkRunner(Runner):
     not over-engineer for that before such a use case arrives.

     Args:
-        outcome_names: The names of the outcomes returned by the problem.
         test_function: A ``BenchmarkTestFunction`` from which to generate
             deterministic data before adding noise.
         noise_std: The standard deviation of the noise added to the data. Can be
             a list or dict to be per-metric.
         search_space_digest: Used to extract target fidelity and task.
     """

-    outcome_names: list[str]
     test_function: BenchmarkTestFunction
     noise_std: float | list[float] | dict[str, float] = 0.0
     # pyre-fixme[16]: Pyre doesn't understand InitVars
@@ -71,6 +69,11 @@ def __post_init__(self, search_space_digest: SearchSpaceDigest | None) -> None:
         else:
             self.target_fidelity_and_task = {}

+    @property
+    def outcome_names(self) -> list[str]:
+        """The names of the outcomes."""
+        return self.test_function.outcome_names
+
     def get_Y_true(self, params: Mapping[str, TParamValue]) -> Tensor:
         """Evaluates the test problem.
4 changes: 3 additions & 1 deletion ax/benchmark/benchmark_test_function.py
@@ -21,12 +21,14 @@ class BenchmarkTestFunction(ABC):
     (Noise - if desired - is added by the runner.)
     """

+    outcome_names: list[str]
+
     @abstractmethod
     def evaluate_true(self, params: Mapping[str, TParamValue]) -> Tensor:
         """
         Evaluate noiselessly.

         Returns:
-            1d tensor of shape (num_outcomes,).
+            1d tensor of shape (len(outcome_names),).
         """
         ...
16 changes: 15 additions & 1 deletion ax/benchmark/benchmark_test_functions/botorch_test.py
@@ -6,11 +6,12 @@
 # pyre-strict

 from collections.abc import Mapping
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from itertools import islice

 import torch
 from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
+from botorch.test_functions.multi_objective import MultiObjectiveTestProblem
 from botorch.test_functions.synthetic import BaseTestProblem, ConstrainedBaseTestProblem
 from botorch.utils.transforms import normalize, unnormalize

@@ -33,6 +34,7 @@ class BoTorchTestFunction(BenchmarkTestFunction):
         evaluated using the raw parameter values.
     """

+    outcome_names: list[str] = field(default_factory=list)
     botorch_problem: BaseTestProblem
     modified_bounds: list[tuple[float, float]] | None = None
@@ -45,6 +47,18 @@ def __post_init__(self) -> None:
                 "noise should be set on the `BenchmarkRunner`, not the test function."
             )
         self.botorch_problem = self.botorch_problem.to(dtype=torch.double)
+        if len(self.outcome_names) == 0:
+            n_objectives = (
+                self.botorch_problem.num_objectives
+                if isinstance(self.botorch_problem, MultiObjectiveTestProblem)
+                else 1
+            )
+            is_constrained = isinstance(
+                self.botorch_problem, ConstrainedBaseTestProblem
+            )
+            self.outcome_names = [f"objective_{i}" for i in range(n_objectives)] + (
+                ["constraint"] if is_constrained else []
+            )

     def tensorize_params(self, params: Mapping[str, int | float]) -> torch.Tensor:
         X = torch.tensor(
5 changes: 3 additions & 2 deletions ax/benchmark/problems/hpo/torchvision.py
@@ -215,8 +215,9 @@ def get_pytorch_cnn_torchvision_benchmark_problem(
         objective_name="accuracy",
     )
     runner = BenchmarkRunner(
-        test_function=PyTorchCNNTorchvisionBenchmarkTestFunction(name=name),
-        outcome_names=outcome_names,
+        test_function=PyTorchCNNTorchvisionBenchmarkTestFunction(
+            name=name, outcome_names=outcome_names
+        )
     )
     return BenchmarkProblem(
         name=f"HPO_PyTorchCNN_Torchvision::{name}",
5 changes: 3 additions & 2 deletions ax/benchmark/problems/synthetic/discretized/mixed_integer.py
@@ -104,9 +104,10 @@ def _get_problem_from_common_inputs(
     test_problem = test_problem_class(dim=dim, bounds=test_problem_bounds)
     runner = BenchmarkRunner(
         test_function=BoTorchTestFunction(
-            botorch_problem=test_problem, modified_bounds=bounds
+            botorch_problem=test_problem,
+            modified_bounds=bounds,
+            outcome_names=[metric_name],
         ),
-        outcome_names=[metric_name],
     )
     return BenchmarkProblem(
         name=benchmark_name + ("_observed_noise" if observe_noise_sd else ""),
2 changes: 1 addition & 1 deletion ax/benchmark/problems/synthetic/hss/jenatton.py
@@ -119,7 +119,7 @@ def get_jenatton_benchmark_problem(
         search_space=search_space,
         optimization_config=optimization_config,
         runner=BenchmarkRunner(
-            test_function=Jenatton(), outcome_names=[name], noise_std=noise_std
+            test_function=Jenatton(outcome_names=[name]), noise_std=noise_std
         ),
         num_trials=num_trials,
         observe_noise_stds=observe_noise_sd,
1 change: 0 additions & 1 deletion ax/benchmark/tests/test_benchmark_problem.py
@@ -54,7 +54,6 @@ def test_inference_value_not_implemented(self) -> None:
         optimization_config = OptimizationConfig(objective=objectives[0])
         runner = BenchmarkRunner(
             test_function=BoTorchTestFunction(botorch_problem=Branin()),
-            outcome_names=["foo"],
         )
         with self.assertRaisesRegex(NotImplementedError, "Only `n_best_points=1`"):
             BenchmarkProblem(
13 changes: 4 additions & 9 deletions ax/benchmark/tests/test_benchmark_runner.py
@@ -85,11 +85,7 @@ def test_runner(self) -> None:
             outcome_names = ["branin"]

             # Set up runner
-            runner = BenchmarkRunner(
-                test_function=test_function,
-                outcome_names=outcome_names,
-                noise_std=noise_std,
-            )
+            runner = BenchmarkRunner(test_function=test_function, noise_std=noise_std)

             test_description = f"{test_function=}, {noise_std=}"
             with self.subTest(
@@ -231,17 +227,16 @@ def test_runner(self) -> None:
             BenchmarkRunner.deserialize_init_args({})

     def test_heterogeneous_noise(self) -> None:
-        for noise_std in [[0.1, 0.05], {"objective": 0.1, "constraint": 0.05}]:
+        for noise_std in [[0.1, 0.05], {"objective_0": 0.1, "constraint": 0.05}]:
             runner = BenchmarkRunner(
                 test_function=BoTorchTestFunction(
                     botorch_problem=ConstrainedHartmann(dim=6)
                 ),
                 noise_std=noise_std,
-                outcome_names=["objective", "constraint"],
             )
             self.assertDictEqual(
                 checked_cast(dict, runner.get_noise_stds()),
-                {"objective": 0.1, "constraint": 0.05},
+                {"objective_0": 0.1, "constraint": 0.05},
             )

             X = torch.rand(1, 6, dtype=torch.double)
@@ -257,4 +252,4 @@ def test_heterogeneous_noise(self) -> None:
             self.assertSetEqual(set(res.keys()), {"Ys", "Ystds", "outcome_names"})
             self.assertSetEqual(set(res["Ys"].keys()), {"0_0"})
             self.assertEqual(res["Ystds"]["0_0"], [0.1, 0.05])
-            self.assertEqual(res["outcome_names"], ["objective", "constraint"])
+            self.assertEqual(res["outcome_names"], ["objective_0", "constraint"])
10 changes: 7 additions & 3 deletions ax/utils/testing/benchmark_stubs.py
@@ -6,7 +6,7 @@

 # pyre-strict

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any

 import numpy as np
@@ -103,7 +103,7 @@ def get_soo_surrogate_test_function(lazy: bool = True) -> SurrogateTestFunction:
 def get_soo_surrogate() -> BenchmarkProblem:
     experiment = get_branin_experiment(with_completed_trial=True)
     test_function = get_soo_surrogate_test_function()
-    runner = BenchmarkRunner(test_function=test_function, outcome_names=["branin"])
+    runner = BenchmarkRunner(test_function=test_function)

     observe_noise_sd = True
     objective = Objective(
@@ -140,7 +140,7 @@ def get_moo_surrogate() -> BenchmarkProblem:
         outcome_names=outcome_names,
         get_surrogate_and_datasets=lambda: (surrogate, []),
     )
-    runner = BenchmarkRunner(test_function=test_function, outcome_names=outcome_names)
+    runner = BenchmarkRunner(test_function=test_function)
     observe_noise_sd = True
     optimization_config = MultiObjectiveOptimizationConfig(
         objective=MultiObjective(
@@ -243,9 +243,13 @@ def get_aggregated_benchmark_result() -> AggregatedBenchmarkResult:

 @dataclass(kw_only=True)
 class DummyTestFunction(BenchmarkTestFunction):
+    outcome_names: list[str] = field(default_factory=list)
     num_outcomes: int = 1
     dim: int = 6

+    def __post_init__(self) -> None:
+        self.outcome_names = [f"objective_{i}" for i in range(self.num_outcomes)]
+
     # pyre-fixme[14]: Inconsistent override, as dict[str, float] is not a
     # `TParameterization`
     def evaluate_true(self, params: dict[str, float]) -> torch.Tensor:
