From d839dc1d224c7da0d826547782d3678f27829cde Mon Sep 17 00:00:00 2001 From: Shmg Date: Fri, 3 Jan 2025 11:24:38 +0100 Subject: [PATCH 1/3] Add ChimeraObjective with key functionalities and library registrations --- baybe/acquisition/base.py | 7 + baybe/objectives/__init__.py | 2 + baybe/objectives/chimera.py | 362 +++++++++++++++++++++++++++++++++++ baybe/objectives/enum.py | 3 + 4 files changed, 374 insertions(+) create mode 100644 baybe/objectives/chimera.py diff --git a/baybe/acquisition/base.py b/baybe/acquisition/base.py index 410a308e0..23cb135e8 100644 --- a/baybe/acquisition/base.py +++ b/baybe/acquisition/base.py @@ -16,6 +16,7 @@ UnidentifiedSubclassError, ) from baybe.objectives.base import Objective +from baybe.objectives.chimera import ChimeraObjective from baybe.objectives.desirability import DesirabilityObjective from baybe.objectives.single import SingleTargetObjective from baybe.searchspace.core import SearchSpace @@ -127,6 +128,12 @@ def to_botorch( additional_params["best_f"] = ( bo_surrogate.posterior(train_x).mean.max().item() ) + case ChimeraObjective(): + # TODO: for now minimization + if "best_f" in signature_params: + additional_params["best_f"] = ( + bo_surrogate.posterior(train_x).mean.min().item() + ) case _: raise ValueError(f"Unsupported objective type: {objective}") diff --git a/baybe/objectives/__init__.py b/baybe/objectives/__init__.py index 30a79bb16..832a5af3c 100644 --- a/baybe/objectives/__init__.py +++ b/baybe/objectives/__init__.py @@ -1,9 +1,11 @@ """BayBE objectives.""" +from baybe.objectives.chimera import ChimeraObjective from baybe.objectives.desirability import DesirabilityObjective from baybe.objectives.single import SingleTargetObjective __all__ = [ "SingleTargetObjective", "DesirabilityObjective", + "ChimeraObjective", ] diff --git a/baybe/objectives/chimera.py b/baybe/objectives/chimera.py new file mode 100644 index 000000000..c9733c823 --- /dev/null +++ b/baybe/objectives/chimera.py @@ -0,0 +1,362 @@ 
"""Functionality for chimera objectives."""

import gc
import warnings
from enum import Enum
from typing import TypeGuard

import cattrs
import numpy as np
import numpy.typing as npt
import pandas as pd
from attrs import define, field
from attrs.validators import deep_iterable, ge, gt, instance_of, min_len
from typing_extensions import override

from baybe.objectives.base import Objective
from baybe.targets.base import Target
from baybe.targets.numerical import NumericalTarget
from baybe.utils.basic import to_tuple
from baybe.utils.dataframe import get_transform_objects, pretty_print_df
from baybe.utils.plotting import to_string
from baybe.utils.validation import finite_float


def _is_all_numerical_targets(
    x: tuple[Target, ...], /
) -> TypeGuard[tuple[NumericalTarget, ...]]:
    """Typeguard helper function."""
    return all(isinstance(y, NumericalTarget) for y in x)


class ThresholdType(Enum):
    """Available types for target thresholds."""

    ABSOLUTE = "ABSOLUTE"
    """The target threshold is an absolute value."""

    PERCENTILE = "PERCENTILE"
    """The target threshold is a percentile value."""

    FRACTION = "FRACTION"
    """The target threshold is a fraction value."""


@define(frozen=True, slots=False)
class ChimeraObjective(Objective):
    """An objective scalarizing multiple targets into Chimera merit values.

    The targets are processed in the order in which they are provided: the first
    target is the primary one, and each subsequent target is only taken into
    account where the preceding targets satisfy their thresholds. Internally,
    every target is converted into a minimization problem before the merits
    are computed.
    """

    _targets: tuple[Target, ...] = field(
        converter=to_tuple,
        validator=[min_len(2), deep_iterable(member_validator=instance_of(Target))],
        alias="targets",
    )
    """The targets considered by the objective."""

    targets_threshold_values: tuple[float, ...] = field(
        converter=lambda w: cattrs.structure(w, tuple[float, ...]),
        validator=deep_iterable(member_validator=[finite_float, ge(0.0)]),
    )
    """The target degradation thresholds for each target from its optimum."""

    targets_threshold_types: tuple[ThresholdType, ...] | None = field(
        converter=lambda x: None
        if x is None
        else tuple(
            ThresholdType(value) if isinstance(value, str) else value for value in x
        )
    )
    """An optional tuple of target threshold types.

    ``None`` is treated like the unspecified case, i.e. every target uses
    :attr:`ThresholdType.FRACTION`.
    """

    softness: float = field(
        converter=float,
        validator=gt(0.0),
    )
    """The softness parameter regulating the Heaviside function."""

    @targets_threshold_values.default
    def _default_targets_threshold_values(self) -> tuple[float, ...]:
        default_values = (0.0,) * len(self._targets)  # TODO: interpretation?
        warnings.warn(
            f"The values for targets thresholds have not been specified. "
            f"Setting the target threshold values to {default_values}.",
            UserWarning,
        )
        return default_values

    @targets_threshold_types.default
    def _default_targets_threshold_types(self) -> tuple[ThresholdType, ...]:
        default_values = (ThresholdType.FRACTION,) * len(self._targets)
        warnings.warn(
            f"The types for target thresholds have not been specified. "
            f"Setting the target threshold types to {default_values}.",
            UserWarning,
        )
        return default_values

    @softness.default  # TODO: do we need to add warning here?
    def _default_softness(self) -> float:
        return 1e-3

    @_targets.validator
    def _validate_targets(self, _, targets) -> None:  # noqa: DOC101, DOC103
        if not _is_all_numerical_targets(targets):
            raise TypeError(
                f"'{self.__class__.__name__}' currently only supports targets "
                f"of type '{NumericalTarget.__name__}'."
            )
        if len({t.name for t in targets}) != len(targets):
            raise ValueError("All target names must be unique.")
        if not all(target._is_transform_normalized for target in targets):
            raise ValueError(
                "All targets must have normalized computational representations to "
                "enable the computation of Chimera merit values. This requires "
                "having appropriate target bounds and transformations in place."
            )

    @targets_threshold_values.validator
    def _validate_targets_threshold_values(self, _, values) -> None:
        if (lv := len(values)) != (lt := len(self._targets)):
            raise ValueError(
                f"If custom threshold values are specified, there must be one for each target. "  # noqa: E501
                f"Specified number of targets: {lt}. Specified number of threshold values: {lv}."  # noqa: E501
            )

    @targets_threshold_types.validator
    def _validate_targets_threshold_types(self, _, types) -> None:
        if types is None:
            # `None` is an accepted value (see the field annotation/converter);
            # it is resolved to the default types on access.
            return
        if (lt := len(types)) != (ltg := len(self._targets)):
            raise ValueError(
                f"If custom threshold types are specified, there must be one for each target. "  # noqa: E501
                f"Specified number of targets: {ltg}. Specified number of threshold types: {lt}."  # noqa: E501
            )

    def _resolved_threshold_types(self) -> tuple[ThresholdType, ...]:
        """Return the threshold types, falling back to ``FRACTION`` if unset."""
        if self.targets_threshold_types is None:
            return (ThresholdType.FRACTION,) * len(self._targets)
        return self.targets_threshold_types

    def _soft_heaviside(self, value: float, softness: float) -> float:
        """Evaluate the logistic function ``1 / (1 + exp(-value / softness))``."""
        # Going through `logaddexp` avoids overflow for large negative arguments
        arg = -value / softness
        return np.exp(-np.logaddexp(0, arg))

    def _hard_heaviside(self, value: float) -> float:
        """Evaluate the hard step function: 1.0 where ``value >= 0``, else 0.0."""
        # `np.greater_equal` also works for plain Python scalars, for which the
        # result of a bare `value >= 0` would not offer `.astype`
        return np.greater_equal(value, 0).astype(float)

    def step(self, value: float, softness: float = 1e-6) -> float:
        """Apply a step function to the given value based on the specified softness.

        Args:
            value: The input value to apply the step function to.
            softness: The softness parameter for the step function.
                If less than 1e-5, a hard Heaviside step function is used.
                Otherwise, a soft Heaviside step function is used.
                Default is 1e-6.

        Returns:
            The result of the step function applied to the input value.
        """
        if softness < 1e-5:
            return self._hard_heaviside(value)

        return self._soft_heaviside(value, softness)

    def _invert_binary(self, a: float) -> float:
        """Return the complement ``1 - a`` of a (soft) binary mask."""
        return 1 - a

    def _shift(
        self,
        transformed: pd.DataFrame,
        transformed_threshold_values: list[float],
    ) -> tuple[np.ndarray, np.ndarray]:
        """Shift the target values and thresholds according to the hierarchy.

        Args:
            transformed: The target values in minimization form, with one column
                per target (in the order of the objective's targets).
            transformed_threshold_values: One threshold per target, expressed in
                the same representation as ``transformed``.

        Raises:
            ValueError: If an unsupported threshold type is encountered.

        Returns:
            The shifted values, containing one row more than there are targets
            (the last row is the re-shifted first target acting as fallback
            term), and the shifted thresholds, one per target.
        """
        values = transformed.values
        n_targets = len(self.targets)

        # The primary target enters unshifted
        shifted_values = [values[:, 0]]
        shifted_thresholds: list[float] = []
        shift = 0.0
        # Region of interest, narrowed down level by level
        domain = transformed.index

        for idx, (target, threshold_value, threshold_type) in enumerate(
            zip(
                self.targets,
                transformed_threshold_values,
                self._resolved_threshold_types(),
            )
        ):
            if threshold_type == ThresholdType.PERCENTILE:
                _threshold = transformed[target.name].quantile(
                    threshold_value, interpolation="linear"
                )
            elif threshold_type in (ThresholdType.FRACTION, ThresholdType.ABSOLUTE):
                # Both are used verbatim here (absolute thresholds have already
                # been brought into the transformed representation by the caller)
                _threshold = threshold_value
            else:
                raise ValueError(f"Unsupported ThresholdType: {threshold_type}")

            # Compute and store shifted threshold
            _shifted_threshold = _threshold - shift
            shifted_thresholds.append(_shifted_threshold)

            # Adjust to region of interest for the next (lower-)level objective
            interest = transformed[target.name][domain] < _shifted_threshold
            if interest.any():
                domain = domain[interest]

            # Compute the new shift for the next target, looping back to the
            # first target after the last one.
            # TODO: Explanation
            # We ensure no value of lower-level target can exceed the baseline
            # defined (minimum) by the cumulative minima from higher-level target
            next_idx = (idx + 1) % n_targets
            shift = values[:, next_idx].max() - min(shifted_thresholds)

            # Apply shift directly to the corresponding target values
            shifted_values.append(values[:, next_idx] - shift)

        return np.array(shifted_values), np.asarray(shifted_thresholds)

    def _scalarize(
        self, shifted_values: npt.ArrayLike, shifted_thresholds: npt.ArrayLike
    ) -> np.ndarray:
        """Collapse the shifted target values into a single merit per row.

        Args:
            shifted_values: The shifted values as returned by ``_shift``, i.e.
                one row per target plus the trailing fallback row.
            shifted_thresholds: The shifted thresholds, one per target.

        Returns:
            The merit values. If the largest merit is positive, the merits are
            min-max normalized to the unit interval.
        """
        shifted_values = np.asarray(shifted_values)
        shifted_thresholds = np.asarray(shifted_thresholds)

        # Start with the last term in the shifted values (the fallback term)
        # TODO: explain a bit what is this fallback term
        merits = shifted_values[-1].copy()

        # Reverse iterate through all but the last row
        for idx in reversed(range(shifted_values.shape[0] - 1)):
            current_obj = shifted_values[idx]
            current_tol = shifted_thresholds[idx]

            # The mask is (close to) 1 where the threshold is violated. The
            # configured softness must be passed on explicitly, otherwise the
            # default of `step` would always select the hard step function.
            pos_mask = self.step(current_obj - current_tol, self.softness)
            neg_mask = self._invert_binary(pos_mask)

            # Scalarize through inversely updating merits:
            # (kept if within threshold, else replaced by higher-level)
            merits = merits * neg_mask + pos_mask * current_obj

        # Normalize CHIMERA merits
        lowest, highest = merits.min(), merits.max()
        if highest > 0:
            span = highest - lowest
            # Constant merits would otherwise yield 0/0 = NaN
            merits = (merits - lowest) / span if span > 0 else merits - lowest

        return merits

    @override
    @property
    def targets(self) -> tuple[Target, ...]:
        return self._targets

    @override
    def __str__(self) -> str:
        targets_list = [target.summary() for target in self.targets]
        targets_df = pd.DataFrame(targets_list)
        targets_df["Threshold values"] = self.targets_threshold_values
        targets_df["Threshold types"] = [
            t.value for t in self._resolved_threshold_types()
        ]

        fields = [
            to_string("Type", self.__class__.__name__, single_line=True),
            to_string("Targets", pretty_print_df(targets_df)),
        ]

        return to_string("Objective", *fields)

    @override
    def transform(
        self,
        df: pd.DataFrame | None = None,
        /,
        *,
        allow_missing: bool = False,
        allow_extra: bool | None = None,
        data: pd.DataFrame | None = None,
    ) -> pd.DataFrame:
        # >>>>>>>>>> Deprecation
        if not ((df is None) ^ (data is None)):
            raise ValueError(
                "Provide the dataframe to be transformed as argument to `df`."
            )

        if data is not None:
            df = data
            warnings.warn(
                "Providing the dataframe via the `data` argument is deprecated and "
                "will be removed in a future version. Please pass your dataframe "
                "as positional argument instead.",
                DeprecationWarning,
            )

        # Mypy does not infer from the above that `df` must be a dataframe here
        assert isinstance(df, pd.DataFrame)

        if allow_extra is None:
            allow_extra = True
            if set(df.columns) - {p.name for p in self.targets}:
                warnings.warn(
                    "For backward compatibility, the new `allow_extra` flag is set "
                    "to `True` when left unspecified. However, this behavior will be "
                    "changed in a future version. If you want to invoke the old "
                    "behavior, please explicitly set `allow_extra=True`.",
                    DeprecationWarning,
                )
        # <<<<<<<<<< Deprecation

        # Extract the relevant part of the dataframe
        targets = get_transform_objects(
            df, self.targets, allow_missing=allow_missing, allow_extra=allow_extra
        )
        transformed = df[[t.name for t in targets]].copy()

        # Transform all targets individually
        for target in self.targets:
            transformed[target.name] = target.transform(df[target.name])
        # All values in transformed become "closer to 1, the better" here
        # TODO: for non-numerical targets? for MODE="MATCH"?

        def transform_threshold_value(x: float, target: NumericalTarget) -> float:
            """Transform the threshold value using the target's transform method."""
            return target.transform(pd.Series([x])).values[0]

        # Thresholds are used as given unless they are absolute, in which case
        # they need to undergo the same transformation as the target values
        _threshold_values_transformed = list(self.targets_threshold_values)
        for idx, (target, threshold_type, threshold_value) in enumerate(
            zip(
                targets,
                self._resolved_threshold_types(),
                self.targets_threshold_values,
            )
        ):
            # Invert maximization problems to minimization problems
            transformed[target.name] = 1.0 - transformed[target.name]
            # TODO: for non-numerical targets? for MODE="MATCH"?

            # Absolute thresholds are transformed and inverted like the values
            if threshold_type == ThresholdType.ABSOLUTE:
                _threshold_values_transformed[idx] = 1.0 - transform_threshold_value(
                    threshold_value, target
                )
        # TODO: everything becomes a minimization problem, meaning value < threshold

        # Shift objectives and thresholds
        shifted_values, shifted_thresholds = self._shift(
            transformed, _threshold_values_transformed
        )
        # TODO: caching?

        # Scalarize the shifted targets into CHIMERA merit values
        vals = self._scalarize(shifted_values, shifted_thresholds)
        # TODO: How do we interpret this value? Examples + clarification
        # TODO: Is normalization needed? Is this closer to 1 the better or the opposite?
        # TODO: What happens if we reformulate this to a maximization problem?

        # Store the total Chimera merit in a dataframe column
        return pd.DataFrame({"Merit": vals}, index=transformed.index)


# Collect leftover original slotted classes processed by `attrs.define`
gc.collect()

# ---------------------------------------------------------------------------
# NOTE: The remaining hunks of the patch series touch other files. They shared
# the (collapsed) source line with the end of this module and are preserved
# below as comments so that no content of the series is lost.
#
# diff --git a/baybe/objectives/enum.py b/baybe/objectives/enum.py
# index 9981e9c9d..bf904c528 100644
# --- a/baybe/objectives/enum.py
# +++ b/baybe/objectives/enum.py
# @@ -11,3 +11,6 @@ class Scalarizer(Enum):
#      GEOM_MEAN = "GEOM_MEAN"
#      """Geometric mean."""
# +
# +    CHIMERA = "CHIMERA"
# +    """Chimera scalarizer."""
#
# From d10e65c59898e5a68552819fa989f353930ee501 Mon Sep 17 00:00:00 2001
# From: Shmg
# Date: Fri, 3 Jan 2025 11:33:47 +0100
# Subject: [PATCH 2/3] Update acquisition for ChimeraObjective
# ---
#  baybe/acquisition/base.py | 2 +-
#  1 file changed, 1 insertion(+), 1 deletion(-)
#
# diff --git a/baybe/acquisition/base.py b/baybe/acquisition/base.py
# index 23cb135e8..7ace42ce8 100644
# --- a/baybe/acquisition/base.py
# +++ b/baybe/acquisition/base.py
# @@ -129,7 +129,7 @@ def to_botorch(
#                      bo_surrogate.posterior(train_x).mean.max().item()
#                  )
#              case ChimeraObjective():
# -                # TODO: for now minimization
# +                # Minimize the Chimera merits
#                  if "best_f" in signature_params:
#                      additional_params["best_f"] = (
#                          bo_surrogate.posterior(train_x).mean.min().item()
#
# From df26cc35d226c474f4fb3cfee1db2bab41dc33fd Mon Sep 17 00:00:00 2001
# From: Shmg
# Date: Fri, 3 Jan 2025 14:06:24 +0100
# Subject: [PATCH 3/3] Remove chimera from desirability scalarization mechanism
# ---
#  baybe/objectives/enum.py | 3 ---
#  1 file changed, 3 deletions(-)
#
# diff --git a/baybe/objectives/enum.py b/baybe/objectives/enum.py
# index bf904c528..9981e9c9d 100644
# --- a/baybe/objectives/enum.py
# +++ b/baybe/objectives/enum.py
# @@ -11,6 +11,3 @@ class Scalarizer(Enum):
#      GEOM_MEAN = "GEOM_MEAN"
#      """Geometric mean."""
# -
# -    CHIMERA = "CHIMERA"
# -    """Chimera scalarizer."""