Skip to content

Commit

Permalink
gp output scaler (#309)
Browse files Browse the repository at this point in the history
Co-authored-by: Simon Sung <[email protected]>
  • Loading branch information
simonsung06 and Simon Sung authored Dec 7, 2023
1 parent cb12051 commit 094b320
Show file tree
Hide file tree
Showing 17 changed files with 217 additions and 86 deletions.
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/fully_bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@

from pydantic import conint, validator

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class SaasSingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class SaasSingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["SaasSingleTaskGPSurrogate"] = "SaasSingleTaskGPSurrogate"
warmup_steps: conint(ge=1) = 256 # type: ignore
num_samples: conint(ge=1) = 128 # type: ignore
thinning: conint(ge=1) = 16 # type: ignore
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@validator("thinning")
def validate_thinning(cls, value, values):
Expand Down
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,11 @@
from bofire.data_models.priors.api import BOTORCH_NOISE_PRIOR, AnyPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class LinearSurrogate(BotorchSurrogate, TrainableSurrogate):
class LinearSurrogate(TrainableBotorchSurrogate):
type: Literal["LinearSurrogate"] = "LinearSurrogate"

kernel: LinearKernel = Field(default_factory=lambda: LinearKernel())
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/mixed_single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,17 @@
HammondDistanceKernel,
MaternKernel,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.single_task_gp import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MixedSingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class MixedSingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["MixedSingleTaskGPSurrogate"] = "MixedSingleTaskGPSurrogate"
continuous_kernel: AnyContinuousKernel = Field(
default_factory=lambda: MaternKernel(ard=True, nu=2.5)
)
categorical_kernel: AnyCategoricalKernal = Field(
default_factory=lambda: HammondDistanceKernel(ard=True)
)
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@validator("input_preprocessing_specs")
def validate_categoricals(cls, v, values):
Expand Down
28 changes: 4 additions & 24 deletions bofire/data_models/surrogates/mlp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from typing import Annotated, Literal, Sequence

from pydantic import Field, validator
from pydantic import Field

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MLPEnsemble(BotorchSurrogate, TrainableSurrogate):
class MLPEnsemble(TrainableBotorchSurrogate):
type: Literal["MLPEnsemble"] = "MLPEnsemble"
n_estimators: Annotated[int, Field(ge=1)] = 5
hidden_layer_sizes: Sequence = (100,)
Expand All @@ -19,23 +18,4 @@ class MLPEnsemble(BotorchSurrogate, TrainableSurrogate):
weight_decay: Annotated[float, Field(ge=0.0)] = 0.0
subsample_fraction: Annotated[float, Field(gt=0.0)] = 1.0
shuffle: bool = True
scaler: ScalerEnum = ScalerEnum.NORMALIZE
output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE

@validator("output_scaler")
def validate_output_scaler(cls, output_scaler):
"""validates that output_scaler is a valid type
Args:
output_scaler (ScalerEnum): Scaler used to transform the output
Raises:
ValueError: when ScalerEnum.NORMALIZE is used
Returns:
ScalerEnum: Scaler used to transform the output
"""
if output_scaler == ScalerEnum.NORMALIZE:
raise ValueError("Normalize is not supported as an output transform.")

return output_scaler
scaler: ScalerEnum = ScalerEnum.STANDARDIZE
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,14 @@
from bofire.data_models.priors.api import BOTORCH_NOISE_PRIOR, AnyPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class PolynomialSurrogate(BotorchSurrogate, TrainableSurrogate):
class PolynomialSurrogate(TrainableBotorchSurrogate):
type: Literal["PolynomialSurrogate"] = "PolynomialSurrogate"

kernel: PolynomialKernel = Field(default_factory=lambda: PolynomialKernel(power=2))
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@staticmethod
def from_power(power: int, inputs: Inputs, outputs: Outputs):
Expand Down
8 changes: 5 additions & 3 deletions bofire/data_models/surrogates/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from pydantic import Field
from typing_extensions import Annotated

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class RandomForestSurrogate(BotorchSurrogate, TrainableSurrogate):
class RandomForestSurrogate(TrainableBotorchSurrogate):
type: Literal["RandomForestSurrogate"] = "RandomForestSurrogate"

# hyperparams passed down to `RandomForestRegressor`
Expand All @@ -30,3 +30,5 @@ class RandomForestSurrogate(BotorchSurrogate, TrainableSurrogate):
random_state: Optional[int] = None
ccp_alpha: Annotated[float, Field(ge=0)] = 0.0
max_samples: Optional[Union[int, float]] = None
scaler: ScalerEnum = ScalerEnum.IDENTITY
output_scaler: ScalerEnum = ScalerEnum.IDENTITY
8 changes: 3 additions & 5 deletions bofire/data_models/surrogates/single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@
)

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import Hyperconfig, TrainableSurrogate
from bofire.data_models.surrogates.trainable import Hyperconfig
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class SingleTaskGPHyperconfig(Hyperconfig):
Expand Down Expand Up @@ -92,7 +91,7 @@ def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel:
raise ValueError(f"Kernel {hyperparameters.kernel} not known.")


class SingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class SingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["SingleTaskGPSurrogate"] = "SingleTaskGPSurrogate"

kernel: AnyKernel = Field(
Expand All @@ -106,7 +105,6 @@ class SingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
)
)
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE
hyperconfig: Optional[SingleTaskGPHyperconfig] = Field(
default_factory=lambda: SingleTaskGPHyperconfig()
)
5 changes: 2 additions & 3 deletions bofire/data_models/surrogates/tanimoto_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
BOTORCH_SCALE_PRIOR,
AnyPrior,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class TanimotoGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class TanimotoGPSurrogate(TrainableBotorchSurrogate):
type: Literal["TanimotoGPSurrogate"] = "TanimotoGPSurrogate"

kernel: AnyKernel = Field(
Expand Down
28 changes: 28 additions & 0 deletions bofire/data_models/surrogates/trainable_botorch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pydantic import validator

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate


class TrainableBotorchSurrogate(BotorchSurrogate, TrainableSurrogate):
    """Common base for trainable botorch surrogates.

    Holds the input/output scaling configuration shared by all trainable
    botorch surrogate data models: inputs are normalized by default and
    outputs are standardized by default.
    """

    # Scaler applied to the input features before fitting.
    scaler: ScalerEnum = ScalerEnum.NORMALIZE
    # Scaler applied to the outputs before fitting.
    output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE

    @validator("output_scaler")
    def validate_output_scaler(cls, output_scaler):
        """Ensure the configured output scaler is supported.

        Args:
            output_scaler (ScalerEnum): Scaler used to transform the output.

        Raises:
            ValueError: If ``ScalerEnum.NORMALIZE`` is requested, since
                normalization is not supported as an output transform.

        Returns:
            ScalerEnum: The validated output scaler.
        """
        if output_scaler != ScalerEnum.NORMALIZE:
            return output_scaler
        raise ValueError("Normalize is not supported as an output transform.")
6 changes: 5 additions & 1 deletion bofire/surrogates/fully_bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from bofire.data_models.enum import OutputFilteringEnum
from bofire.data_models.surrogates.api import SaasSingleTaskGPSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
Expand All @@ -25,6 +26,7 @@ def __init__(
self.num_samples = data_model.num_samples
self.thinning = data_model.thinning
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

model: Optional[SaasFullyBayesianSingleTaskGP] = None
Expand All @@ -42,7 +44,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, disable_progbar: bool = True):
self.model = SaasFullyBayesianSingleTaskGP(
train_X=tX,
train_Y=tY,
outcome_transform=Standardize(m=1),
outcome_transform=Standardize(m=1)
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=scaler,
)
fit_fully_bayesian_model_nuts(
Expand Down
6 changes: 5 additions & 1 deletion bofire/surrogates/mixed_single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import bofire.kernels.api as kernels
from bofire.data_models.enum import CategoricalEncodingEnum, OutputFilteringEnum
from bofire.data_models.surrogates.api import MixedSingleTaskGPSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
Expand All @@ -27,6 +28,7 @@ def __init__(
self.continuous_kernel = data_model.continuous_kernel
self.categorical_kernel = data_model.categorical_kernel
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

model: Optional[botorch.models.MixedSingleTaskGP] = None
Expand Down Expand Up @@ -81,7 +83,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
cat_dims=cat_dims,
# cont_kernel_factory=self.continuous_kernel.to_gpytorch,
cont_kernel_factory=partial(kernels.map, data_model=self.continuous_kernel),
outcome_transform=Standardize(m=tY.shape[-1]),
outcome_transform=Standardize(m=tY.shape[-1])
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=tf,
)
mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
Expand Down
49 changes: 40 additions & 9 deletions bofire/surrogates/random_forest.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import codecs
import pickle
import base64
import io
from typing import Optional

import numpy as np
import pandas as pd
import torch
from botorch.models.ensemble import EnsembleModel
from botorch.models.transforms.outcome import OutcomeTransform, Standardize
from sklearn.ensemble import RandomForestRegressor
from sklearn.utils.validation import check_is_fitted
from torch import Tensor

from bofire.data_models.enum import OutputFilteringEnum
from bofire.data_models.surrogates.api import RandomForestSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
from bofire.utils.torch_tools import tkwargs

Expand All @@ -22,7 +25,11 @@ class _RandomForest(EnsembleModel):
Predictions of the individual trees are interpreted as uncertainty.
"""

def __init__(self, rf: RandomForestRegressor):
def __init__(
self,
rf: RandomForestRegressor,
output_scaler: Optional[OutcomeTransform] = None,
):
"""Constructs the model.
Args:
Expand All @@ -33,6 +40,8 @@ def __init__(self, rf: RandomForestRegressor):
raise ValueError("`rf` is not a sklearn RandomForestRegressor.")
check_is_fitted(rf)
self._rf = rf
if output_scaler is not None:
self.outcome_transform = output_scaler

def forward(self, X: Tensor):
r"""Compute the model output at X.
Expand Down Expand Up @@ -97,6 +106,8 @@ def __init__(
self.random_state = data_model.random_state
self.ccp_alpha = data_model.ccp_alpha
self.max_samples = data_model.max_samples
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

_output_filtering: OutputFilteringEnum = OutputFilteringEnum.ALL
Expand All @@ -110,6 +121,22 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
Y (pd.DataFrame): Dataframe with Y values.
"""
transformed_X = self.inputs.transform(X, self.input_preprocessing_specs)

scaler = get_scaler(self.inputs, self.input_preprocessing_specs, self.scaler, X)
tX = (
scaler.transform(torch.from_numpy(transformed_X.values)).numpy()
if scaler is not None
else transformed_X.values
)

if self.output_scaler == ScalerEnum.STANDARDIZE:
output_scaler = Standardize(m=Y.shape[-1])
ty = torch.from_numpy(Y.values).to(**tkwargs)
ty = output_scaler(ty)[0].numpy()
else:
output_scaler = None
ty = Y.values

rf = RandomForestRegressor(
n_estimators=self.n_estimators,
criterion=self.criterion,
Expand All @@ -126,15 +153,19 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
ccp_alpha=self.ccp_alpha,
max_samples=self.max_samples,
)
rf.fit(X=transformed_X.values, y=Y.values.ravel())
self.model = _RandomForest(rf=rf)
rf.fit(X=tX, y=ty.ravel())

self.model = _RandomForest(rf=rf, output_scaler=output_scaler)
if scaler is not None:
self.model.input_transform = scaler

def _dumps(self) -> str:
"""Dumps the random forest to a string via pickle as this is not directly json serializable."""
return codecs.encode(pickle.dumps(self.model._rf), "base64").decode() # type: ignore
buffer = io.BytesIO()
torch.save(self.model, buffer)
return base64.b64encode(buffer.getvalue()).decode()

def loads(self, data: str):
"""Loads the actual random forest from a base64 encoded pickle bytes object and writes it to the `model` attribute."""
self.model = _RandomForest(
rf=pickle.loads(codecs.decode(data.encode(), "base64"))
)
buffer = io.BytesIO(base64.b64decode(data.encode()))
self.model = torch.load(buffer)
5 changes: 4 additions & 1 deletion bofire/surrogates/single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def __init__(
):
self.kernel = data_model.kernel
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
self.noise_prior = data_model.noise_prior
super().__init__(data_model=data_model, **kwargs)

Expand All @@ -115,7 +116,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
active_dims=list(range(tX.shape[1])),
ard_num_dims=1, # this keyword is ignored
),
outcome_transform=Standardize(m=tY.shape[-1]),
outcome_transform=Standardize(m=tY.shape[-1])
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=scaler,
)

Expand Down
Loading

0 comments on commit 094b320

Please sign in to comment.