Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gp output scaler #309

Merged
merged 10 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/fully_bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@

from pydantic import conint, validator

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class SaasSingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class SaasSingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["SaasSingleTaskGPSurrogate"] = "SaasSingleTaskGPSurrogate"
warmup_steps: conint(ge=1) = 256 # type: ignore
num_samples: conint(ge=1) = 128 # type: ignore
thinning: conint(ge=1) = 16 # type: ignore
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@validator("thinning")
def validate_thinning(cls, value, values):
Expand Down
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,11 @@
from bofire.data_models.priors.api import BOTORCH_NOISE_PRIOR, AnyPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class LinearSurrogate(BotorchSurrogate, TrainableSurrogate):
class LinearSurrogate(TrainableBotorchSurrogate):
type: Literal["LinearSurrogate"] = "LinearSurrogate"

kernel: LinearKernel = Field(default_factory=lambda: LinearKernel())
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/mixed_single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,17 @@
HammondDistanceKernel,
MaternKernel,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.single_task_gp import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MixedSingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class MixedSingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["MixedSingleTaskGPSurrogate"] = "MixedSingleTaskGPSurrogate"
continuous_kernel: AnyContinuousKernel = Field(
default_factory=lambda: MaternKernel(ard=True, nu=2.5)
)
categorical_kernel: AnyCategoricalKernal = Field(
default_factory=lambda: HammondDistanceKernel(ard=True)
)
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@validator("input_preprocessing_specs")
def validate_categoricals(cls, v, values):
Expand Down
28 changes: 4 additions & 24 deletions bofire/data_models/surrogates/mlp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from typing import Annotated, Literal, Sequence

from pydantic import Field, validator
from pydantic import Field

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class MLPEnsemble(BotorchSurrogate, TrainableSurrogate):
class MLPEnsemble(TrainableBotorchSurrogate):
type: Literal["MLPEnsemble"] = "MLPEnsemble"
n_estimators: Annotated[int, Field(ge=1)] = 5
hidden_layer_sizes: Sequence = (100,)
Expand All @@ -19,23 +18,4 @@ class MLPEnsemble(BotorchSurrogate, TrainableSurrogate):
weight_decay: Annotated[float, Field(ge=0.0)] = 0.0
subsample_fraction: Annotated[float, Field(gt=0.0)] = 1.0
shuffle: bool = True
scaler: ScalerEnum = ScalerEnum.NORMALIZE
output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE

@validator("output_scaler")
def validate_output_scaler(cls, output_scaler):
"""validates that output_scaler is a valid type

Args:
output_scaler (ScalerEnum): Scaler used to transform the output

Raises:
ValueError: when ScalerEnum.NORMALIZE is used

Returns:
ScalerEnum: Scaler used to transform the output
"""
if output_scaler == ScalerEnum.NORMALIZE:
raise ValueError("Normalize is not supported as an output transform.")

return output_scaler
scaler: ScalerEnum = ScalerEnum.STANDARDIZE
7 changes: 2 additions & 5 deletions bofire/data_models/surrogates/polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,14 @@
from bofire.data_models.priors.api import BOTORCH_NOISE_PRIOR, AnyPrior

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class PolynomialSurrogate(BotorchSurrogate, TrainableSurrogate):
class PolynomialSurrogate(TrainableBotorchSurrogate):
type: Literal["PolynomialSurrogate"] = "PolynomialSurrogate"

kernel: PolynomialKernel = Field(default_factory=lambda: PolynomialKernel(power=2))
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE

@staticmethod
def from_power(power: int, inputs: Inputs, outputs: Outputs):
Expand Down
8 changes: 5 additions & 3 deletions bofire/data_models/surrogates/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from pydantic import Field
from typing_extensions import Annotated

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class RandomForestSurrogate(BotorchSurrogate, TrainableSurrogate):
class RandomForestSurrogate(TrainableBotorchSurrogate):
type: Literal["RandomForestSurrogate"] = "RandomForestSurrogate"

# hyperparams passed down to `RandomForestRegressor`
Expand All @@ -30,3 +30,5 @@ class RandomForestSurrogate(BotorchSurrogate, TrainableSurrogate):
random_state: Optional[int] = None
ccp_alpha: Annotated[float, Field(ge=0)] = 0.0
max_samples: Optional[Union[int, float]] = None
scaler: ScalerEnum = ScalerEnum.IDENTITY
output_scaler: ScalerEnum = ScalerEnum.IDENTITY
8 changes: 3 additions & 5 deletions bofire/data_models/surrogates/single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@
)

# from bofire.data_models.strategies.api import FactorialStrategy
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import Hyperconfig, TrainableSurrogate
from bofire.data_models.surrogates.trainable import Hyperconfig
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class SingleTaskGPHyperconfig(Hyperconfig):
Expand Down Expand Up @@ -92,7 +91,7 @@ def matern_15(ard: bool, lengthscale_prior: AnyPrior) -> MaternKernel:
raise ValueError(f"Kernel {hyperparameters.kernel} not known.")


class SingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class SingleTaskGPSurrogate(TrainableBotorchSurrogate):
type: Literal["SingleTaskGPSurrogate"] = "SingleTaskGPSurrogate"

kernel: AnyKernel = Field(
Expand All @@ -106,7 +105,6 @@ class SingleTaskGPSurrogate(BotorchSurrogate, TrainableSurrogate):
)
)
noise_prior: AnyPrior = Field(default_factory=lambda: BOTORCH_NOISE_PRIOR())
scaler: ScalerEnum = ScalerEnum.NORMALIZE
hyperconfig: Optional[SingleTaskGPHyperconfig] = Field(
default_factory=lambda: SingleTaskGPHyperconfig()
)
5 changes: 2 additions & 3 deletions bofire/data_models/surrogates/tanimoto_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
BOTORCH_SCALE_PRIOR,
AnyPrior,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate
from bofire.data_models.surrogates.trainable_botorch import TrainableBotorchSurrogate


class TanimotoGPSurrogate(BotorchSurrogate, TrainableSurrogate):
class TanimotoGPSurrogate(TrainableBotorchSurrogate):
type: Literal["TanimotoGPSurrogate"] = "TanimotoGPSurrogate"

kernel: AnyKernel = Field(
Expand Down
28 changes: 28 additions & 0 deletions bofire/data_models/surrogates/trainable_botorch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pydantic import validator

from bofire.data_models.surrogates.botorch import BotorchSurrogate
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.data_models.surrogates.trainable import TrainableSurrogate


class TrainableBotorchSurrogate(BotorchSurrogate, TrainableSurrogate):
    # Shared defaults for all trainable botorch-based surrogates:
    # inputs are min-max normalized, outputs are standardized.
    scaler: ScalerEnum = ScalerEnum.NORMALIZE
    output_scaler: ScalerEnum = ScalerEnum.STANDARDIZE

    @validator("output_scaler")
    def validate_output_scaler(cls, output_scaler):
        """Ensure that the configured output scaler is supported.

        Args:
            output_scaler (ScalerEnum): Scaler used to transform the output

        Raises:
            ValueError: when ScalerEnum.NORMALIZE is used

        Returns:
            ScalerEnum: Scaler used to transform the output
        """
        if output_scaler != ScalerEnum.NORMALIZE:
            return output_scaler
        # Min-max normalization is only meaningful for inputs; botorch outcome
        # transforms support standardization (or none), so reject NORMALIZE here.
        raise ValueError("Normalize is not supported as an output transform.")
6 changes: 5 additions & 1 deletion bofire/surrogates/fully_bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from bofire.data_models.enum import OutputFilteringEnum
from bofire.data_models.surrogates.api import SaasSingleTaskGPSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
Expand All @@ -25,6 +26,7 @@ def __init__(
self.num_samples = data_model.num_samples
self.thinning = data_model.thinning
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

model: Optional[SaasFullyBayesianSingleTaskGP] = None
Expand All @@ -42,7 +44,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, disable_progbar: bool = True):
self.model = SaasFullyBayesianSingleTaskGP(
train_X=tX,
train_Y=tY,
outcome_transform=Standardize(m=1),
outcome_transform=Standardize(m=1)
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=scaler,
)
fit_fully_bayesian_model_nuts(
Expand Down
6 changes: 5 additions & 1 deletion bofire/surrogates/mixed_single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import bofire.kernels.api as kernels
from bofire.data_models.enum import CategoricalEncodingEnum, OutputFilteringEnum
from bofire.data_models.surrogates.api import MixedSingleTaskGPSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
Expand All @@ -27,6 +28,7 @@ def __init__(
self.continuous_kernel = data_model.continuous_kernel
self.categorical_kernel = data_model.categorical_kernel
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

model: Optional[botorch.models.MixedSingleTaskGP] = None
Expand Down Expand Up @@ -81,7 +83,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
cat_dims=cat_dims,
# cont_kernel_factory=self.continuous_kernel.to_gpytorch,
cont_kernel_factory=partial(kernels.map, data_model=self.continuous_kernel),
outcome_transform=Standardize(m=tY.shape[-1]),
outcome_transform=Standardize(m=tY.shape[-1])
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=tf,
)
mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
Expand Down
49 changes: 40 additions & 9 deletions bofire/surrogates/random_forest.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import codecs
import pickle
import base64
import io
from typing import Optional

import numpy as np
import pandas as pd
import torch
from botorch.models.ensemble import EnsembleModel
from botorch.models.transforms.outcome import OutcomeTransform, Standardize
from sklearn.ensemble import RandomForestRegressor
from sklearn.utils.validation import check_is_fitted
from torch import Tensor

from bofire.data_models.enum import OutputFilteringEnum
from bofire.data_models.surrogates.api import RandomForestSurrogate as DataModel
from bofire.data_models.surrogates.scaler import ScalerEnum
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.surrogates.single_task_gp import get_scaler
from bofire.surrogates.trainable import TrainableSurrogate
from bofire.utils.torch_tools import tkwargs

Expand All @@ -22,7 +25,11 @@ class _RandomForest(EnsembleModel):
Predictions of the individual trees are interpreted as uncertainty.
"""

def __init__(self, rf: RandomForestRegressor):
def __init__(
self,
rf: RandomForestRegressor,
output_scaler: Optional[OutcomeTransform] = None,
):
"""Constructs the model.

Args:
Expand All @@ -33,6 +40,8 @@ def __init__(self, rf: RandomForestRegressor):
raise ValueError("`rf` is not a sklearn RandomForestRegressor.")
check_is_fitted(rf)
self._rf = rf
if output_scaler is not None:
self.outcome_transform = output_scaler

def forward(self, X: Tensor):
r"""Compute the model output at X.
Expand Down Expand Up @@ -97,6 +106,8 @@ def __init__(
self.random_state = data_model.random_state
self.ccp_alpha = data_model.ccp_alpha
self.max_samples = data_model.max_samples
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
super().__init__(data_model=data_model, **kwargs)

_output_filtering: OutputFilteringEnum = OutputFilteringEnum.ALL
Expand All @@ -110,6 +121,22 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
Y (pd.DataFrame): Dataframe with Y values.
"""
transformed_X = self.inputs.transform(X, self.input_preprocessing_specs)

scaler = get_scaler(self.inputs, self.input_preprocessing_specs, self.scaler, X)
tX = (
scaler.transform(torch.from_numpy(transformed_X.values)).numpy()
if scaler is not None
else transformed_X.values
)

if self.output_scaler == ScalerEnum.STANDARDIZE:
output_scaler = Standardize(m=Y.shape[-1])
ty = torch.from_numpy(Y.values).to(**tkwargs)
ty = output_scaler(ty)[0].numpy()
else:
output_scaler = None
ty = Y.values

rf = RandomForestRegressor(
n_estimators=self.n_estimators,
criterion=self.criterion,
Expand All @@ -126,15 +153,19 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
ccp_alpha=self.ccp_alpha,
max_samples=self.max_samples,
)
rf.fit(X=transformed_X.values, y=Y.values.ravel())
self.model = _RandomForest(rf=rf)
rf.fit(X=tX, y=ty.ravel())

self.model = _RandomForest(rf=rf, output_scaler=output_scaler)
if scaler is not None:
self.model.input_transform = scaler

def _dumps(self) -> str:
"""Dumps the random forest to a string via pickle as this is not directly json serializable."""
return codecs.encode(pickle.dumps(self.model._rf), "base64").decode() # type: ignore
buffer = io.BytesIO()
torch.save(self.model, buffer)
return base64.b64encode(buffer.getvalue()).decode()

def loads(self, data: str):
"""Loads the actual random forest from a base64 encoded pickle bytes object and writes it to the `model` attribute."""
self.model = _RandomForest(
rf=pickle.loads(codecs.decode(data.encode(), "base64"))
)
buffer = io.BytesIO(base64.b64decode(data.encode()))
self.model = torch.load(buffer)
5 changes: 4 additions & 1 deletion bofire/surrogates/single_task_gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def __init__(
):
self.kernel = data_model.kernel
self.scaler = data_model.scaler
self.output_scaler = data_model.output_scaler
self.noise_prior = data_model.noise_prior
super().__init__(data_model=data_model, **kwargs)

Expand All @@ -115,7 +116,9 @@ def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
active_dims=list(range(tX.shape[1])),
ard_num_dims=1, # this keyword is ingored
),
outcome_transform=Standardize(m=tY.shape[-1]),
outcome_transform=Standardize(m=tY.shape[-1])
if self.output_scaler == ScalerEnum.STANDARDIZE
else None,
input_transform=scaler,
)

Expand Down
Loading