-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathscratch.py
132 lines (115 loc) · 4.45 KB
/
scratch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import pandas as pd
import bofire.strategies.api as strategies
import bofire.surrogates.api as surrogates
from bofire.data_models.domain import api as domain_api
from bofire.data_models.features import api as features_api
from bofire.data_models.kernels import api as kernels_api
from bofire.data_models.molfeatures import api as molfeatures_api
from bofire.data_models.priors.api import HVARFNER_LENGTHSCALE_PRIOR
from bofire.data_models.strategies import api as strategies_api
from bofire.data_models.surrogates import api as surrogates_api
def test_SingleTaskGPModel_mixed_features():
"""test that we can use a single task gp with mixed features"""
inputs = domain_api.Inputs(
features=[
features_api.ContinuousInput(
key=f"x_{i+1}",
bounds=(-4, 4),
)
for i in range(2)
]
+ [
features_api.CategoricalInput(key="x_cat_1", categories=["mama", "papa"]),
features_api.CategoricalInput(key="x_cat_2", categories=["cat", "dog"]),
]
)
outputs = domain_api.Outputs(features=[features_api.ContinuousOutput(key="y")])
experiments = inputs.sample(n=10)
experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
experiments.loc[experiments.x_cat_1 == "mama", "y"] *= 5.0
experiments.loc[experiments.x_cat_1 == "papa", "y"] /= 2.0
experiments.loc[experiments.x_cat_2 == "cat", "y"] *= -2.0
experiments.loc[experiments.x_cat_2 == "dog", "y"] /= -5.0
experiments["valid_y"] = 1
gp_data = surrogates_api.SingleTaskGPSurrogate(
inputs=inputs,
outputs=outputs,
kernel=kernels_api.AdditiveKernel(
kernels=[
kernels_api.HammingDistanceKernel(
ard=True,
features=["x_cat_1", "x_cat_2"],
),
kernels_api.RBFKernel(
ard=True,
lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(),
features=[f"x_{i+1}" for i in range(2)],
),
]
),
)
gp_mapped = surrogates.map(gp_data)
assert hasattr(gp_mapped, "fit")
assert len(gp_mapped.kernel.kernels) == 2
assert gp_mapped.kernel.kernels[0].features == ["x_cat_1", "x_cat_2"]
assert gp_mapped.kernel.kernels[1].features == ["x_1", "x_2"]
gp_mapped.fit(experiments)
pred = gp_mapped.predict(experiments)
assert pred.shape == (10, 2)
assert gp_mapped.model.covar_module.kernels[0].active_dims.tolist() == [2, 3, 4, 5]
assert gp_mapped.model.covar_module.kernels[1].active_dims.tolist() == [0, 1]
if __name__ == "__main__":
test_SingleTaskGPModel_mixed_features()
import sys
sys.exit(0)
domain = domain_api.Domain(
inputs=domain_api.Inputs(
features=[
features_api.ContinuousInput(key="x1", bounds=(-1, 1)),
features_api.ContinuousInput(key="x2", bounds=(-1, 1)),
features_api.CategoricalMolecularInput(
key="mol", categories=["CO", "CCO", "CCCO"]
),
]
),
outputs=domain_api.Outputs(features=[features_api.ContinuousOutput(key="f")]),
)
strategy = strategies.map(
strategies_api.SoboStrategy(
domain=domain,
surrogate_specs=surrogates_api.BotorchSurrogates(
surrogates=[
surrogates_api.SingleTaskGPSurrogate(
inputs=domain.inputs,
outputs=domain.outputs,
input_preprocessing_specs={
"mol": molfeatures_api.Fingerprints(),
},
kernel=kernels_api.AdditiveKernel(
kernels=[
kernels_api.RBFKernel(
ard=True,
lengthscale_prior=HVARFNER_LENGTHSCALE_PRIOR(),
features=["x1", "x2"],
),
kernels_api.TanimotoKernel(
features=["mol"],
),
]
),
)
]
),
)
)
strategy.tell(
experiments=pd.DataFrame(
[
{"x1": 0.2, "x2": 0.4, "mol": "CO", "f": 1.0},
{"x1": 0.4, "x2": 0.2, "mol": "CCO", "f": 2.0},
{"x1": 0.6, "x2": 0.6, "mol": "CCCO", "f": 3.0},
]
)
)
candidates = strategy.ask(candidate_count=1)
print(candidates)