Fix issue with stuff not being maximized (#233)
marcosfelt authored Feb 8, 2023
1 parent f4df0ee commit 4ccdddd
Showing 1 changed file with 23 additions and 24 deletions.
47 changes: 23 additions & 24 deletions summit/strategies/tsemo.py
@@ -132,8 +132,6 @@ def __init__(self, domain, transform=None, **kwargs):
):
self.input_columns += [c[0] for c in v.ds.columns]

# Kernel
self.kernel = kwargs.get("kernel", Exponential)

# Spectral sampling settings
self.n_spectral_points = kwargs.get("n_spectral_points", 1500)
@@ -179,7 +177,7 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
self.all_experiments = prev_res
elif prev_res is not None and self.all_experiments is not None:
self.all_experiments = pd.concat([self.all_experiments, prev_res], axis=0)

if self.all_experiments.shape[0] <= 3:
lhs = LHS(self.domain, categorical_method=cat_method)
self.iterations += 1
@@ -235,25 +233,25 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
if (self.domain.num_continuous_dimensions() == 0) and (
self.domain.num_categorical_variables() == 1
):
X, y = self._categorical_enumerate(models)
X, yhat = self._categorical_enumerate(models)
# Mixed domains
elif self.categorical_combos is not None and len(self.input_columns) > 1:
X, y = self._nsga_optimize_mixed(models)
X, yhat = self._nsga_optimize_mixed(models)
# Continuous domains
elif self.categorical_combos is None and len(self.input_columns) > 0:
X, y = self._nsga_optimize(models)
X, yhat = self._nsga_optimize(models)

# Return if no suggestions found
if X.shape[0] == 0 and y.shape[0] == 0:
if X.shape[0] == 0 and yhat.shape[0] == 0:
self.logger.warning("No suggestions found.")
self.iterations += 1
return None

# Select points that give maximum hypervolume improvement
self.hv_imp, indices = self._select_max_hvi(outputs, y, num_experiments)
self.hv_imp, indices = self._select_max_hvi(y=outputs, yhat=yhat, num_evals=num_experiments)

# Join to get single dataset with inputs and outputs and get suggestion
result = X.join(y)
result = X.join(yhat)
result = result.iloc[indices, :]

# Do any necessary transformations back
@@ -296,6 +294,9 @@ def _nsga_optimize(self, models):
y = np.atleast_2d(self.internal_res.F).tolist()
X = DataSet(X, columns=problem.X_columns)
y = DataSet(y, columns=[v.name for v in self.domain.output_variables])
for v in self.domain.output_variables:
if v.maximize:
y[v.name] = -y[v.name]
return X, y

def _nsga_optimize_mixed(self, models):
@@ -409,7 +410,7 @@ def from_dict(cls, d):
tsemo.all_experiments = DataSet.from_dict(ae)
return tsemo

def _select_max_hvi(self, y, samples, num_evals=1):
def _select_max_hvi(self, y, yhat, num_evals=1):
"""Returns the point(s) that maximimize hypervolume improvement
Parameters
@@ -426,18 +427,17 @@ def _select_max_hvi(self, y, yhat, num_evals=1):
and the indices of the corresponding points in samples
"""
samples_original, samples_next = samples.copy(), samples.copy()
samples = samples.copy()
yhat = yhat.copy()
y = y.copy()

# Set up maximization and minimization
for v in self.domain.variables:
if v.is_objective and v.maximize:
for v in self.domain.output_variables:
if v.maximize:
y[v.name] = -1 * y[v.name]
samples[v.name] = -1 * samples[v.name]
yhat[v.name] = -1 * yhat[v.name]

# samples, mean, std = samples.standardize(return_mean=True, return_std=True)
samples = samples.data_to_numpy()
yhat = yhat.data_to_numpy()
Ynew = y.data_to_numpy()

# Reference
@@ -447,12 +447,12 @@ def _select_max_hvi(self, y, yhat, num_evals=1):
)

indices = []
n = samples.shape[1]
mask = np.ones(samples.shape[0], dtype=bool)
samples_indices = np.arange(0, samples.shape[0])
n = yhat.shape[1]
mask = np.ones(yhat.shape[0], dtype=bool)
samples_indices = np.arange(0, yhat.shape[0])

for i in range(num_evals):
masked_samples = samples[mask, :]
masked_samples = yhat[mask, :]
Yfront, _ = pareto_efficient(Ynew, maximize=False)
if len(Yfront) == 0:
raise ValueError("Pareto front length too short")
@@ -474,7 +474,7 @@ def _select_max_hvi(self, y, yhat, num_evals=1):

# Housekeeping: find the max HvI point and mask out for next round
original_index = samples_indices[mask][masked_index]
new_point = samples[original_index, :].reshape(1, n)
new_point = yhat[original_index, :].reshape(1, n)
Ynew = np.append(Ynew, new_point, axis=0)
mask[original_index] = False
indices.append(original_index)
@@ -486,9 +486,8 @@ def _select_max_hvi(self, y, yhat, num_evals=1):
hv_imp = 0
else:
# Total hypervolume improvement
# Includes all points added to batch (hvY + last hv_improvement)
# Subtracts hypervolume without any points added (hvY0)
hv_imp = hv_improvement[masked_index] + hvY - hvY0
Yfront, _ = pareto_efficient(Ynew, maximize=False)
hv_imp = hypervolume(Yfront, r) - hvY0
return hv_imp, indices


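A note on the fix itself: Summit's Pareto and hypervolume machinery works in a minimization convention, so the commit negates the model predictions for every objective with maximize=True before ranking candidates in _select_max_hvi, and flips the NSGA-II results back to their natural scale in _nsga_optimize. A rough standalone sketch of that sign convention, using toy numbers and a small pareto_mask helper as a stand-in for Summit's pareto_efficient (both are illustrative assumptions, not the library's API):

import numpy as np

# Toy predictions for two objectives: yield (maximize) and cost (minimize).
yhat = np.array([
    [0.90, 3.0],
    [0.70, 1.0],
    [0.60, 3.5],
])
maximize = np.array([True, False])

# Negate the maximized objective so every column is "smaller is better",
# mirroring the y[v.name] = -1 * y[v.name] lines in the diff.
yhat_min = yhat.copy()
yhat_min[:, maximize] = -yhat_min[:, maximize]

def pareto_mask(Y):
    # A point is Pareto-efficient (under minimization) if no other point is
    # <= in every objective and strictly < in at least one.
    keep = np.ones(Y.shape[0], dtype=bool)
    for i in range(Y.shape[0]):
        others = np.delete(Y, i, axis=0)
        dominated = np.any(
            np.all(others <= Y[i], axis=1) & np.any(others < Y[i], axis=1)
        )
        keep[i] = not dominated
    return keep

front = yhat[pareto_mask(yhat_min)]
print(front)  # [0.90, 3.0] and [0.70, 1.0] survive; [0.60, 3.5] is dominated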

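The last hunk also changes how the total hypervolume improvement of a batch is reported: instead of summing per-point increments, the new code rebuilds the Pareto front of the augmented point set and subtracts the hypervolume of the original observations (hvY0). A rough sketch of that quantity for a 2-D minimization problem, with a simple rectangle-sweep hypervolume standing in for Summit's hypervolume helper (the reference point and fronts are made-up toy values):

import numpy as np

def hypervolume_2d(front, ref):
    # Area dominated by a 2-D minimization front, bounded by the reference point.
    # Assumes the rows of `front` are mutually non-dominated and that `ref` is
    # worse than every point in both objectives.
    front = front[np.argsort(front[:, 0])]  # ascending in objective 1
    hv, prev_f2 = 0.0, ref[1]
    for f1, f2 in front:
        hv += (ref[0] - f1) * (prev_f2 - f2)  # one disjoint rectangle per point
        prev_f2 = f2
    return hv

ref = np.array([10.0, 10.0])               # reference point (worst corner)
Y0 = np.array([[2.0, 6.0], [5.0, 3.0]])    # previously observed front
batch = np.array([[4.0, 4.5]])             # point(s) selected for the next batch

hvY0 = hypervolume_2d(Y0, ref)
hv_after = hypervolume_2d(np.vstack([Y0, batch]), ref)

# Mirrors the new return value in the diff: hv_imp = hypervolume(Yfront, r) - hvY0
hv_imp = hv_after - hvY0
print(hv_imp)  # 1.5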