Commit: Add unit test
ThomasMeissnerDS committed Oct 16, 2023
1 parent 8655050 commit e008817
Showing 2 changed files with 71 additions and 14 deletions.
35 changes: 21 additions & 14 deletions bluecast/ml_modelling/xgboost.py
@@ -4,6 +4,7 @@
 It also calculates class weights for imbalanced datasets. The weights may or may not be used depending on the
 hyperparameter tuning.
 """
+from copy import deepcopy
 from datetime import datetime
 from typing import Dict, Literal, Optional, Tuple

@@ -97,10 +98,7 @@ def fit(
         self.autotune(x_train, x_test, y_train, y_test)
         print("Finished hyperparameter tuning")

-        if (
-            self.conf_training.autotune_model
-            and self.conf_training.enable_grid_search_fine_tuning
-        ):
+        if self.conf_training.enable_grid_search_fine_tuning:
             self.fine_tune(x_train, x_test, y_train, y_test)
             print("Finished Grid search fine tuning")

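This change decouples the two tuning stages: previously, fine_tune could only run when autotune_model was also enabled; now the grid-search flag is honored on its own, which is what allows the new test below to call fine-tuning with autotuning switched off. A minimal sketch of the resulting control flow, assuming a simplified stand-in for bluecast's TrainingConfig (only the two flag names come from the diff):

```python
from dataclasses import dataclass


@dataclass
class TrainingConfig:
    # Simplified stand-in; the real config lives in bluecast.config.training_config.
    autotune_model: bool = True
    enable_grid_search_fine_tuning: bool = False


def fit(conf: TrainingConfig) -> None:
    if conf.autotune_model:
        print("running Optuna autotuning")
    # After this commit, the grid-search flag is checked independently:
    if conf.enable_grid_search_fine_tuning:
        print("running grid-search fine-tuning")
```
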
@@ -431,21 +429,30 @@ def objective(trial):
                     trial, "test-mlogloss"
                 )
             # copy best params to not overwrite them
-            tuned_params = self.conf_params_xgboost.params
+            tuned_params = deepcopy(self.conf_params_xgboost.params)
+            print(tuned_params)
             alpha_space = trial.suggest_float(
-                "alpha", self.conf_xgboost.alpha_min, self.conf_xgboost.alpha_max
+                "alpha",
+                self.conf_params_xgboost.params["alpha"] * 0.9,
+                self.conf_params_xgboost.params["alpha"] * 1.1,
             )
             lambda_space = trial.suggest_float(
-                "lambda", self.conf_xgboost.lambda_min, self.conf_xgboost.lambda_max
+                "lambda",
+                self.conf_params_xgboost.params["lambda"] * 0.9,
+                self.conf_params_xgboost.params["lambda"] * 1.1,
             )
             eta_space = trial.suggest_float(
-                "eta", self.conf_xgboost.eta_min, self.conf_xgboost.eta_max
+                "eta",
+                self.conf_params_xgboost.params["eta"] * 0.9,
+                self.conf_params_xgboost.params["eta"] * 1.1,
             )

             tuned_params["alpha"] = alpha_space
             tuned_params["lambda"] = lambda_space
             tuned_params["eta"] = eta_space

+            print(tuned_params)
+
             steps = tuned_params["steps"]
             del tuned_params["steps"]

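Two substantive changes in this hunk: tuned_params is now a deepcopy, and the suggest_float ranges shrink from the globally configured min/max bounds to a ±10% neighborhood around the previously tuned values, making this pass a local refinement rather than a second full search. The copy matters because conf_params_xgboost.params is a plain dict, so a bare assignment only binds a second name to the same object and every per-trial mutation would write through to the stored best parameters. A standalone illustration with hypothetical values:

```python
from copy import deepcopy

best_params = {"alpha": 0.1, "lambda": 0.1, "eta": 0.3}  # hypothetical tuned values

tuned = best_params            # bare assignment: both names share one dict
tuned["alpha"] = 0.09
print(best_params["alpha"])    # 0.09 -- the stored optimum was silently overwritten

best_params = {"alpha": 0.1, "lambda": 0.1, "eta": 0.3}
tuned = deepcopy(best_params)  # independent copy per trial
tuned["alpha"] = 0.09
print(best_params["alpha"])    # 0.1 -- the original stays intact
```
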
Expand Down Expand Up @@ -520,24 +527,24 @@ def objective(trial):
and isinstance(self.conf_params_xgboost.params["eta"], float)
):
search_space = {
"n_estimators_grid": np.linspace(
"alpha": np.linspace(
self.conf_params_xgboost.params["alpha"]
* 0.9, # TODO: fix design flaw in config and get rid of nested dict
self.conf_params_xgboost.params["alpha"] * 1.1,
5,
dtype=int,
dtype=float,
),
"max_depth_grid": np.linspace(
"lambda": np.linspace(
self.conf_params_xgboost.params["lambda"] * 0.9,
self.conf_params_xgboost.params["lambda"] * 1.1,
5,
dtype=int,
dtype=float,
),
"eta_depth_grid": np.linspace(
"eta": np.linspace(
self.conf_params_xgboost.params["eta"] * 0.9,
self.conf_params_xgboost.params["eta"] * 1.1,
5,
dtype=int,
dtype=float,
),
}
else:
Expand Down
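
Besides renaming the grid keys to the actual XGBoost parameter names (alpha, lambda, eta), the dtype switch from int to float is a real bug fix: these parameters typically sit well below 1.0, and an integer linspace truncates every point of the ±10% window to zero. A quick standalone check with a hypothetical tuned value:

```python
import numpy as np

alpha_best = 0.1  # hypothetical previously tuned value

print(np.linspace(alpha_best * 0.9, alpha_best * 1.1, 5, dtype=int))
# [0 0 0 0 0]  -- the whole grid collapses to zero

print(np.linspace(alpha_best * 0.9, alpha_best * 1.1, 5, dtype=float))
# [0.09  0.095 0.1   0.105 0.11 ]  -- the intended five-point window
```
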
50 changes: 50 additions & 0 deletions bluecast/tests/test_fine_tune.py
@@ -0,0 +1,50 @@
import pytest

from bluecast.config.training_config import (
    TrainingConfig,
    XgboostFinalParamConfig,
    XgboostTuneParamsConfig,
)
from bluecast.experimentation.tracking import ExperimentTracker
from bluecast.ml_modelling.xgboost import XgboostModel
from bluecast.tests.make_data.create_data import create_synthetic_dataframe


# Create a fixture for the XGBoost model
@pytest.fixture
def xgboost_model():
    return XgboostModel(class_problem="binary")


# Test case to check if fine-tuning runs without errors
def test_fine_tune_runs_without_errors(xgboost_model):
    xgboost_params = XgboostFinalParamConfig()

    xgboost_model.conf_params_xgboost = xgboost_params
    xgboost_model.conf_training = TrainingConfig()
    xgboost_model.conf_xgboost = XgboostTuneParamsConfig()
    print(xgboost_model.conf_params_xgboost.params)
    xgboost_model.experiment_tracker = ExperimentTracker()
    # fine_tune must work even with autotuning disabled (see the fit() change)
    xgboost_model.conf_training.autotune_model = False

    df_train, df_val = create_synthetic_dataframe(
        2000, random_state=20
    ), create_synthetic_dataframe(2000, random_state=200)
    df_train = df_train.drop(
        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
    )
    df_val = df_val.drop(
        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
    )

    x_train = df_train.drop("target", axis=1)
    y_train = df_train["target"]
    x_test = df_val.drop("target", axis=1)
    y_test = df_val["target"]

    xgboost_model.fine_tune(x_train, x_test, y_train, y_test)
    # at least one parameter must have moved away from its starting value
    assert (
        (xgboost_model.conf_params_xgboost.params["alpha"] != 0.1)
        or (xgboost_model.conf_params_xgboost.params["lambda"] != 0.1)
        or (xgboost_model.conf_params_xgboost.params["eta"] != 0.1)
    )
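
The assertion is intentionally loose: the test only requires that fine_tune moved at least one of alpha, lambda, or eta away from 0.1, the value the comparisons assume as the configured starting point. To run just this module, a standard pytest invocation works, either from the command line or programmatically:

```python
import pytest

# Equivalent to `pytest bluecast/tests/test_fine_tune.py -v` on the CLI.
pytest.main(["bluecast/tests/test_fine_tune.py", "-v"])
```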
