From e00881779464be2f44cdea26618e38074b23e9c2 Mon Sep 17 00:00:00 2001
From: ThomasMeissnerDS
Date: Mon, 16 Oct 2023 09:42:14 +0200
Subject: [PATCH] Add unit test

---
 bluecast/ml_modelling/xgboost.py | 35 +++++++++++++---
 bluecast/tests/test_fine_tune.py | 50 ++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 14 deletions(-)
 create mode 100644 bluecast/tests/test_fine_tune.py

diff --git a/bluecast/ml_modelling/xgboost.py b/bluecast/ml_modelling/xgboost.py
index 764dec16..5466b84a 100644
--- a/bluecast/ml_modelling/xgboost.py
+++ b/bluecast/ml_modelling/xgboost.py
@@ -4,6 +4,7 @@
 It also calculates class weights for imbalanced datasets. The weights may or may
 not be used deepending on the hyperparameter tuning.
 """
+from copy import deepcopy
 from datetime import datetime
 from typing import Dict, Literal, Optional, Tuple
 
@@ -97,10 +98,7 @@ def fit(
             self.autotune(x_train, x_test, y_train, y_test)
             print("Finished hyperparameter tuning")
 
-        if (
-            self.conf_training.autotune_model
-            and self.conf_training.enable_grid_search_fine_tuning
-        ):
+        if self.conf_training.enable_grid_search_fine_tuning:
             self.fine_tune(x_train, x_test, y_train, y_test)
             print("Finished Grid search fine tuning")
 
@@ -431,21 +429,30 @@ def objective(trial):
                 trial, "test-mlogloss"
             )
             # copy best params to not overwrite them
-            tuned_params = self.conf_params_xgboost.params
+            tuned_params = deepcopy(self.conf_params_xgboost.params)
+            print(tuned_params)
             alpha_space = trial.suggest_float(
-                "alpha", self.conf_xgboost.alpha_min, self.conf_xgboost.alpha_max
+                "alpha",
+                self.conf_params_xgboost.params["alpha"] * 0.9,
+                self.conf_params_xgboost.params["alpha"] * 1.1,
             )
             lambda_space = trial.suggest_float(
-                "lambda", self.conf_xgboost.lambda_min, self.conf_xgboost.lambda_max
+                "lambda",
+                self.conf_params_xgboost.params["lambda"] * 0.9,
+                self.conf_params_xgboost.params["lambda"] * 1.1,
            )
             eta_space = trial.suggest_float(
-                "eta", self.conf_xgboost.eta_min, self.conf_xgboost.eta_max
+                "eta",
+                self.conf_params_xgboost.params["eta"] * 0.9,
+                self.conf_params_xgboost.params["eta"] * 1.1,
             )
 
             tuned_params["alpha"] = alpha_space
             tuned_params["lambda"] = lambda_space
             tuned_params["eta"] = eta_space
 
+            print(tuned_params)
+
             steps = tuned_params["steps"]
             del tuned_params["steps"]
 
@@ -520,24 +527,24 @@ def objective(trial):
             and isinstance(self.conf_params_xgboost.params["eta"], float)
         ):
             search_space = {
-                "n_estimators_grid": np.linspace(
+                "alpha": np.linspace(
                     self.conf_params_xgboost.params["alpha"]
                     * 0.9,  # TODO: fix design flaw in config and get rid of nested dict
                     self.conf_params_xgboost.params["alpha"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
-                "max_depth_grid": np.linspace(
+                "lambda": np.linspace(
                     self.conf_params_xgboost.params["lambda"] * 0.9,
                     self.conf_params_xgboost.params["lambda"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
-                "eta_depth_grid": np.linspace(
+                "eta": np.linspace(
                     self.conf_params_xgboost.params["eta"] * 0.9,
                     self.conf_params_xgboost.params["eta"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
             }
         else:
diff --git a/bluecast/tests/test_fine_tune.py b/bluecast/tests/test_fine_tune.py
new file mode 100644
index 00000000..bd342dbc
--- /dev/null
+++ b/bluecast/tests/test_fine_tune.py
@@ -0,0 +1,50 @@
+import pytest
+
+from bluecast.config.training_config import (
+    TrainingConfig,
+    XgboostFinalParamConfig,
+    XgboostTuneParamsConfig,
+)
+from bluecast.experimentation.tracking import ExperimentTracker
+from bluecast.ml_modelling.xgboost import XgboostModel
+from bluecast.tests.make_data.create_data import create_synthetic_dataframe
+
+
+# Create a fixture for the XGBoost model
+@pytest.fixture
+def xgboost_model():
+    return XgboostModel(class_problem="binary")
+
+
+# Test case to check if fine-tuning runs without errors
+def test_fine_tune_runs_without_errors(xgboost_model):
+    xgboost_params = XgboostFinalParamConfig()
+
+    xgboost_model.conf_params_xgboost = xgboost_params
+    xgboost_model.conf_training = TrainingConfig()
+    xgboost_model.conf_xgboost = XgboostTuneParamsConfig()
+    print(xgboost_model.conf_params_xgboost.params)
+    xgboost_model.experiment_tracker = ExperimentTracker()
+    xgboost_model.conf_training.autotune_model = False
+
+    df_train, df_val = create_synthetic_dataframe(
+        2000, random_state=20
+    ), create_synthetic_dataframe(2000, random_state=200)
+    df_train = df_train.drop(
+        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
+    )
+    df_val = df_val.drop(
+        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
+    )
+
+    x_train = df_train.drop("target", axis=1)
+    y_train = df_train["target"]
+    x_test = df_val.drop("target", axis=1)
+    y_test = df_val["target"]
+
+    xgboost_model.fine_tune(x_train, x_test, y_train, y_test)
+    assert (
+        (xgboost_model.conf_params_xgboost.params["alpha"] != 0.1)
+        or (xgboost_model.conf_params_xgboost.params["lambda"] != 0.1)
+        or (xgboost_model.conf_params_xgboost.params["eta"] != 0.1)
+    )
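
Note on the grid-search change above: the patch tunes alpha, lambda and eta in a +/-10 % window around the previously tuned values and switches the np.linspace grids from dtype=int to dtype=float. A minimal sketch of what that window looks like, assuming a previously tuned alpha of 0.1 (the concrete value is an illustration, not taken from the patch):

    import numpy as np

    alpha = 0.1  # assumed previously tuned value, for illustration only

    # post-patch grid: five evenly spaced points in [0.9 * alpha, 1.1 * alpha]
    alpha_grid = np.linspace(alpha * 0.9, alpha * 1.1, 5, dtype=float)
    print(alpha_grid)  # -> 0.09, 0.095, 0.1, 0.105, 0.11

    # the pre-patch dtype=int truncates every point of such a small-valued grid
    # to 0, so the fine-tuning step could not vary these parameters at all
    broken_grid = np.linspace(alpha * 0.9, alpha * 1.1, 5, dtype=int)
    print(broken_grid)  # -> 0, 0, 0, 0, 0

The new test can presumably be run on its own with pytest (the runner the test module imports), e.g. "pytest bluecast/tests/test_fine_tune.py"; any additional flags the project's CI uses are not part of this patch.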