From e00881779464be2f44cdea26618e38074b23e9c2 Mon Sep 17 00:00:00 2001
From: ThomasMeissnerDS
Date: Mon, 16 Oct 2023 09:42:14 +0200
Subject: [PATCH] Add unit test

---
 bluecast/ml_modelling/xgboost.py | 35 +++++++++++++---
 bluecast/tests/test_fine_tune.py | 50 ++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 14 deletions(-)
 create mode 100644 bluecast/tests/test_fine_tune.py

diff --git a/bluecast/ml_modelling/xgboost.py b/bluecast/ml_modelling/xgboost.py
index 764dec16..5466b84a 100644
--- a/bluecast/ml_modelling/xgboost.py
+++ b/bluecast/ml_modelling/xgboost.py
@@ -4,6 +4,7 @@
 It also calculates class weights for imbalanced datasets. The weights may or may
 not be used deepending on the hyperparameter tuning.
 """
+from copy import deepcopy
 from datetime import datetime
 from typing import Dict, Literal, Optional, Tuple
 
@@ -97,10 +98,7 @@ def fit(
             self.autotune(x_train, x_test, y_train, y_test)
             print("Finished hyperparameter tuning")
 
-        if (
-            self.conf_training.autotune_model
-            and self.conf_training.enable_grid_search_fine_tuning
-        ):
+        if self.conf_training.enable_grid_search_fine_tuning:
             self.fine_tune(x_train, x_test, y_train, y_test)
             print("Finished Grid search fine tuning")
 
@@ -431,21 +429,30 @@ def objective(trial):
                 trial, "test-mlogloss"
             )
             # copy best params to not overwrite them
-            tuned_params = self.conf_params_xgboost.params
+            tuned_params = deepcopy(self.conf_params_xgboost.params)
+            print(tuned_params)
             alpha_space = trial.suggest_float(
-                "alpha", self.conf_xgboost.alpha_min, self.conf_xgboost.alpha_max
+                "alpha",
+                self.conf_params_xgboost.params["alpha"] * 0.9,
+                self.conf_params_xgboost.params["alpha"] * 1.1,
             )
             lambda_space = trial.suggest_float(
-                "lambda", self.conf_xgboost.lambda_min, self.conf_xgboost.lambda_max
+                "lambda",
+                self.conf_params_xgboost.params["lambda"] * 0.9,
+                self.conf_params_xgboost.params["lambda"] * 1.1,
            )
             eta_space = trial.suggest_float(
-                "eta", self.conf_xgboost.eta_min, self.conf_xgboost.eta_max
+                "eta",
+                self.conf_params_xgboost.params["eta"] * 0.9,
+                self.conf_params_xgboost.params["eta"] * 1.1,
             )
 
             tuned_params["alpha"] = alpha_space
             tuned_params["lambda"] = lambda_space
             tuned_params["eta"] = eta_space
 
+            print(tuned_params)
+
             steps = tuned_params["steps"]
             del tuned_params["steps"]
 
@@ -520,24 +527,24 @@ def objective(trial):
             and isinstance(self.conf_params_xgboost.params["eta"], float)
         ):
             search_space = {
-                "n_estimators_grid": np.linspace(
+                "alpha": np.linspace(
                     self.conf_params_xgboost.params["alpha"]
                     * 0.9,  # TODO: fix design flaw in config and get rid of nested dict
                     self.conf_params_xgboost.params["alpha"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
-                "max_depth_grid": np.linspace(
+                "lambda": np.linspace(
                     self.conf_params_xgboost.params["lambda"] * 0.9,
                     self.conf_params_xgboost.params["lambda"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
-                "eta_depth_grid": np.linspace(
+                "eta": np.linspace(
                     self.conf_params_xgboost.params["eta"] * 0.9,
                     self.conf_params_xgboost.params["eta"] * 1.1,
                     5,
-                    dtype=int,
+                    dtype=float,
                 ),
             }
         else:
diff --git a/bluecast/tests/test_fine_tune.py b/bluecast/tests/test_fine_tune.py
new file mode 100644
index 00000000..bd342dbc
--- /dev/null
+++ b/bluecast/tests/test_fine_tune.py
@@ -0,0 +1,50 @@
+import pytest
+
+from bluecast.config.training_config import (
+    TrainingConfig,
+    XgboostFinalParamConfig,
+    XgboostTuneParamsConfig,
+)
+from bluecast.experimentation.tracking import ExperimentTracker
+from bluecast.ml_modelling.xgboost import XgboostModel
+from bluecast.tests.make_data.create_data import create_synthetic_dataframe
+
+
+# Create a fixture for the XGBoost model
+@pytest.fixture
+def xgboost_model():
+    return XgboostModel(class_problem="binary")
+
+
+# Test case to check if fine-tuning runs without errors
+def test_fine_tune_runs_without_errors(xgboost_model):
+    xgboost_params = XgboostFinalParamConfig()
+
+    xgboost_model.conf_params_xgboost = xgboost_params
+    xgboost_model.conf_training = TrainingConfig()
+    xgboost_model.conf_xgboost = XgboostTuneParamsConfig()
+    print(xgboost_model.conf_params_xgboost.params)
+    xgboost_model.experiment_tracker = ExperimentTracker()
+    xgboost_model.conf_training.autotune_model = False
+
+    df_train, df_val = create_synthetic_dataframe(
+        2000, random_state=20
+    ), create_synthetic_dataframe(2000, random_state=200)
+    df_train = df_train.drop(
+        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
+    )
+    df_val = df_val.drop(
+        ["categorical_feature_1", "categorical_feature_2", "datetime_feature"], axis=1
+    )
+
+    x_train = df_train.drop("target", axis=1)
+    y_train = df_train["target"]
+    x_test = df_val.drop("target", axis=1)
+    y_test = df_val["target"]
+
+    xgboost_model.fine_tune(x_train, x_test, y_train, y_test)
+    assert (
+        (xgboost_model.conf_params_xgboost.params["alpha"] != 0.1)
+        or (xgboost_model.conf_params_xgboost.params["lambda"] != 0.1)
+        or (xgboost_model.conf_params_xgboost.params["eta"] != 0.1)
+    )
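
Note on the grid-search change above: the patch tunes alpha, lambda and eta in a +/-10 % window around the previously tuned values and switches the np.linspace grids from dtype=int to dtype=float. A minimal sketch of what that window looks like, assuming a previously tuned alpha of 0.1 (the concrete value is an illustration, not taken from the patch):

    import numpy as np

    alpha = 0.1  # assumed previously tuned value, for illustration only

    # post-patch grid: five evenly spaced points in [0.9 * alpha, 1.1 * alpha]
    alpha_grid = np.linspace(alpha * 0.9, alpha * 1.1, 5, dtype=float)
    print(alpha_grid)  # -> 0.09, 0.095, 0.1, 0.105, 0.11

    # the pre-patch dtype=int truncates every point of such a small-valued grid
    # to 0, so the fine-tuning step could not vary these parameters at all
    broken_grid = np.linspace(alpha * 0.9, alpha * 1.1, 5, dtype=int)
    print(broken_grid)  # -> 0, 0, 0, 0, 0

The new test can presumably be run on its own with pytest (the runner the test module imports), e.g. "pytest bluecast/tests/test_fine_tune.py"; any additional flags the project's CI uses are not part of this patch.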