
Commit

Update grid search to only overwrite params if the cv score or simple score was better
ThomasMeissnerDS committed Oct 16, 2023
1 parent 16c4789 commit 29bb9db
Showing 3 changed files with 109 additions and 13 deletions.
20 changes: 20 additions & 0 deletions bluecast/experimentation/tracking.py
@@ -93,3 +93,23 @@ def retrieve_results_as_df(self) -> pd.DataFrame:
            model_parameters_df, how="left", left_index=True, right_index=True
        )
        return results_df

    def get_best_score(self) -> Union[int, float]:
        """Expects results in the tracker"""

        results_df = pd.DataFrame(
            {
                "experiment_id": self.experiment_id,
                "score_category": self.score_category,
                "eval_scores": self.eval_scores,
                "metric_used": self.metric_used,
                "metric_higher_is_better": self.metric_higher_is_better,
            }
        )
        if results_df.empty:
            raise ValueError("No results have been found in experiment tracker")

        if self.metric_higher_is_better:
            return results_df["eval_scores"].max()
        else:
            return results_df["eval_scores"].min()
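
As a usage sketch (not part of the commit): assuming the tracker class in bluecast/experimentation/tracking.py can be instantiated without arguments (its name and constructor are not shown in this diff, so both are placeholders here), and reusing the add_results argument order from the tests further down, the new helper could be exercised like this:

    # `ExperimentTracker` and its no-argument constructor are assumptions;
    # the class definition lies outside this diff.
    from bluecast.experimentation.tracking import ExperimentTracker

    tracker = ExperimentTracker()
    tracker.add_results(
        1,             # experiment_id
        "cv_score",    # score_category
        None,          # training_config (a TrainingConfig instance in the tests)
        {"eta": 0.1},  # model_parameters
        0.83,          # eval_scores
        "accuracy",    # metric_used
        True,          # metric_higher_is_better
    )

    # With metric_higher_is_better=True the best score is the maximum tracked
    # eval score, otherwise the minimum; an empty tracker raises ValueError.
    best = tracker.get_best_score()
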
40 changes: 27 additions & 13 deletions bluecast/ml_modelling/xgboost.py
@@ -102,7 +102,7 @@ def fit(
self.fine_tune(x_train, x_test, y_train, y_test)
print("Finished Grid search fine tuning")

print("Start final model training")
logger("Start final model training")
if self.conf_training.use_full_data_for_final_model:
logger(
f"""{datetime.utcnow()}: Union train and test data for final model training based on TrainingConfig
@@ -154,7 +154,7 @@ def fit(
evals=eval_set,
verbose_eval=self.conf_xgboost.model_verbosity_during_final_training,
)
print("Finished training")
logger("Finished training")
return self.model

def autotune(
@@ -366,7 +366,6 @@ def objective(trial):
"objective": self.conf_xgboost.model_objective, # OR 'binary:logistic' #the loss function being used
"booster": self.conf_xgboost.booster,
"eval_metric": self.conf_xgboost.model_eval_metric,
"verbose": self.conf_xgboost.model_verbosity,
"tree_method": train_on, # use GPU for training
"num_class": y_train.nunique(),
"max_depth": xgboost_best_param[
@@ -382,7 +381,7 @@ def objective(trial):
"eta": xgboost_best_param["eta"],
"steps": xgboost_best_param["steps"],
}
print("Best params: ", self.conf_params_xgboost.params)
logger(f"Best params: {self.conf_params_xgboost.params}")
self.conf_params_xgboost.sample_weight = xgboost_best_param["sample_weight"]

def fine_tune(
@@ -430,7 +429,6 @@ def objective(trial):
)
# copy best params to not overwrite them
tuned_params = deepcopy(self.conf_params_xgboost.params)
print(tuned_params)
alpha_space = trial.suggest_float(
"alpha",
self.conf_params_xgboost.params["alpha"] * 0.9,
@@ -451,8 +449,6 @@ def objective(trial):
tuned_params["lambda"] = lambda_space
tuned_params["eta"] = eta_space

print(tuned_params)

steps = tuned_params["steps"]
del tuned_params["steps"]

@@ -550,6 +546,11 @@ def objective(trial):
else:
ValueError("Some parameters are not floats or strings")

if self.conf_training.autotune_model:
best_score_cv = self.experiment_tracker.get_best_score()
else:
best_score_cv = np.inf

study = optuna.create_study(
direction="minimize", sampler=optuna.samplers.GridSampler(search_space)
)
@@ -569,11 +570,24 @@ def objective(trial):
except (ZeroDivisionError, RuntimeError, ValueError):
pass

xgboost_grid_best_param = study.best_trial.params
self.conf_params_xgboost.params["alpha"] = xgboost_grid_best_param["alpha"]
self.conf_params_xgboost.params["lambda"] = xgboost_grid_best_param["lambda"]
self.conf_params_xgboost.params["eta"] = xgboost_grid_best_param["eta"]
print("Best params: ", self.conf_params_xgboost.params)
if self.conf_training.autotune_model:
best_score_cv_grid = self.experiment_tracker.get_best_score()
else:
best_score_cv_grid = np.inf

if best_score_cv_grid < best_score_cv or not self.conf_training.autotune_model:
xgboost_grid_best_param = study.best_trial.params
self.conf_params_xgboost.params["alpha"] = xgboost_grid_best_param["alpha"]
self.conf_params_xgboost.params["lambda"] = xgboost_grid_best_param[
"lambda"
]
self.conf_params_xgboost.params["eta"] = xgboost_grid_best_param["eta"]
logger(
f"Grid search improved eval metric from {best_score_cv} to {best_score_cv_grid}."
)
logger(f"Best params: {self.conf_params_xgboost.params}")
else:
logger(f"Grid search could not improve eval metric of {best_score_cv}.")

def predict(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
"""Predict on unseen data."""
@@ -603,5 +617,5 @@ def predict(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
else:
predicted_probs = partial_probs
predicted_classes = np.asarray([np.argmax(line) for line in partial_probs])
print("Finished predicting")
logger("Finished predicting")
return predicted_probs, predicted_classes
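
To make the new guard easier to follow, here is a condensed, hypothetical restatement of the "only overwrite if better" logic as a standalone helper (the variable names mirror the diff, but this function is not part of the library; the optuna study minimizes, so a lower score counts as better, and best_score_cv is the tracker's best score captured before the grid-search trials run):

    import numpy as np

    def apply_grid_search_result(params, study, tracker, autotune_model, best_score_cv):
        """Overwrite alpha/lambda/eta only if the grid search improved the tracked score."""
        # Best score after the grid-search trials were appended to the tracker;
        # without autotuning there is no baseline, so the grid result is always taken.
        best_score_cv_grid = tracker.get_best_score() if autotune_model else np.inf
        if best_score_cv_grid < best_score_cv or not autotune_model:
            best = study.best_trial.params
            params["alpha"] = best["alpha"]
            params["lambda"] = best["lambda"]
            params["eta"] = best["eta"]
        return params
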
62 changes: 62 additions & 0 deletions bluecast/tests/test_experiment_tracker.py
@@ -74,3 +74,65 @@ def test_retrieve_results_as_df(experiment_tracker):
    assert results_df["eval_scores"].tolist() == [0.95]
    assert results_df["metric_used"].tolist() == ["accuracy"]
    assert results_df["metric_higher_is_better"].tolist() == [True]


def test_get_best_score_empty(experiment_tracker):
    # Ensure it raises an exception when no results have been added
    with pytest.raises(
        ValueError, match="No results have been found in experiment tracker"
    ):
        experiment_tracker.get_best_score()


def test_get_best_score_higher_is_better(experiment_tracker):
    # Add some sample data with a higher-is-better metric
    experiment_id = 1
    score_category = "cv_score"
    training_config = (
        TrainingConfig()
    )  # You may need to create a valid TrainingConfig instance
    model_parameters = {"param1": 1, "param2": "abc"}
    eval_scores = 0.95
    metric_used = "accuracy"
    metric_higher_is_better = True

    experiment_tracker.add_results(
        experiment_id,
        score_category,
        training_config,
        model_parameters,
        eval_scores,
        metric_used,
        metric_higher_is_better,
    )

    # Ensure the best score is correctly computed
    best_score = experiment_tracker.get_best_score()
    assert best_score == 0.95


def test_get_best_score_lower_is_better(experiment_tracker):
    # Add some sample data with a lower-is-better metric
    experiment_id = 1
    score_category = "cv_score"
    training_config = (
        TrainingConfig()
    )  # You may need to create a valid TrainingConfig instance
    model_parameters = {"param1": 1, "param2": "abc"}
    eval_scores = 0.95
    metric_used = "loss"  # Assuming "loss" is a metric that is lower-is-better
    metric_higher_is_better = False

    experiment_tracker.add_results(
        experiment_id,
        score_category,
        training_config,
        model_parameters,
        eval_scores,
        metric_used,
        metric_higher_is_better,
    )

    # Ensure the best score is correctly computed
    best_score = experiment_tracker.get_best_score()
    assert best_score == 0.95
