Add experiment tracker #24

Merged on Oct 15, 2023 (25 commits)

Commits
13be0b9
Add skeleton for experiment tracker
ThomasMeissnerDS Oct 8, 2023
7bf394b
Use TrainingConfig experiment tracker
ThomasMeissnerDS Oct 8, 2023
200aacc
Add baseclass for exprimenttracker
ThomasMeissnerDS Oct 13, 2023
e360d79
Change training configs from Pydantic dataclasses to BaseModels
ThomasMeissnerDS Oct 14, 2023
2006906
Add unit tests and bug fixes
ThomasMeissnerDS Oct 14, 2023
550aac7
Pre-commit changes
ThomasMeissnerDS Oct 14, 2023
ce9cef7
Add tracker to Xgboost tuning routine
ThomasMeissnerDS Oct 14, 2023
b6a6e66
Fix unit test
ThomasMeissnerDS Oct 14, 2023
7ed6b99
Add experiment tracker to BlueCastCV
ThomasMeissnerDS Oct 14, 2023
dae68b9
Add experiment tracker to unit tests (will fail)
ThomasMeissnerDS Oct 14, 2023
a2d97f4
Fix some unit tests and bugs
ThomasMeissnerDS Oct 14, 2023
8ee2942
Update outdated parts of ReadMe
ThomasMeissnerDS Oct 14, 2023
d9c2463
Fix tests
ThomasMeissnerDS Oct 14, 2023
a41779e
Fix tests
ThomasMeissnerDS Oct 14, 2023
b7fc70f
Fix tests
ThomasMeissnerDS Oct 14, 2023
13e78e3
Update ReadMe
ThomasMeissnerDS Oct 14, 2023
dbbbbde
Fix default unit tests
ThomasMeissnerDS Oct 14, 2023
fb6aa5e
Update ReadMe
ThomasMeissnerDS Oct 14, 2023
c3cbad3
Update ReadMe. Also add datetime in experiments
ThomasMeissnerDS Oct 15, 2023
60ba558
Update ReadMe and docstrings
ThomasMeissnerDS Oct 15, 2023
4d1c5a0
Update ReadMe and wheels
ThomasMeissnerDS Oct 15, 2023
138122b
Update ReadMe
ThomasMeissnerDS Oct 15, 2023
0cd13b9
Update unit test
ThomasMeissnerDS Oct 15, 2023
b61ff19
Update wheels
ThomasMeissnerDS Oct 15, 2023
f69f896
Fix tests and update wheels
ThomasMeissnerDS Oct 15, 2023
82 changes: 76 additions & 6 deletions README.md
@@ -13,9 +13,7 @@
[![python](https://img.shields.io/badge/Python-3.10-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)

A lightweight and fast auto-ml library.
BlueCast focuses on a few model architectures (by default Xgboost
only) and a few preprocessing options (only what is
needed for Xgboost). This allows for a much faster development
@@ -40,9 +38,11 @@ the full documentation [here](https://bluecast.readthedocs.io/en/latest/).
* [Custom preprocessing](#custom-preprocessing)
* [Custom feature selection](#custom-feature-selection)
* [Custom ML model](#custom-ml-model)
* [Using the inbuilt ExperimentTracker](#using-the-inbuilt-experimenttracker)
* [Convenience features](#convenience-features)
* [Code quality](#code-quality)
* [Documentation](#documentation)
* [Kaggle competition results and example notebooks](#kaggle-competition-results-and-example-notebooks)
* [How to contribute](#how-to-contribute)
* [Meta](#meta)

@@ -90,6 +90,18 @@ automl.fit(df_train, target_col="target")
y_probs, y_classes = automl.predict(df_val)
```

BlueCast has simple utilities to save and load your pipeline:

```python
from bluecast.general_utils.general_utils import save_to_production, load_for_production

# save pipeline including tracker
save_to_production(automl, "/kaggle/working/", "bluecast_cv_pipeline")

# in production or for further experiments this can be loaded again
automl = load_for_production("/kaggle/working/", "bluecast_cv_pipeline")
```

### Advanced usage

#### Explanatory analysis
@@ -152,7 +164,7 @@ train-test-split, cross-validation can be enabled easily:

```python
from bluecast.blueprints.cast import BlueCast
from bluecast.config.training_config import TrainingConfig


# Create a custom training config and adjust general training parameters
@@ -234,7 +246,6 @@ automl = BlueCast(
target_column="target",
conf_training=train_config,
conf_xgboost=xgboost_param_config,

)

automl.fit(df_train, target_col="target")
@@ -392,7 +403,6 @@ y_probs, y_classes = automl.predict(df_val)
Also this step can be customized. The following example shows how to do this:

```python
from bluecast.config.training_config import TrainingConfig
from bluecast.preprocessing.custom import CustomPreprocessing
from sklearn.feature_selection import RFECV
@@ -486,6 +496,8 @@ class CustomModel(BaseClassMlModel):
) -> None:
self.model = LogisticRegression()
self.model.fit(x_train, y_train)
# if you wish to track experiments using your own ExperimentTracker, add it here
# or in the fit method itself

def predict(self, df: pd.DataFrame) -> Tuple[PredictedProbas, PredictedClasses]:
predicted_probas = self.model.predict_proba(df)
@@ -517,6 +529,53 @@ predicted_probas, predicted_classes = bluecast.predict(x_test)

Please note that custom ML models require user-defined hyperparameter tuning. Pre-defined
configurations are not available for custom models.
Also note that the calculation of SHAP values only works with tree-based models by
default. For other model architectures, disable SHAP values in the TrainingConfig
via:

`train_config.calculate_shap_values = False`

Just instantiate a new instance of the TrainingConfig, update the param as above
and pass the config as an argument to the BlueCast instance during instantiation,
as sketched below. Feature importance can be added in the custom model definition.

#### Using the inbuilt ExperimentTracker

For experimentation environments it can be useful to store all variables
and results from model runs.
BlueCast has an inbuilt experiment tracker to enhance the provided insights.
No setup is required. BlueCast will automatically store all necessary data
after each hyperparameter tuning trial.

```python
# instantiate and train BlueCast
from bluecast.blueprints.cast import BlueCast

automl = BlueCast(
class_problem="binary",
target_column="target"
)

automl.fit_eval(df_train, df_eval, y_eval, target_col="target")

# access the experiment tracker
tracker = automl.experiment_tracker

# see all stored information as a Pandas DataFrame
tracker_df = tracker.retrieve_results_as_df()
```

From here you could even feed selected columns back into a BlueCast
instance and try to predict the eval_score to get the feature
importance of your experiment data (see the sketch below). Maybe you will
uncover hidden patterns in your model training.
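
A hedged sketch of that idea follows; the `eval_scores` column name and the
binarization step are assumptions for illustration, not a guaranteed tracker schema:

```python
# hypothetical meta-analysis: which experiment settings lead to good scores?
tracker_df = tracker.retrieve_results_as_df()

# turn eval scores into a binary target (above vs. below median), since
# BlueCast expects a classification problem; the column name is an assumption
tracker_df["good_run"] = (
    tracker_df["eval_scores"] >= tracker_df["eval_scores"].median()
).astype(int)

meta_automl = BlueCast(class_problem="binary", target_column="good_run")
meta_automl.fit(tracker_df.drop(columns=["eval_scores"]), target_col="good_run")
```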

Please note that the number of stored experiments will probably be lower
than the number of started hyperparameter tuning trials. The experiment tracker
is skipped whenever Optuna prunes a trial.
The experiment tracker triggers whenever the `fit` or `fit_eval` methods of a BlueCast
class instance are called (also within BlueCastCV). This means that for custom
models the tracker will not trigger automatically and has to be added manually,
for example:
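
A sketch of such manual tracking, assuming the concrete `ExperimentTracker`
follows the `add_results` signature of the base class shown further down in this
PR; all values are illustrative:

```python
from bluecast.config.training_config import TrainingConfig
from bluecast.experimentation.tracking import ExperimentTracker

tracker = ExperimentTracker()
tracker.add_results(
    experiment_id=0,  # illustrative id
    score_category="simple_train_test_score",
    training_config=TrainingConfig(),
    model_parameters={"max_depth": 3},  # your custom model's hyperparameters
    eval_scores=0.87,  # illustrative metric value
    metric_used="accuracy",
    metric_higher_is_better=True,
)
```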

## Convenience features

@@ -567,6 +626,17 @@ For new features it is expected that unit tests are added.

Documentation is provided via [Read the Docs](https://bluecast.readthedocs.io/en/latest/)

## Kaggle competition results and example notebooks

Even though BlueCast has been designed to be a lightweight
automl framework, it still offers the possibility to
reach very good performance. We tested BlueCast in Kaggle
competitions to showcase the library's capabilities
feature- and performance-wise.

* ICR top 20% finish with over 6000 participants ([notebook](https://www.kaggle.com/code/thomasmeiner/icr-bluecast-automl-almost-bronze-ranks))
* An advanced example covering lots of functionalities ([notebook](https://www.kaggle.com/code/thomasmeiner/ps3e23-automl-eda-outlier-detection/notebook))

## How to contribute

Contributions are welcome. Please follow the following steps:
22 changes: 16 additions & 6 deletions bluecast/blueprints/cast.py
@@ -20,6 +20,7 @@
)
from bluecast.evaluation.eval_metrics import eval_classifier
from bluecast.evaluation.shap_values import shap_explanations
from bluecast.experimentation.tracking import ExperimentTracker
from bluecast.general_utils.general_utils import check_gpu_support, logger
from bluecast.ml_modelling.xgboost import XgboostModel
from bluecast.preprocessing.custom import CustomPreprocessing
@@ -44,17 +45,19 @@ class BlueCast:
:param :class_problem: Takes a string containing the class problem type. Either "binary" or "multiclass".
:param :target_column: Takes a string containing the name of the target column.
:param :cat_columns: Takes a list of strings containing the names of the categorical columns. If not provided,
    BlueCast will infer these automatically.
:param :date_columns: Takes a list of strings containing the names of the date columns. If not provided,
    BlueCast will infer these automatically.
:param :time_split_column: Takes a string containing the name of the time split column. If not provided,
    BlueCast will not split the data by time or order, but do a random split instead.
:param :ml_model: Takes an instance of a XgboostModel class. If not provided, BlueCast will instantiate one.
    This is an API to pass any model class. Inherit the baseclass from ml_modelling.base_model.BaseModel.
:param custom_preprocessor: Takes an instance of a CustomPreprocessing class. Allows users to inject custom
    preprocessing steps which take place right after the train test split.
:param custom_last_mile_computation: Takes an instance of a CustomPreprocessing class. Allows users to inject custom
    preprocessing steps which take place right before the model training.
:param experiment_tracker: Takes an instance of an ExperimentTracker class. If not provided, this will be initialized
    automatically.
"""

def __init__(
@@ -73,6 +76,7 @@
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsConfig] = None,
conf_params_xgboost: Optional[XgboostFinalParamConfig] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
):
self.class_problem = class_problem
self.prediction_mode: bool = False
@@ -96,6 +100,11 @@
self.shap_values: Optional[np.ndarray] = None
self.eval_metrics: Optional[Dict[str, Any]] = None

if experiment_tracker:
self.experiment_tracker = experiment_tracker
else:
self.experiment_tracker = ExperimentTracker()

def initial_checks(self, df: pd.DataFrame) -> None:
if not self.conf_training:
self.conf_training = TrainingConfig()
@@ -273,6 +282,7 @@ def fit(self, df: pd.DataFrame, target_col: str) -> None:
conf_training=self.conf_training,
conf_xgboost=self.conf_xgboost,
conf_params_xgboost=self.conf_params_xgboost,
experiment_tracker=self.experiment_tracker,
)
self.ml_model.fit(x_train, x_test, y_train, y_test)
if self.conf_training and self.conf_training.calculate_shap_values:
21 changes: 19 additions & 2 deletions bluecast/blueprints/cast_cv.py
@@ -9,15 +9,18 @@
XgboostFinalParamConfig,
XgboostTuneParamsConfig,
)
from bluecast.experimentation.tracking import ExperimentTracker
from bluecast.ml_modelling.xgboost import XgboostModel
from bluecast.preprocessing.custom import CustomPreprocessing
from bluecast.preprocessing.feature_selection import RFECVSelector


class BlueCastCV:
"""Wrapper to train and predict multiple blueCast intsances.
"""Wrapper to train and predict multiple blueCast intstances.

A custom splitter can be provided."""
Check the BlueCast class documentation for additional parameter details.
A custom splitter can be provided.
"""

def __init__(
self,
@@ -26,6 +29,7 @@ def __init__(
conf_training: Optional[TrainingConfig] = None,
conf_xgboost: Optional[XgboostTuneParamsConfig] = None,
conf_params_xgboost: Optional[XgboostFinalParamConfig] = None,
experiment_tracker: Optional[ExperimentTracker] = None,
custom_last_mile_computation: Optional[CustomPreprocessing] = None,
custom_preprocessor: Optional[CustomPreprocessing] = None,
custom_feature_selector: Optional[
@@ -44,6 +48,11 @@
self.stratifier = stratifier
self.ml_model = ml_model

if experiment_tracker:
self.experiment_tracker = experiment_tracker
else:
self.experiment_tracker = ExperimentTracker()

def prepare_data(
self, df: pd.DataFrame, target: str
) -> Tuple[pd.DataFrame, pd.Series]:
@@ -86,6 +95,7 @@ def fit(self, df: pd.DataFrame, target_col: str) -> None:
conf_training=self.conf_training,
conf_xgboost=self.conf_xgboost,
conf_params_xgboost=self.conf_params_xgboost,
experiment_tracker=self.experiment_tracker,
custom_preprocessor=self.custom_preprocessor,
custom_feature_selector=self.custom_feature_selector,
custom_last_mile_computation=self.custom_last_mile_computation,
@@ -94,6 +104,9 @@
automl.fit(X_train, target_col=target_col)
self.bluecast_models.append(automl)

# overwrite experiment tracker to pass it into next iteration
self.experiment_tracker = automl.experiment_tracker

def fit_eval(self, df: pd.DataFrame, target_col: str) -> None:
"""Fit multiple BlueCast instances on different data splits.

@@ -125,6 +138,7 @@ def fit_eval(self, df: pd.DataFrame, target_col: str) -> None:
conf_training=self.conf_training,
conf_xgboost=self.conf_xgboost,
conf_params_xgboost=self.conf_params_xgboost,
experiment_tracker=self.experiment_tracker,
custom_preprocessor=self.custom_preprocessor,
custom_feature_selector=self.custom_feature_selector,
custom_last_mile_computation=self.custom_last_mile_computation,
@@ -133,6 +147,9 @@
automl.fit_eval(X_train, X_val, y_val, target_col=target_col)
self.bluecast_models.append(automl)

# overwrite experiment tracker to pass it into next iteration
self.experiment_tracker = automl.experiment_tracker

def predict(
self, df: pd.DataFrame, return_sub_models_preds: bool = False
) -> Tuple[Union[pd.DataFrame, pd.Series], Union[pd.DataFrame, pd.Series]]:
38 changes: 38 additions & 0 deletions bluecast/config/base_classes.py
@@ -0,0 +1,38 @@
from abc import ABC, abstractmethod
from typing import Dict, Literal, Union

import pandas as pd

from bluecast.config.training_config import TrainingConfig


class BaseClassExperimentTracker(ABC):
"""Base class for the experiment tracker.

Enforces the implementation of the add_results and retrieve_results_as_df methods.
"""

@abstractmethod
def add_results(
self,
experiment_id: Union[int, str, float],
score_category: Literal["simple_train_test_score", "cv_score", "oof_score"],
training_config: TrainingConfig,
model_parameters: Dict[
Union[str, int, float, None], Union[str, int, float, None]
],
eval_scores: Union[float, int, None],
metric_used: str,
metric_higher_is_better: bool,
) -> None:
"""
Add results to the ExperimentTracker class.
"""
pass

@abstractmethod
def retrieve_results_as_df(self) -> pd.DataFrame:
"""
Retrieve results from the ExperimentTracker class
"""
pass
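
For illustration, a minimal custom tracker satisfying this interface could look
like the sketch below; the in-memory list storage is an assumption for the
example, not the library's own implementation:

```python
from typing import Dict, List, Literal, Union

import pandas as pd

from bluecast.config.base_classes import BaseClassExperimentTracker
from bluecast.config.training_config import TrainingConfig


class ListExperimentTracker(BaseClassExperimentTracker):
    """Toy tracker keeping every result in an in-memory list."""

    def __init__(self) -> None:
        self.results: List[Dict] = []

    def add_results(
        self,
        experiment_id: Union[int, str, float],
        score_category: Literal["simple_train_test_score", "cv_score", "oof_score"],
        training_config: TrainingConfig,
        model_parameters: Dict[
            Union[str, int, float, None], Union[str, int, float, None]
        ],
        eval_scores: Union[float, int, None],
        metric_used: str,
        metric_higher_is_better: bool,
    ) -> None:
        # store one flat record per experiment (training_config omitted for brevity)
        self.results.append(
            {
                "experiment_id": experiment_id,
                "score_category": score_category,
                "eval_scores": eval_scores,
                "metric_used": metric_used,
                "metric_higher_is_better": metric_higher_is_better,
            }
        )

    def retrieve_results_as_df(self) -> pd.DataFrame:
        return pd.DataFrame(self.results)
```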
9 changes: 5 additions & 4 deletions bluecast/config/training_config.py
@@ -7,15 +7,15 @@
"""
from typing import Dict, Optional

from pydantic import BaseModel


class Config:
arbitrary_types_allowed = True


class TrainingConfig(BaseModel):
"""Define general training parameters.

:param global_random_state: Global random state to use for reproducibility.
Expand All @@ -40,6 +40,7 @@ class TrainingConfig:
categorical encoding is done via a ML algorithm. If False, the categorical encoding is done via a target
encoding in the preprocessing steps. See the ReadMe for more details.
:param show_detailed_tuning_logs: Whether to show detailed tuning logs. Not used when custom ML model is passed.
:param experiment_name: Name of the experiment. Will be logged inside the ExperimentTracker.
"""

global_random_state: int = 10
Expand All @@ -58,10 +59,10 @@ class TrainingConfig:
cat_encoding_via_ml_algorithm: bool = False
show_detailed_tuning_logs: bool = False
optuna_sampler_n_startup_trials: int = 10
experiment_name: str = "new experiment"


class XgboostTuneParamsConfig(BaseModel):
"""Define hyperparameter tuning search space."""

max_depth_min: int = 2