From 26c88ff214287b3e09089875915dc702210c489a Mon Sep 17 00:00:00 2001
From: Julian Keupp <julian.keupp@boehringer-ingelheim.com>
Date: Wed, 4 Dec 2024 15:41:52 +0100
Subject: [PATCH 1/7] add group kfold option in cross_validate of any
 trainable surrogate

---
 bofire/surrogates/trainable.py | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py
index 24f261bb4..15c6811fc 100644
--- a/bofire/surrogates/trainable.py
+++ b/bofire/surrogates/trainable.py
@@ -3,7 +3,7 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import pandas as pd
-from sklearn.model_selection import KFold, StratifiedKFold
+from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
 
 from bofire.data_models.enum import OutputFilteringEnum
 from bofire.data_models.features.api import (
@@ -60,6 +60,7 @@ def cross_validate(
         include_labcodes: bool = False,
         random_state: Optional[int] = None,
         stratified_feature: Optional[str] = None,
+        group_split_column: Optional[str] = None,
         hooks: Optional[
             Dict[
                 str,
@@ -88,6 +89,7 @@ def cross_validate(
                 Defaults to None.
             stratified_feature (str, optional): The feature name to preserve the percentage of samples for each class in
                 the stratified folds. Defaults to None.
+            group_split_column (str, optional): The column name of the group id. Defaults to None.
             hooks (Dict[str, Callable[[Model, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame], Any]], optional):
                 Dictionary of callable hooks that are called within the CV loop. The callable retrieves the current trained
                 modeld and the current CV folds in the following order: X_train, y_train, X_test, y_test. Defaults to {}.
@@ -125,6 +127,23 @@ def cross_validate(
                 raise ValueError(
                     "The feature to be stratified needs to be a DiscreteInput, CategoricalInput, CategoricalOutput, or ContinuousOutput",
                 )
+            
+        if group_split_column is not None:
+            # check if the group split column is present in the experiments
+            if group_split_column not in experiments.columns:
+                raise ValueError(
+                    f"Group split column {group_split_column} is not present in the experiments."
+                )
+            ngroups = len(experiments[group_split_column].unique())
+            if ngroups == 0:
+                raise ValueError(
+                    f"Number of unique groups {len(experiments[group_split_column].unique())} is zero."
+                )
+            # check if the number of unique groups is greater than or equal to the number of folds
+            if ngroups < folds:
+                raise ValueError(
+                    f"Number of unique groups {len(experiments[group_split_column].unique())} is less than the number of folds {folds}."
+                )
 
         # first filter the experiments based on the model setting
         experiments = self._preprocess_experiments(experiments)
@@ -149,8 +168,16 @@ def cross_validate(
 
         # instantiate kfold object
         if stratified_feature is None:
-            cv = KFold(n_splits=folds, shuffle=True, random_state=random_state)
-            cv_func = cv.split(experiments)
+            if group_split_column is not None:
+                # GROUP SPLIT FUNCTIONALITY
+                cv = GroupKFold(n_folds = folds, shuffle=True, random_state=random_state)
+                cv_func = cv.split(
+                    experiments,
+                    groups=experiments[group_split_column]
+                )
+            else:
+                cv = KFold(n_splits=folds, shuffle=True, random_state=random_state)
+                cv_func = cv.split(experiments)
         else:
             cv = StratifiedKFold(
                 n_splits=folds,

From 2d82843b379a4f8ad5e730ec819023335efe0530 Mon Sep 17 00:00:00 2001
From: Jim Boelrijk Valcon <jimboelrijk1@MacBook-Pro-van-Jim.local>
Date: Wed, 4 Dec 2024 17:23:37 +0100
Subject: [PATCH 2/7] changed to GroupShuffleSplit, added test case

---
 bofire/surrogates/trainable.py                |  4 +-
 .../bofire/surrogates/test_cross_validate.py  | 80 +++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py
index 15c6811fc..ea2f4a105 100644
--- a/bofire/surrogates/trainable.py
+++ b/bofire/surrogates/trainable.py
@@ -3,7 +3,7 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import pandas as pd
-from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
+from sklearn.model_selection import KFold, StratifiedKFold, GroupShuffleSplit
 
 from bofire.data_models.enum import OutputFilteringEnum
 from bofire.data_models.features.api import (
@@ -170,7 +170,7 @@ def cross_validate(
         if stratified_feature is None:
             if group_split_column is not None:
                 # GROUP SPLIT FUNCTIONALITY
-                cv = GroupKFold(n_folds = folds, shuffle=True, random_state=random_state)
+                cv = GroupShuffleSplit(n_splits=folds, random_state=random_state)
                 cv_func = cv.split(
                     experiments,
                     groups=experiments[group_split_column]
diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index 7be592273..5657f5056 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -469,3 +469,83 @@ def test_model_cross_validate_stratified_invalid_feature_type(key):
         match="The feature to be stratified needs to be a DiscreteInput, CategoricalInput, CategoricalOutput, or ContinuousOutput",
     ):
         model.cross_validate(experiments, folds=5, stratified_feature=key)
+
+@pytest.mark.parametrize("random_state", [1, 2])
+def test_model_cross_validate_groupfold(random_state):
+    # TODO: perhaps look into more efficient way to test this
+    inputs = Inputs(
+        features=[
+            ContinuousInput(
+                key=f"x_{i+1}",
+                bounds=(-4, 4),
+            )
+            for i in range(2)
+        ]
+        + [
+            CategoricalInput(key="cat_x_3", categories=["category1", "category2"]),
+            CategoricalDescriptorInput(
+                key="cat_x_4",
+                categories=["a", "b", "c"],
+                descriptors=["alpha"],
+                values=[[1], [2], [3]],
+            ),
+        ],
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    experiments = pd.DataFrame(
+        [
+            [-4, -4, "category1", "a", 1, 0],
+            [-3, -3, "category1", "a", 1, 0],
+            [-2, -2, "category1", "a", 1, 0],
+            [-1, -1, "category1", "b", 1, 0],
+            [0, 0, "category1", "b", 1, 1],
+            [1, 1, "category1", "b", 1, 1],
+            [2, 2, "category1", "c", 1, 1],
+            [3, 3, "category1", "c", 1, 1],
+            [2, 3, "category1", "c", 1, 1],
+            [3, 1, "category1", "a", 1, 2],
+            [3, 4, "category1", "a", 0, 2],
+            [4, 4, "category2", "b", 0, 2],
+            [1, 4, "category2", "b", 0, 2],
+            [1, 0, "category2", "c", 0, 2],
+            [1, 2, "category2", "c", 0, 3],
+            [2, 4, "category2", "a", 1, 3],
+        ],
+        columns=["x_1", "x_2", "cat_x_3", "cat_x_4", "y", "group"],
+    )
+    experiments["valid_y"] = 1
+
+    cat0_indexes = experiments[experiments["group"] == 0].index
+    cat1_indexes = experiments[experiments["group"] == 1].index
+    cat2_indexes = experiments[experiments["group"] == 2].index
+    cat3_indexes = experiments[experiments["group"] == 3].index
+
+    all_indices = [cat0_indexes, cat1_indexes, cat2_indexes, cat3_indexes]
+
+    model = SingleTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+    )
+    model = surrogates.map(model)
+    train_cv, test_cv, hook_results = model.cross_validate(
+        experiments,
+        folds=4,
+        random_state=random_state,
+        group_split_column="group",
+    )
+
+    # gather train and test indices
+    test_indices = []
+    train_indices = []
+    for cvresults in test_cv.results:
+        test_indices.append(list(cvresults.observed.index))
+
+    for cvresults in train_cv.results:
+        train_indices.append(list(cvresults.observed.index))
+    
+    # test if the groups are only present in either the test or train indices and are grouped together
+    for test_index, train_index in zip(test_indices, train_indices):
+        for indices in all_indices:
+            test_set = set(test_index)
+            train_set = set(train_index)
+            assert test_set.issuperset(indices) or train_set.issuperset(indices)

From f573475e7cb725310736b359e200a4e709c2959e Mon Sep 17 00:00:00 2001
From: Julian Keupp <julian.keupp@boehringer-ingelheim.com>
Date: Mon, 16 Dec 2024 14:40:46 +0100
Subject: [PATCH 3/7] improve docstring & add some inline comments in test

---
 bofire/surrogates/trainable.py                 |  6 +++++-
 tests/bofire/surrogates/test_cross_validate.py | 13 ++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py
index ea2f4a105..6222793b1 100644
--- a/bofire/surrogates/trainable.py
+++ b/bofire/surrogates/trainable.py
@@ -89,7 +89,11 @@ def cross_validate(
                 Defaults to None.
             stratified_feature (str, optional): The feature name to preserve the percentage of samples for each class in
                 the stratified folds. Defaults to None.
-            group_split_column (str, optional): The column name of the group id. Defaults to None.
+            group_split_column (str, optional): The column name of the group id. 
+                This parameter is used to ensure that the splits are made such that the same group is not present in both
+                training and testing sets. This is useful in scenarios where data points are related or dependent on each
+                other, and splitting them into different sets would violate the assumption of independence. The number of
+                unique groups must be greater than or equal to the number of folds. Defaults to None. 
             hooks (Dict[str, Callable[[Model, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame], Any]], optional):
                 Dictionary of callable hooks that are called within the CV loop. The callable retrieves the current trained
                 modeld and the current CV folds in the following order: X_train, y_train, X_test, y_test. Defaults to {}.
diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index 5657f5056..d4f8fd52a 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -472,7 +472,7 @@ def test_model_cross_validate_stratified_invalid_feature_type(key):
 
 @pytest.mark.parametrize("random_state", [1, 2])
 def test_model_cross_validate_groupfold(random_state):
-    # TODO: perhaps look into more efficient way to test this
+    # Define the input features for the model
     inputs = Inputs(
         features=[
             ContinuousInput(
@@ -491,7 +491,10 @@ def test_model_cross_validate_groupfold(random_state):
             ),
         ],
     )
+    # Define the output features for the model
     outputs = Outputs(features=[ContinuousOutput(key="y")])
+    
+    # Create a DataFrame with sample experiments data
     experiments = pd.DataFrame(
         [
             [-4, -4, "category1", "a", 1, 0],
@@ -515,6 +518,7 @@ def test_model_cross_validate_groupfold(random_state):
     )
     experiments["valid_y"] = 1
 
+    # Get the indices for each group
     cat0_indexes = experiments[experiments["group"] == 0].index
     cat1_indexes = experiments[experiments["group"] == 1].index
     cat2_indexes = experiments[experiments["group"] == 2].index
@@ -522,11 +526,14 @@ def test_model_cross_validate_groupfold(random_state):
 
     all_indices = [cat0_indexes, cat1_indexes, cat2_indexes, cat3_indexes]
 
+    # Initialize the model
     model = SingleTaskGPSurrogate(
         inputs=inputs,
         outputs=outputs,
     )
     model = surrogates.map(model)
+    
+    # Perform cross-validation with group splitting
     train_cv, test_cv, hook_results = model.cross_validate(
         experiments,
         folds=4,
@@ -534,7 +541,7 @@ def test_model_cross_validate_groupfold(random_state):
         group_split_column="group",
     )
 
-    # gather train and test indices
+    # Gather train and test indices
     test_indices = []
     train_indices = []
     for cvresults in test_cv.results:
@@ -543,7 +550,7 @@ def test_model_cross_validate_groupfold(random_state):
     for cvresults in train_cv.results:
         train_indices.append(list(cvresults.observed.index))
     
-    # test if the groups are only present in either the test or train indices and are grouped together
+    # Test if the groups are only present in either the test or train indices and are grouped together
     for test_index, train_index in zip(test_indices, train_indices):
         for indices in all_indices:
             test_set = set(test_index)

From 21835c15fde2f55735dbf1b7ce5ef9210155fe26 Mon Sep 17 00:00:00 2001
From: jkeupp <julian.keupp@rub.de>
Date: Fri, 20 Dec 2024 14:08:29 +0100
Subject: [PATCH 4/7] refactor cross_validate & add tests

---
 bofire/surrogates/trainable.py                | 145 +++++++++++++-----
 .../bofire/surrogates/test_cross_validate.py  |  99 +++++++++++-
 2 files changed, 204 insertions(+), 40 deletions(-)

diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py
index 6222793b1..04fb62e5f 100644
--- a/bofire/surrogates/trainable.py
+++ b/bofire/surrogates/trainable.py
@@ -1,9 +1,10 @@
 import warnings
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
 
+import numpy as np
 import pandas as pd
-from sklearn.model_selection import KFold, StratifiedKFold, GroupShuffleSplit
+from sklearn.model_selection import GroupShuffleSplit, KFold, StratifiedKFold
 
 from bofire.data_models.enum import OutputFilteringEnum
 from bofire.data_models.features.api import (
@@ -21,6 +22,13 @@ class TrainableSurrogate(ABC):
     _output_filtering: OutputFilteringEnum = OutputFilteringEnum.ALL
 
     def fit(self, experiments: pd.DataFrame, options: Optional[Dict] = None):
+        """
+        Fit the surrogate model to the provided experiments.
+
+        Args:
+            experiments (pd.DataFrame): The experimental data to fit the model.
+            options (Optional[Dict], optional): Additional options for fitting the model. Defaults to None.
+        """
         # validate
         experiments = self.inputs.validate_experiments(experiments, strict=False)  # type: ignore
         experiments = self.outputs.validate_experiments(experiments)  # type: ignore
@@ -34,6 +42,15 @@ def fit(self, experiments: pd.DataFrame, options: Optional[Dict] = None):
         self._fit(X=X, Y=Y, **options)
 
     def _preprocess_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
+        """
+        Preprocess the experiments based on the output filtering setting.
+
+        Args:
+            experiments (pd.DataFrame): The experimental data to preprocess.
+
+        Returns:
+            pd.DataFrame: The preprocessed experimental data.
+        """
         if self._output_filtering is None:
             return experiments
         if self._output_filtering == OutputFilteringEnum.ALL:
@@ -50,6 +67,13 @@ def _preprocess_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
 
     @abstractmethod
     def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, **kwargs):
+        """
+        Abstract method to fit the model to the provided data.
+
+        Args:
+            X (pd.DataFrame): The input features.
+            Y (pd.DataFrame): The output targets.
+        """
         pass
 
     def cross_validate(
@@ -89,11 +113,11 @@ def cross_validate(
                 Defaults to None.
             stratified_feature (str, optional): The feature name to preserve the percentage of samples for each class in
                 the stratified folds. Defaults to None.
-            group_split_column (str, optional): The column name of the group id. 
+            group_split_column (str, optional): The column name of the group id.
                 This parameter is used to ensure that the splits are made such that the same group is not present in both
                 training and testing sets. This is useful in scenarios where data points are related or dependent on each
                 other, and splitting them into different sets would violate the assumption of independence. The number of
-                unique groups must be greater than or equal to the number of folds. Defaults to None. 
+                unique groups must be greater than or equal to the number of folds. Defaults to None.
             hooks (Dict[str, Callable[[Model, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame], Any]], optional):
                 Dictionary of callable hooks that are called within the CV loop. The callable retrieves the current trained
                 modeld and the current CV folds in the following order: X_train, y_train, X_test, y_test. Defaults to {}.
@@ -131,7 +155,7 @@ def cross_validate(
                 raise ValueError(
                     "The feature to be stratified needs to be a DiscreteInput, CategoricalInput, CategoricalOutput, or ContinuousOutput",
                 )
-            
+
         if group_split_column is not None:
             # check if the group split column is present in the experiments
             if group_split_column not in experiments.columns:
@@ -152,17 +176,7 @@ def cross_validate(
         # first filter the experiments based on the model setting
         experiments = self._preprocess_experiments(experiments)
         n = len(experiments)
-        if folds > n:
-            warnings.warn(
-                f"Training data only has {n} experiments, which is less than folds, fallback to LOOCV.",
-            )
-            folds = n
-        elif n == 0:
-            raise ValueError("Experiments is empty.")
-        elif folds < 2 and folds != -1:
-            raise ValueError("Folds must be -1 for LOO, or > 1.")
-        elif folds == -1:
-            folds = n
+        folds = self._check_valid_nfolds(folds, n)
         # preprocess hooks
         if hooks is None:
             hooks = {}
@@ -171,27 +185,13 @@ def cross_validate(
         hook_results = {key: [] for key in hooks.keys()}
 
         # instantiate kfold object
-        if stratified_feature is None:
-            if group_split_column is not None:
-                # GROUP SPLIT FUNCTIONALITY
-                cv = GroupShuffleSplit(n_splits=folds, random_state=random_state)
-                cv_func = cv.split(
-                    experiments,
-                    groups=experiments[group_split_column]
-                )
-            else:
-                cv = KFold(n_splits=folds, shuffle=True, random_state=random_state)
-                cv_func = cv.split(experiments)
-        else:
-            cv = StratifiedKFold(
-                n_splits=folds,
-                shuffle=True,
-                random_state=random_state,
-            )
-            cv_func = cv.split(
-                experiments.drop([stratified_feature], axis=1),
-                experiments[stratified_feature],
-            )
+        cv, cv_func = self._make_cv_split(
+            experiments,
+            folds,
+            stratified_feature=stratified_feature,
+            group_split_column=group_split_column,
+            random_state=random_state,
+        )
 
         key = self.outputs.get_keys()[0]  # type: ignore
         train_results = []
@@ -270,3 +270,74 @@ def cross_validate(
             CvResults(results=test_results),
             hook_results,
         )
+
+    def _check_valid_nfolds(self, folds, n):
+        """
+        Check and adjust the number of folds for cross-validation.
+
+        Args:
+            folds (int): The requested number of folds.
+            n (int): The number of experiments.
+
+        Returns:
+            int: The adjusted number of folds.
+
+        Raises:
+            ValueError: If the number of folds is invalid or if the experiments are empty.
+        """
+        if n == 0:
+            raise ValueError("Experiments is empty.")
+        if folds > n:
+            warnings.warn(
+                f"Training data only has {n} experiments, which is less than folds, fallback to LOOCV.",
+            )
+            folds = n
+        elif folds < 2 and folds != -1:
+            raise ValueError("Folds must be -1 for LOO, or > 1.")
+        elif folds == -1:
+            folds = n
+        return folds
+
+    def _make_cv_split(
+        self,
+        experiments: pd.DataFrame,
+        folds: int,
+        stratified_feature: Optional[str] = None,
+        group_split_column: Optional[str] = None,
+        random_state: Optional[int] = None,
+    ) -> Tuple[
+        Union[KFold, StratifiedKFold, GroupShuffleSplit],
+        Generator[Tuple[np.ndarray, np.ndarray], None, None],
+    ]:
+        """
+        Create the cross-validation split object.
+
+        Args:
+            experiments (pd.DataFrame): The experimental data.
+            folds (int): The number of folds.
+            random_state (Optional[int]): The random state for reproducibility.
+            stratified_feature (Optional[str]): The feature to stratify by.
+            group_split_column (Optional[str]): The column for group splitting.
+
+        Returns:
+            Tuple: The cross-validation split object and the split function.
+        """
+        if stratified_feature is None:
+            if group_split_column is not None:
+                # GROUP SPLIT FUNCTIONALITY
+                cv = GroupShuffleSplit(n_splits=folds, random_state=random_state)
+                cv_func = cv.split(experiments, groups=experiments[group_split_column])
+            else:
+                cv = KFold(n_splits=folds, shuffle=True, random_state=random_state)
+                cv_func = cv.split(experiments)
+        else:
+            cv = StratifiedKFold(
+                n_splits=folds,
+                shuffle=True,
+                random_state=random_state,
+            )
+            cv_func = cv.split(
+                experiments.drop([stratified_feature], axis=1),
+                experiments[stratified_feature],
+            )
+        return cv, cv_func
diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index d4f8fd52a..c8be944f9 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -1,5 +1,6 @@
 import pandas as pd
 import pytest
+from sklearn.model_selection import GroupShuffleSplit, KFold, StratifiedKFold
 
 import bofire.surrogates.api as surrogates
 from bofire.data_models.domain.api import Inputs, Outputs
@@ -470,6 +471,7 @@ def test_model_cross_validate_stratified_invalid_feature_type(key):
     ):
         model.cross_validate(experiments, folds=5, stratified_feature=key)
 
+
 @pytest.mark.parametrize("random_state", [1, 2])
 def test_model_cross_validate_groupfold(random_state):
     # Define the input features for the model
@@ -493,7 +495,7 @@ def test_model_cross_validate_groupfold(random_state):
     )
     # Define the output features for the model
     outputs = Outputs(features=[ContinuousOutput(key="y")])
-    
+
     # Create a DataFrame with sample experiments data
     experiments = pd.DataFrame(
         [
@@ -532,7 +534,7 @@ def test_model_cross_validate_groupfold(random_state):
         outputs=outputs,
     )
     model = surrogates.map(model)
-    
+
     # Perform cross-validation with group splitting
     train_cv, test_cv, hook_results = model.cross_validate(
         experiments,
@@ -549,10 +551,101 @@ def test_model_cross_validate_groupfold(random_state):
 
     for cvresults in train_cv.results:
         train_indices.append(list(cvresults.observed.index))
-    
+
     # Test if the groups are only present in either the test or train indices and are grouped together
     for test_index, train_index in zip(test_indices, train_indices):
         for indices in all_indices:
             test_set = set(test_index)
             train_set = set(train_index)
             assert test_set.issuperset(indices) or train_set.issuperset(indices)
+
+
+def test_make_cv_split():
+    inputs = Inputs(
+        features=[
+            ContinuousInput(
+                key=f"x_{i+1}",
+                bounds=(-4, 4),
+            )
+            for i in range(2)
+        ],
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    experiments = inputs.sample(n=10)
+    experiments["group"] = [i % 2 for i in range(10)]
+    experiments["stratified_feature"] = [
+        (i % 2) == 0 for i in range(10)
+    ]  # Add a stratified feature
+    experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
+    experiments["valid_y"] = 1
+    model = SingleTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+    )
+    model = surrogates.map(model)
+
+    # Test KFold split
+    cv, cv_func = model._make_cv_split(
+        experiments,
+        folds=5,
+        random_state=1,
+        stratified_feature=None,
+        group_split_column=None,
+    )
+    assert isinstance(cv, KFold)
+    assert len(list(cv_func)) == 5
+
+    # Test StratifiedKFold split
+    cv, cv_func = model._make_cv_split(
+        experiments,
+        folds=5,
+        random_state=1,
+        stratified_feature="stratified_feature",
+        group_split_column=None,
+    )
+    assert isinstance(cv, StratifiedKFold)
+    assert len(list(cv_func)) == 5
+
+    # Test GroupShuffleSplit split
+    cv, cv_func = model._make_cv_split(
+        experiments,
+        folds=2,
+        random_state=1,
+        stratified_feature=None,
+        group_split_column="group",
+    )
+    assert isinstance(cv, GroupShuffleSplit)
+    assert len(list(cv_func)) == 2
+
+
+def test_check_valid_nfolds():
+    inputs = Inputs(
+        features=[
+            ContinuousInput(
+                key=f"x_{i+1}",
+                bounds=(-4, 4),
+            )
+            for i in range(2)
+        ],
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    model = SingleTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+    )
+    model = surrogates.map(model)
+    # Test valid folds
+    assert model._check_valid_nfolds(5, 10) == 5
+    assert model._check_valid_nfolds(-1, 10) == 10
+
+    # Test folds greater than number of experiments
+    with pytest.warns(UserWarning):
+        assert model._check_valid_nfolds(20, 10) == 10
+
+    # Test invalid folds
+    with pytest.raises(ValueError):
+        model._check_valid_nfolds(0, 10)
+    with pytest.raises(ValueError):
+        model._check_valid_nfolds(1, 10)
+    with pytest.raises(ValueError):
+        model._check_valid_nfolds(5, 0)

From 3362d92935585ee7af3b7231f13a94aa93881309 Mon Sep 17 00:00:00 2001
From: Julian Keupp <julian.keupp@boehringer-ingelheim.com>
Date: Sun, 12 Jan 2025 23:12:18 +0100
Subject: [PATCH 5/7] improve tests, remove unnecessary case while checking
 group split col

---
 bofire/surrogates/trainable.py                |  6 +---
 .../bofire/surrogates/test_cross_validate.py  | 30 +++++++++++++++++++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py
index 04fb62e5f..d4790e389 100644
--- a/bofire/surrogates/trainable.py
+++ b/bofire/surrogates/trainable.py
@@ -163,14 +163,10 @@ def cross_validate(
                     f"Group split column {group_split_column} is not present in the experiments."
                 )
             ngroups = len(experiments[group_split_column].unique())
-            if ngroups == 0:
-                raise ValueError(
-                    f"Number of unique groups {len(experiments[group_split_column].unique())} is zero."
-                )
             # check if the number of unique groups is greater than or equal to the number of folds
             if ngroups < folds:
                 raise ValueError(
-                    f"Number of unique groups {len(experiments[group_split_column].unique())} is less than the number of folds {folds}."
+                    f"Number of unique groups {ngroups} is less than the number of folds {folds}."
                 )
 
         # first filter the experiments based on the model setting
diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index c8be944f9..f7d407d57 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -560,6 +560,36 @@ def test_model_cross_validate_groupfold(random_state):
             assert test_set.issuperset(indices) or train_set.issuperset(indices)
 
 
+def test_model_cross_validate_invalid_group_split_column():
+    inputs = Inputs(
+        features=[
+            ContinuousInput(
+                key=f"x_{i+1}",
+                bounds=(-4, 4),
+            )
+            for i in range(2)
+        ],
+    )
+    outputs = Outputs(features=[ContinuousOutput(key="y")])
+    experiments = inputs.sample(n=10)
+    experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
+    experiments["valid_y"] = 1
+    model = SingleTaskGPSurrogate(
+        inputs=inputs,
+        outputs=outputs,
+    )
+    model = surrogates.map(model)
+    
+    # Test with a non-existent group split column
+    with pytest.raises(ValueError, match="Group split column non_existent_column is not present in the experiments."):
+        model.cross_validate(experiments, folds=5, group_split_column="non_existent_column")
+
+    # Test with fewer unique groups than folds
+    experiments["group"] = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2]
+    with pytest.raises(ValueError, match="Number of unique groups 3 is less than the number of folds 5."):
+        model.cross_validate(experiments, folds=5, group_split_column="group")
+
+
 def test_make_cv_split():
     inputs = Inputs(
         features=[

From 91a62a42632df492c35daec315995eb23aff2b6c Mon Sep 17 00:00:00 2001
From: Julian Keupp <julian.keupp@boehringer-ingelheim.com>
Date: Sun, 12 Jan 2025 23:25:31 +0100
Subject: [PATCH 6/7] assert exact error and warning messages in tests

---
 tests/bofire/surrogates/test_cross_validate.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index f7d407d57..945234d50 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -208,7 +208,7 @@ def test_model_cross_validate_invalid(folds):
         outputs=outputs,
     )
     model = surrogates.map(model)
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Folds must be -1 for LOO, or > 1."):
         model.cross_validate(experiments, folds=folds)
 
 
@@ -669,13 +669,13 @@ def test_check_valid_nfolds():
     assert model._check_valid_nfolds(-1, 10) == 10
 
     # Test folds greater than number of experiments
-    with pytest.warns(UserWarning):
+    with pytest.warns(UserWarning, match="Training data only has 10 experiments, which is less than folds, fallback to LOOCV."):
         assert model._check_valid_nfolds(20, 10) == 10
 
     # Test invalid folds
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Folds must be -1 for LOO, or > 1."):
         model._check_valid_nfolds(0, 10)
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Folds must be -1 for LOO, or > 1."):
         model._check_valid_nfolds(1, 10)
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Experiments is empty."):
         model._check_valid_nfolds(5, 0)

From 5c78642b4ba61480757bc6f92d98919729fc9f9c Mon Sep 17 00:00:00 2001
From: Julian Keupp <julian.keupp@boehringer-ingelheim.com>
Date: Mon, 13 Jan 2025 08:03:30 +0100
Subject: [PATCH 7/7] formatting

---
 .../bofire/surrogates/test_cross_validate.py  | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py
index 945234d50..18c3a16cc 100644
--- a/tests/bofire/surrogates/test_cross_validate.py
+++ b/tests/bofire/surrogates/test_cross_validate.py
@@ -579,14 +579,22 @@ def test_model_cross_validate_invalid_group_split_column():
         outputs=outputs,
     )
     model = surrogates.map(model)
-    
+
     # Test with a non-existent group split column
-    with pytest.raises(ValueError, match="Group split column non_existent_column is not present in the experiments."):
-        model.cross_validate(experiments, folds=5, group_split_column="non_existent_column")
+    with pytest.raises(
+        ValueError,
+        match="Group split column non_existent_column is not present in the experiments.",
+    ):
+        model.cross_validate(
+            experiments, folds=5, group_split_column="non_existent_column"
+        )
 
     # Test with fewer unique groups than folds
     experiments["group"] = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2]
-    with pytest.raises(ValueError, match="Number of unique groups 3 is less than the number of folds 5."):
+    with pytest.raises(
+        ValueError,
+        match="Number of unique groups 3 is less than the number of folds 5.",
+    ):
         model.cross_validate(experiments, folds=5, group_split_column="group")
 
 
@@ -669,7 +677,10 @@ def test_check_valid_nfolds():
     assert model._check_valid_nfolds(-1, 10) == 10
 
     # Test folds greater than number of experiments
-    with pytest.warns(UserWarning, match="Training data only has 10 experiments, which is less than folds, fallback to LOOCV."):
+    with pytest.warns(
+        UserWarning,
+        match="Training data only has 10 experiments, which is less than folds, fallback to LOOCV.",
+    ):
         assert model._check_valid_nfolds(20, 10) == 10
 
     # Test invalid folds