diff --git a/skore/src/skore/sklearn/_estimator/base.py b/skore/src/skore/sklearn/_estimator/base.py
index 975ad8f20..fffb67c11 100644
--- a/skore/src/skore/sklearn/_estimator/base.py
+++ b/skore/src/skore/sklearn/_estimator/base.py
@@ -131,28 +131,41 @@ def _get_X_y_and_data_source_hash(self, *, data_source, X=None, y=None):
             The hash of the data source. None when we are able to track the data, and
             thus relying on X_train, y_train, X_test, y_test.
         """
+        is_cluster = is_clusterer(self._parent.estimator)
         if data_source == "test":
             if not (X is None or y is None):
                 raise ValueError("X and y must be None when data_source is test.")
+            if self._parent._X_test is None or (
+                not is_cluster and self._parent._y_test is None
+            ):
+                missing_data = "X_test" if is_cluster else "X_test and y_test"
+                raise ValueError(
+                    f"No {data_source} data (i.e. {missing_data}) were provided "
+                    f"when creating the reporter. Please provide the {data_source} "
+                    "data either when creating the reporter or by setting data_source "
+                    "to 'X_y' and providing X and y."
+                )
             return self._parent._X_test, self._parent._y_test, None
         elif data_source == "train":
             if not (X is None or y is None):
                 raise ValueError("X and y must be None when data_source is train.")
-            is_cluster = is_clusterer(self._parent.estimator)
             if self._parent._X_train is None or (
                 not is_cluster and self._parent._y_train is None
             ):
                 missing_data = "X_train" if is_cluster else "X_train and y_train"
                 raise ValueError(
-                    f"No training data (i.e. {missing_data}) were provided "
-                    "when creating the reporter. Please provide the training data."
+                    f"No {data_source} data (i.e. {missing_data}) were provided "
+                    f"when creating the reporter. Please provide the {data_source} "
+                    "data either when creating the reporter or by setting data_source "
+                    "to 'X_y' and providing X and y."
                 )
             return self._parent._X_train, self._parent._y_train, None
         elif data_source == "X_y":
-            is_cluster = is_clusterer(self._parent.estimator)
             if X is None or (not is_cluster and y is None):
                 missing_data = "X" if is_cluster else "X and y"
-                raise ValueError(f"{missing_data} must be provided.")
+                raise ValueError(
+                    f"{missing_data} must be provided when data_source is X_y."
+                )
             return X, y, joblib.hash((X, y))
         else:
             raise ValueError(
diff --git a/skore/tests/unit/sklearn/test_estimator.py b/skore/tests/unit/sklearn/test_estimator.py
index ae8e3b37a..0fa3582d4 100644
--- a/skore/tests/unit/sklearn/test_estimator.py
+++ b/skore/tests/unit/sklearn/test_estimator.py
@@ -369,7 +369,7 @@ def test_estimator_report_display_binary_classification_external_data(
     when passing external data.
     """
     estimator, X_test, y_test = binary_classification_data
-    report = EstimatorReport(estimator, X_test=X_test, y_test=y_test)
+    report = EstimatorReport(estimator)
     assert hasattr(report.metrics.plot, display)
     display_first_call = getattr(report.metrics.plot, display)(
         data_source="X_y", X=X_test, y=y_test
@@ -389,7 +389,7 @@ def test_estimator_report_display_regression_external_data(
     external data.
     """
     estimator, X_test, y_test = regression_data
-    report = EstimatorReport(estimator, X_test=X_test, y_test=y_test)
+    report = EstimatorReport(estimator)
     assert hasattr(report.metrics.plot, display)
     display_first_call = getattr(report.metrics.plot, display)(
         data_source="X_y", X=X_test, y=y_test
@@ -827,7 +827,7 @@ def test_estimator_report_get_X_y_and_data_source_hash_error():
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
 
     estimator = LogisticRegression().fit(X_train, y_train)
-    report = EstimatorReport(estimator, X_test=X_test, y_test=y_test)
+    report = EstimatorReport(estimator)
 
     err_msg = re.escape(
         "Invalid data source: unknown. Possible values are: " "test, train, X_y."
@@ -835,12 +835,15 @@ def test_estimator_report_get_X_y_and_data_source_hash_error():
     with pytest.raises(ValueError, match=err_msg):
         report.metrics.log_loss(data_source="unknown")
 
-    err_msg = re.escape(
-        "No training data (i.e. X_train and y_train) were provided "
-        "when creating the reporter. Please provide the training data."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        report.metrics.log_loss(data_source="train")
+    for data_source in ("train", "test"):
+        err_msg = re.escape(
+            f"No {data_source} data (i.e. X_{data_source} and y_{data_source}) were "
+            f"provided when creating the reporter. Please provide the {data_source} "
+            "data either when creating the reporter or by setting data_source to "
+            "'X_y' and providing X and y."
+        )
+        with pytest.raises(ValueError, match=err_msg):
+            report.metrics.log_loss(data_source=data_source)
 
     report = EstimatorReport(
         estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
@@ -865,14 +868,18 @@ def test_estimator_report_get_X_y_and_data_source_hash_error():
             rand_score, response_method="predict", data_source="X_y"
         )
 
-    err_msg = re.escape(
-        "No training data (i.e. X_train) were provided when creating the reporter. "
-        "Please provide the training data."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        report.metrics.custom_metric(
-            rand_score, response_method="predict", data_source="train"
+    report = EstimatorReport(estimator)
+    for data_source in ("train", "test"):
+        err_msg = re.escape(
+            f"No {data_source} data (i.e. X_{data_source}) were provided when "
+            f"creating the reporter. Please provide the {data_source} data either "
+            f"when creating the reporter or by setting data_source to 'X_y' and "
+            f"providing X and y."
         )
+        with pytest.raises(ValueError, match=err_msg):
+            report.metrics.custom_metric(
+                rand_score, response_method="predict", data_source=data_source
+            )
 
 
 @pytest.mark.parametrize("data_source", ("train", "test", "X_y"))