From fae47a8749615d514209ec7e80c94adb37a0e388 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 15:35:03 +0200
Subject: [PATCH 01/15] Add hypothesis test for FPS

---
 tests/utils/test_sampling_algorithms.py | 61 ++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_sampling_algorithms.py b/tests/utils/test_sampling_algorithms.py
index f9aad3bc7..48c06579a 100644
--- a/tests/utils/test_sampling_algorithms.py
+++ b/tests/utils/test_sampling_algorithms.py
@@ -2,11 +2,19 @@
 
 import math
 
+import hypothesis.extra.numpy as hnp
+import hypothesis.strategies as st
 import numpy as np
 import pandas as pd
 import pytest
+from hypothesis import example, given
+from sklearn.metrics import pairwise_distances
 
-from baybe.utils.sampling_algorithms import DiscreteSamplingMethod, sample_numerical_df
+from baybe.utils.sampling_algorithms import (
+    DiscreteSamplingMethod,
+    farthest_point_sampling,
+    sample_numerical_df,
+)
 
 
 @pytest.mark.parametrize("fraction", [0.2, 0.8, 1.0, 1.2, 2.0, 2.4, 3.5])
@@ -31,3 +39,54 @@ def test_discrete_sampling(fraction, method):
         assert len(sampled) == len(
             sampled.drop_duplicates()
         ), "Undersized sampling did not return unique points."
+
+
+@given(
+    points=hnp.arrays(
+        dtype=float,
+        shape=hnp.array_shapes(min_dims=2, max_dims=2, min_side=1),
+        # Because of the square involved in the Euclidean distance computation,
+        # we limit the floating point range to avoid overflow problems
+        elements=st.floats(min_value=-1e100, max_value=1e100, allow_nan=False),
+    )
+)
+# Explicitly test scenario with equidistant points (see comments in test body)
+@example(points=np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
+def test_farthest_point_sampling(points: np.ndarray):
+    """FPS produces the same point sequence regardless of the order in which the
+    points are provided. Also, each point fulfills the "farthest point" criterion
+    in its respective iteration.
+    """  # noqa
+    # Order the points using FPS
+    sorting_idxs = farthest_point_sampling(points, len(points))
+    target = points[sorting_idxs]
+
+    # For the ordered collection of points, it must hold:
+    # ---------------------------------------------------
+    # The minimum distance of the n_th selected point to all previously selected points
+    # must be larger than the minimum distance of any other remaining candidate point to
+    # the previously selected points – that's what makes it the "farthest point" in the
+    # respective iteration.
+    #
+    # For the check, we start with the second point (because there are otherwise no
+    # previous points) and end with the second last point (because there are otherwise
+    # no alternative candidates left):
+    dist_mat = pairwise_distances(target)
+    for i in range(1, len(dist_mat) - 1):
+        min_dist_selected_to_previous = np.min(dist_mat[i, :i])
+        min_dist_remaining_to_previous = np.min(dist_mat[i + 1 :, :i])
+        z = min_dist_selected_to_previous >= min_dist_remaining_to_previous
+        assert z
+
+    # Also, for the algorithm to be fully deterministic, the obtained result should not
+    # depend on the particular (random) order in which the points are provided. That is,
+    # running the algorithm on a permutation should still produce the same sequence of
+    # points. Note: We establish the check on the point coordinates and not the
+    # selection index, because the latter can still differ in case of duplicated points.
+    #
+    # Examples where this can make a difference is three points forming an equilateral
+    # triangle or four points spanning a unit cube. Here, tie-breaking operations such
+    # as `np.max` can lead to different results depending on the order.
+    permutation_idxs = np.random.permutation(len(points))
+    sorting_idxs = farthest_point_sampling(points[permutation_idxs], len(points))
+    assert np.array_equal(target, points[permutation_idxs][sorting_idxs])

From 2774db41ff3ad417af0a3eafc0e744f0e457b28b Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 15:38:17 +0200
Subject: [PATCH 02/15] Refactor input validation

---
 baybe/utils/sampling_algorithms.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 04052f483..9fa57f9fb 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -35,6 +35,13 @@ def farthest_point_sampling(
     Raises:
         ValueError: If an unknown initialization recommender is used.
     """
+    if np.ndim(points) != 2:
+        raise ValueError("The provided array must be two-dimensional.")
+    if len(points) == 0:
+        raise ValueError("The provided array must contain at least one row.")
+    if points.shape[-1] == 0:
+        raise ValueError("The provided input space must be at least one-dimensional.")
+
     # Compute the pairwise distances between all points
     dist_matrix = pairwise_distances(points)
 
@@ -48,11 +55,6 @@ def farthest_point_sampling(
         )
         if n_samples == 1:
             return np.random.choice(selected_point_indices, 1).tolist()
-        elif n_samples < 1:
-            raise ValueError(
-                f"Farthest point sampling must be done with >= 1 samples, but "
-                f"{n_samples=} was given."
-            )
     else:
         raise ValueError(f"unknown initialization recommender: '{initialization}'")
 

From 702a820bc6e4b08605787dda85b09171959d36c6 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 16:39:59 +0200
Subject: [PATCH 03/15] Catch edge case of completely duplicated points

---
 baybe/utils/sampling_algorithms.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 9fa57f9fb..23be46be5 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -45,6 +45,9 @@ def farthest_point_sampling(
     # Compute the pairwise distances between all points
     dist_matrix = pairwise_distances(points)
 
+    # Avoid wrong behavior pathological situation where all points are duplicates
+    np.fill_diagonal(dist_matrix, -np.inf)
+
     # Initialize the point selection subset
     if initialization == "random":
         selected_point_indices = [np.random.randint(0, len(points))]

From 79da6f3879680ab747d20bc2842d578bfc354b93 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 16:42:08 +0200
Subject: [PATCH 04/15] Make algorithm agnostic to point order

---
 baybe/utils/sampling_algorithms.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 23be46be5..2e3c10f9d 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -42,7 +42,11 @@ def farthest_point_sampling(
     if points.shape[-1] == 0:
         raise ValueError("The provided input space must be at least one-dimensional.")
 
-    # Compute the pairwise distances between all points
+    # Sort the points to produce the same result regardless of the input order
+    sort_idx = np.lexsort(tuple(points.T))
+    points = points[sort_idx]
+
+    # Pre-compute the pairwise distances between all points
     dist_matrix = pairwise_distances(points)
 
     # Avoid wrong behavior pathological situation where all points are duplicates
@@ -53,11 +57,9 @@ def farthest_point_sampling(
         selected_point_indices = [np.random.randint(0, len(points))]
     elif initialization == "farthest":
         idx_1d = np.argmax(dist_matrix)
-        selected_point_indices = list(
-            map(int, np.unravel_index(idx_1d, dist_matrix.shape))
-        )
+        selected_point_indices = list(np.unravel_index(idx_1d, dist_matrix.shape))
         if n_samples == 1:
-            return np.random.choice(selected_point_indices, 1).tolist()
+            return [selected_point_indices[0]]
     else:
         raise ValueError(f"unknown initialization recommender: '{initialization}'")
 
@@ -81,7 +83,8 @@ def farthest_point_sampling(
         selected_point_indices.append(selected_point_index)
         remaining_point_indices.remove(selected_point_index)
 
-    return selected_point_indices
+    # Undo the initial point reordering
+    return sort_idx[selected_point_indices]
 
 
 class DiscreteSamplingMethod(Enum):

From 7ca80a0df2942a6d01ea97b5e2bbfd0df301266b Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 17:11:23 +0200
Subject: [PATCH 05/15] Improve docstrings

---
 baybe/utils/sampling_algorithms.py | 31 +++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 2e3c10f9d..ba93c18d8 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -13,27 +13,32 @@ def farthest_point_sampling(
     n_samples: int = 1,
     initialization: Literal["farthest", "random"] = "farthest",
 ) -> list[int]:
-    """Sample points according to a farthest point heuristic.
+    """Select a subset of points using farthest point sampling.
 
-    Creates a subset of a collection of points by successively adding points with the
-    largest Euclidean distance to intermediate point selections encountered during
-    the algorithmic process.
+    Creates a subset of a given collection of points by successively adding points with
+    the largest Euclidean distance to intermediate point selections encountered during
+    the algorithmic process. The mechanism used for the initial point selection is
+    configurable.
 
     Args:
-        points: The points that are available for selection, represented as a 2D array
-            whose first dimension corresponds to the point index.
+        points: The points that are available for selection, represented as a 2-D array
+            of shape (n, k), where n is the number of points and k is the dimensionality
+            of the points.
         n_samples: The total number of points to be selected.
-        initialization: Determines how the first points are selected. When
-            ``"farthest"`` is chosen, the first two selected points are those with the
-            largest distance. If only a single point is requested, it is selected
-            randomly from these two. When ``"random"`` is chosen, the first point is
-            selected uniformly at random.
+        initialization: Determines how the first points are selected:
+            * ``"farthest"``: The first two selected points are those with the
+              largest distance. If only a single point is requested, a deterministic
+              choice is made based on the point coordinates.
+            * ``"random"``: The first point is selected uniformly at random.
 
     Returns:
         A list containing the positional indices of the selected points.
 
     Raises:
-        ValueError: If an unknown initialization recommender is used.
+        ValueError: If the provided array is not two-dimensional.
+        ValueError: If the array contains no points.
+        ValueError: If the input space has no dimensions.
+        ValueError: If an unknown method for initialization is specified.
     """
     if np.ndim(points) != 2:
         raise ValueError("The provided array must be two-dimensional.")
@@ -52,7 +57,7 @@ def farthest_point_sampling(
     # Avoid wrong behavior pathological situation where all points are duplicates
     np.fill_diagonal(dist_matrix, -np.inf)
 
-    # Initialize the point selection subset
+    # Initialize the point selection
     if initialization == "random":
         selected_point_indices = [np.random.randint(0, len(points))]
     elif initialization == "farthest":

From 7b008aef6c31c9c0a3724624af755ff36987e087 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 17:12:18 +0200
Subject: [PATCH 06/15] Use walrus to extract number of points

---
 baybe/utils/sampling_algorithms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index ba93c18d8..a9f7f82eb 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -42,7 +42,7 @@ def farthest_point_sampling(
     """
     if np.ndim(points) != 2:
         raise ValueError("The provided array must be two-dimensional.")
-    if len(points) == 0:
+    if (n_points := len(points)) == 0:
         raise ValueError("The provided array must contain at least one row.")
     if points.shape[-1] == 0:
         raise ValueError("The provided input space must be at least one-dimensional.")
@@ -59,7 +59,7 @@ def farthest_point_sampling(
 
     # Initialize the point selection
     if initialization == "random":
-        selected_point_indices = [np.random.randint(0, len(points))]
+        selected_point_indices = [np.random.randint(0, n_points)]
     elif initialization == "farthest":
         idx_1d = np.argmax(dist_matrix)
         selected_point_indices = list(np.unravel_index(idx_1d, dist_matrix.shape))
@@ -69,7 +69,7 @@ def farthest_point_sampling(
         raise ValueError(f"unknown initialization recommender: '{initialization}'")
 
     # Initialize the list of remaining points
-    remaining_point_indices = list(range(len(points)))
+    remaining_point_indices = list(range(n_points))
     for idx in selected_point_indices:
         remaining_point_indices.remove(idx)
 

From c28703e110f99c6e62ebeaa2dfee3346b1caaa3c Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Tue, 6 Aug 2024 17:15:21 +0200
Subject: [PATCH 07/15] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 075ab5378..f85c11b08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - `CategoricalParameter` and `TaskParameter` no longer incorrectly coerce a single
   string input to categories/tasks
+- `farthest_point_sampling` no longer depends on the provided point order
 
 ## [0.10.0] - 2024-08-02
 ### Breaking Changes

From fb3748b311adce2e1f5497366cb38e8233aa7d8c Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Wed, 7 Aug 2024 12:37:29 +0200
Subject: [PATCH 08/15] Add test case for one single FPS recommendation

---
 tests/utils/test_sampling_algorithms.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/utils/test_sampling_algorithms.py b/tests/utils/test_sampling_algorithms.py
index 48c06579a..27da3e4b4 100644
--- a/tests/utils/test_sampling_algorithms.py
+++ b/tests/utils/test_sampling_algorithms.py
@@ -90,3 +90,8 @@ def test_farthest_point_sampling(points: np.ndarray):
     permutation_idxs = np.random.permutation(len(points))
     sorting_idxs = farthest_point_sampling(points[permutation_idxs], len(points))
     assert np.array_equal(target, points[permutation_idxs][sorting_idxs])
+
+    # Because requesting a single point needs special treatment in FPS,
+    # we test this as additional case
+    sorting_idxs = farthest_point_sampling(points[permutation_idxs], 1)
+    assert np.array_equal(target[[0]], points[permutation_idxs][sorting_idxs])

From 6f6c33131cad7b50c04ef6f34b3ec1dff80e5a08 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Wed, 7 Aug 2024 12:38:10 +0200
Subject: [PATCH 09/15] Fix index bug in case of one requested point

---
 baybe/utils/sampling_algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index a9f7f82eb..396400b2f 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -64,7 +64,7 @@ def farthest_point_sampling(
         idx_1d = np.argmax(dist_matrix)
         selected_point_indices = list(np.unravel_index(idx_1d, dist_matrix.shape))
         if n_samples == 1:
-            return [selected_point_indices[0]]
+            return [sort_idx[selected_point_indices[0]]]
     else:
         raise ValueError(f"unknown initialization recommender: '{initialization}'")
 

From ddfeb8098c93ee52c1c25ec86d4c5df4479a236e Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Wed, 7 Aug 2024 13:41:34 +0200
Subject: [PATCH 10/15] Fix mypy error

---
 baybe/utils/sampling_algorithms.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 396400b2f..6e83dec0c 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -62,7 +62,9 @@ def farthest_point_sampling(
         selected_point_indices = [np.random.randint(0, n_points)]
     elif initialization == "farthest":
         idx_1d = np.argmax(dist_matrix)
-        selected_point_indices = list(np.unravel_index(idx_1d, dist_matrix.shape))
+        selected_point_indices = list(
+            map(int, np.unravel_index(idx_1d, dist_matrix.shape))
+        )
         if n_samples == 1:
             return [sort_idx[selected_point_indices[0]]]
     else:

From c719e2244263f98d5c07f0097133a95071a1758f Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Fri, 9 Aug 2024 14:02:12 +0200
Subject: [PATCH 11/15] Fix type of returned value

---
 baybe/utils/sampling_algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 6e83dec0c..d98984804 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -91,7 +91,7 @@ def farthest_point_sampling(
         remaining_point_indices.remove(selected_point_index)
 
     # Undo the initial point reordering
-    return sort_idx[selected_point_indices]
+    return sort_idx[selected_point_indices].tolist()
 
 
 class DiscreteSamplingMethod(Enum):

From 926fb123fdd35fb30bebdc714bd3ca24e9f6ee74 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Fri, 9 Aug 2024 14:19:21 +0200
Subject: [PATCH 12/15] Provide details on incorrect user input in error
 message

---
 baybe/utils/sampling_algorithms.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index d98984804..7b95edf1e 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -40,8 +40,11 @@ def farthest_point_sampling(
         ValueError: If the input space has no dimensions.
         ValueError: If an unknown method for initialization is specified.
     """
-    if np.ndim(points) != 2:
-        raise ValueError("The provided array must be two-dimensional.")
+    if (n_dims := np.ndim(points)) != 2:
+        raise ValueError(
+            f"The provided array must be two-dimensional but the given input had "
+            f"{n_dims} dimensions."
+        )
     if (n_points := len(points)) == 0:
         raise ValueError("The provided array must contain at least one row.")
     if points.shape[-1] == 0:

From a60513b5756a3a48c4b7800eb02f213748958b1a Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Fri, 9 Aug 2024 14:20:16 +0200
Subject: [PATCH 13/15] Add missing validation for number of requested samples

---
 baybe/utils/sampling_algorithms.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 7b95edf1e..67b836755 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -49,6 +49,11 @@ def farthest_point_sampling(
         raise ValueError("The provided array must contain at least one row.")
     if points.shape[-1] == 0:
         raise ValueError("The provided input space must be at least one-dimensional.")
+    if n_samples > n_points:
+        raise ValueError(
+            f"The number of requested samples ({n_samples}) cannot be larger than the "
+            f"total number of points provided ({n_points})."
+        )
 
     # Sort the points to produce the same result regardless of the input order
     sort_idx = np.lexsort(tuple(points.T))

From f0c07e85a3b161bba55a3b937c31580fce5e39af Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Fri, 9 Aug 2024 14:22:30 +0200
Subject: [PATCH 14/15] Raise warning in pathological case

---
 baybe/utils/sampling_algorithms.py      | 8 +++++++-
 tests/utils/test_sampling_algorithms.py | 8 ++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 67b836755..4ea2ae929 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -1,5 +1,6 @@
 """A collection of point sampling algorithms."""
 
+import warnings
 from enum import Enum
 from typing import Literal
 
@@ -55,6 +56,11 @@ def farthest_point_sampling(
             f"total number of points provided ({n_points})."
         )
 
+    # Catch the pathological case upfront
+    if len(np.unique(points, axis=0)) == 1:
+        warnings.warn("All points are identical.", UserWarning)
+        return list(range(n_samples))
+
     # Sort the points to produce the same result regardless of the input order
     sort_idx = np.lexsort(tuple(points.T))
     points = points[sort_idx]
@@ -62,7 +68,7 @@ def farthest_point_sampling(
     # Pre-compute the pairwise distances between all points
     dist_matrix = pairwise_distances(points)
 
-    # Avoid wrong behavior pathological situation where all points are duplicates
+    # Avoid wrong behavior situations where all (remaining) points are duplicates
     np.fill_diagonal(dist_matrix, -np.inf)
 
     # Initialize the point selection
diff --git a/tests/utils/test_sampling_algorithms.py b/tests/utils/test_sampling_algorithms.py
index 27da3e4b4..3a46b96cd 100644
--- a/tests/utils/test_sampling_algorithms.py
+++ b/tests/utils/test_sampling_algorithms.py
@@ -95,3 +95,11 @@ def test_farthest_point_sampling(points: np.ndarray):
     # we test this as additional case
     sorting_idxs = farthest_point_sampling(points[permutation_idxs], 1)
     assert np.array_equal(target[[0]], points[permutation_idxs][sorting_idxs])
+
+
+def test_farthest_point_sampling_pathological_case():
+    """FPS executed on a degenerate point cloud raises a warning."""
+    points = np.ones((3, 3))
+    with pytest.warns(UserWarning, match="identical"):
+        selection = farthest_point_sampling(points, 2)
+    assert selection == [0, 1]

From ed960a468c91b6d92421f91ed5f8d416e60f8eb9 Mon Sep 17 00:00:00 2001
From: AdrianSosic <adrian.sosic@merckgroup.com>
Date: Fri, 9 Aug 2024 14:37:19 +0200
Subject: [PATCH 15/15] Add backticks to docstring

---
 baybe/utils/sampling_algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/baybe/utils/sampling_algorithms.py b/baybe/utils/sampling_algorithms.py
index 4ea2ae929..6adf5da23 100644
--- a/baybe/utils/sampling_algorithms.py
+++ b/baybe/utils/sampling_algorithms.py
@@ -23,8 +23,8 @@ def farthest_point_sampling(
 
     Args:
         points: The points that are available for selection, represented as a 2-D array
-            of shape (n, k), where n is the number of points and k is the dimensionality
-            of the points.
+            of shape ``(n, k)``, where ``n`` is the number of points and ``k`` is the
+            dimensionality of the points.
         n_samples: The total number of points to be selected.
         initialization: Determines how the first points are selected:
             * ``"farthest"``: The first two selected points are those with the