From 83b751df14347843def6a133c0f2588446a5c8fd Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 09:12:40 +0000 Subject: [PATCH 01/11] updated stratified split logic to accepts ratios that don't sum up to 1 --- src/eva/core/data/splitting/stratified.py | 18 ++++++---- .../core/data/splitting/test_stratified.py | 35 +++++++++++++++---- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/eva/core/data/splitting/stratified.py b/src/eva/core/data/splitting/stratified.py index ad9377a7..8537e7a3 100644 --- a/src/eva/core/data/splitting/stratified.py +++ b/src/eva/core/data/splitting/stratified.py @@ -28,10 +28,11 @@ def stratified_split( """ if len(samples) != len(targets): raise ValueError("The number of samples and targets must be equal.") - if train_ratio + val_ratio + (test_ratio or 0) != 1: - raise ValueError("The sum of the ratios must be equal to 1.") + if train_ratio + val_ratio + (test_ratio or 0) > 1.0: + raise ValueError("The sum of the ratios must be lower or equal to 1.") - np.random.seed(seed) + use_all_samples = train_ratio + val_ratio + test_ratio == 1 + random_generator = np.random.default_rng(seed) unique_classes, y_indices = np.unique(targets, return_inverse=True) n_classes = unique_classes.shape[0] @@ -39,18 +40,23 @@ def stratified_split( for c in range(n_classes): class_indices = np.where(y_indices == c)[0] - np.random.shuffle(class_indices) + random_generator.shuffle(class_indices) n_train = int(np.floor(train_ratio * len(class_indices))) or 1 n_val = ( len(class_indices) - n_train - if test_ratio == 0.0 + if test_ratio == 0.0 and use_all_samples else int(np.floor(val_ratio * len(class_indices))) or 1 ) train_indices.extend(class_indices[:n_train]) val_indices.extend(class_indices[n_train : n_train + n_val]) if test_ratio > 0.0: - test_indices.extend(class_indices[n_train + n_val :]) + n_test = ( + len(class_indices) - n_train - n_val + if use_all_samples + else int(np.floor(test_ratio * len(class_indices))) or 1 + ) + test_indices.extend(class_indices[n_train + n_val : n_train + n_val + n_test]) return train_indices, val_indices, test_indices or None diff --git a/tests/eva/core/data/splitting/test_stratified.py b/tests/eva/core/data/splitting/test_stratified.py index 2b65ccd8..ef3857fb 100644 --- a/tests/eva/core/data/splitting/test_stratified.py +++ b/tests/eva/core/data/splitting/test_stratified.py @@ -1,5 +1,7 @@ """Tests for the stratified split function.""" +from typing import List + import pytest from eva.core.data import splitting @@ -12,10 +14,11 @@ ([0] * 50 + [1] * 50, 0.7, 0.15, 0.15), ([0] * 30 + [1] * 70, 0.8, 0.2, 0.0), ([0] * 30 + [1] * 70, 0.7, 0.15, 0.15), + ([0] * 30 + [1] * 70, 0.2, 0.1, 0.15), ], ) def test_stratification( - targets: list[int], train_ratio: float, val_ratio: float, test_ratio: float + targets: List[int], train_ratio: float, val_ratio: float, test_ratio: float ): """Tests if the stratified split maintains the class proportions.""" samples = list(range(len(targets))) @@ -31,21 +34,22 @@ def test_stratification( assert train_classes.count(c) == pytest.approx(expected_train_proportion, abs=1) assert val_classes.count(c) == pytest.approx(expected_val_proportion, abs=1) - assert len(train_indices) + len(val_indices) + len(test_indices or []) == len(samples) + if train_ratio + val_ratio + test_ratio == 1: + assert len(train_indices) + len(val_indices) + len(test_indices or []) == len(samples) -@pytest.mark.parametrize("train_ratio, val_ratio, test_ratio", [(0.6, 0.3, 0.0), (0.6, 0.4, 0.3)]) 
+@pytest.mark.parametrize("train_ratio, val_ratio, test_ratio", [(0.6, 0.5, 0.0), (0.6, 0.0, 0.7)]) def test_invalid_ratio_sums(train_ratio: float, val_ratio: float, test_ratio: float): """Tests if the function raises an error when the ratios do not sum to 1.""" samples = list(range(100)) targets = [0] * 50 + [1] * 50 - expected_error = "The sum of the ratios must be equal to 1." + expected_error = "The sum of the ratios must be lower or equal to 1" with pytest.raises(ValueError, match=expected_error): splitting.stratified_split(samples, targets, train_ratio, val_ratio, test_ratio) @pytest.mark.parametrize("seed1, seed2", [(42, 43), (123, 124), (999, 1000)]) -def test_different_seeds_produce_different_outputs(seed1, seed2): +def test_different_seeds_produce_different_outputs(seed1: int, seed2: int): """Tests if different seeds produce different train, validation, and test indices.""" samples = list(range(100)) targets = [0] * 50 + [1] * 50 @@ -57,8 +61,20 @@ def test_different_seeds_produce_different_outputs(seed1, seed2): assert test1 != test2, "Different seeds should produce different test indices" -@pytest.mark.parametrize("seed", [42, 123, 999]) -def test_same_seed_produces_same_outputs(seed): +@pytest.mark.parametrize( + "seed, train_expected_indices, val_expected_indices, test_expected_indices", + [ + (42, [5, 25, 20, 49], [3, 44, 30, 10], [0, 12, 14, 48]), + (123, [15, 38, 41, 7], [29, 44, 46, 37], [16, 9, 12, 45]), + (999, [49, 10, 1, 25], [24, 39, 3, 37], [0, 28, 13, 16]), + ], +) +def test_same_seed_produces_same_outputs( + seed: int, + train_expected_indices: List[int], + val_expected_indices: List[int], + test_expected_indices: List[int], +): """Tests if the same seed produces the same train, validation, and test indices.""" samples = list(range(100)) targets = [0] * 50 + [1] * 50 @@ -68,3 +84,8 @@ def test_same_seed_produces_same_outputs(seed): assert train1 == train2, "Same seed should produce the same train indices" assert val1 == val2, "Same seed should produce the same validation indices" assert test1 == test2, "Same seed should produce the same test indices" + assert isinstance(test1, list) + + assert train1[: len(train_expected_indices)] == train_expected_indices, "Unexpected indices" + assert val1[: len(val_expected_indices)] == val_expected_indices, "Unexpected indices" + assert test1[: len(test_expected_indices)] == test_expected_indices, "Unexpected indices" From 7704c3571a1dca7211c8933756e3d18954dc7b61 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 09:13:18 +0000 Subject: [PATCH 02/11] added PANDATiny dataset class --- .../offline/classification/panda_tiny.yaml | 133 ++++++++++++++++++ src/eva/vision/data/datasets/__init__.py | 2 + .../data/datasets/classification/__init__.py | 3 +- .../data/datasets/classification/panda.py | 13 ++ 4 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 configs/vision/pathology/offline/classification/panda_tiny.yaml diff --git a/configs/vision/pathology/offline/classification/panda_tiny.yaml b/configs/vision/pathology/offline/classification/panda_tiny.yaml new file mode 100644 index 00000000..94db80d1 --- /dev/null +++ b/configs/vision/pathology/offline/classification/panda_tiny.yaml @@ -0,0 +1,133 @@ +--- +trainer: + class_path: eva.Trainer + init_args: + n_runs: &N_RUNS ${oc.env:N_RUNS, 5} + default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:MODEL_NAME, dino_vits16}/offline/panda} + max_epochs: &MAX_EPOCHS ${oc.env:MAX_EPOCHS, 49} + callbacks: + - class_path: 
eva.callbacks.ConfigurationLogger + - class_path: lightning.pytorch.callbacks.TQDMProgressBar + init_args: + refresh_rate: ${oc.env:TQDM_REFRESH_RATE, 1} + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: epoch + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + filename: best + save_last: true + save_top_k: 1 + monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/MulticlassAccuracy} + mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max} + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + min_delta: 0 + patience: ${oc.env:PATIENCE, 8} + monitor: *MONITOR_METRIC + mode: *MONITOR_METRIC_MODE + - class_path: eva.callbacks.ClassificationEmbeddingsWriter + init_args: + output_dir: &DATASET_EMBEDDINGS_ROOT ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings/${oc.env:MODEL_NAME, dino_vits16}/panda} + dataloader_idx_map: + 0: train + 1: val + 2: test + metadata_keys: ["wsi_id"] + backbone: + class_path: eva.vision.models.ModelFromRegistry + init_args: + model_name: ${oc.env:MODEL_NAME, universal/vit_small_patch16_224_dino} + model_extra_kwargs: ${oc.env:MODEL_EXTRA_KWARGS, null} + overwrite: false + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: *OUTPUT_ROOT + name: "" +model: + class_path: eva.HeadModule + init_args: + head: + class_path: eva.vision.models.networks.ABMIL + init_args: + input_size: ${oc.env:IN_FEATURES, 384} + output_size: &NUM_CLASSES 6 + projected_input_size: 128 + criterion: torch.nn.CrossEntropyLoss + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: ${oc.env:LR_VALUE, 0.001} + betas: [0.9, 0.999] + lr_scheduler: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: *MAX_EPOCHS + eta_min: 0.0 + metrics: + common: + - class_path: eva.metrics.AverageLoss + - class_path: eva.metrics.MulticlassClassificationMetrics + init_args: + num_classes: *NUM_CLASSES +data: + class_path: eva.DataModule + init_args: + datasets: + train: + class_path: eva.datasets.MultiEmbeddingsClassificationDataset + init_args: &DATASET_ARGS + root: *DATASET_EMBEDDINGS_ROOT + manifest_file: manifest.csv + split: train + embeddings_transforms: + class_path: eva.core.data.transforms.Pad2DTensor + init_args: + pad_size: &N_PATCHES 200 + val: + class_path: eva.datasets.MultiEmbeddingsClassificationDataset + init_args: + <<: *DATASET_ARGS + split: val + test: + class_path: eva.datasets.MultiEmbeddingsClassificationDataset + init_args: + <<: *DATASET_ARGS + split: test + predict: + - class_path: eva.vision.datasets.PANDATiny + init_args: &PREDICT_DATASET_ARGS + root: ${oc.env:DATA_ROOT, ./data/panda/prostate-cancer-grade-assessment} + sampler: + class_path: eva.vision.data.wsi.patching.samplers.ForegroundGridSampler + init_args: + max_samples: *N_PATCHES + width: 224 + height: 224 + target_mpp: 0.5 + split: train + image_transforms: + class_path: eva.vision.data.transforms.common.ResizeAndCrop + init_args: + size: ${oc.env:RESIZE_DIM, 224} + mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]} + std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]} + - class_path: eva.vision.datasets.PANDATiny + init_args: + <<: *PREDICT_DATASET_ARGS + split: val + - class_path: eva.vision.datasets.PANDATiny + init_args: + <<: *PREDICT_DATASET_ARGS + split: test + dataloaders: + train: + batch_size: &BATCH_SIZE ${oc.env:BATCH_SIZE, 32} + shuffle: true + val: + batch_size: *BATCH_SIZE + test: + batch_size: *BATCH_SIZE + predict: + batch_size: &PREDICT_BATCH_SIZE 
${oc.env:PREDICT_BATCH_SIZE, 64} diff --git a/src/eva/vision/data/datasets/__init__.py b/src/eva/vision/data/datasets/__init__.py index bec918af..a70a5477 100644 --- a/src/eva/vision/data/datasets/__init__.py +++ b/src/eva/vision/data/datasets/__init__.py @@ -6,6 +6,7 @@ MHIST, PANDA, Camelyon16, + PANDATiny, PatchCamelyon, WsiClassificationDataset, ) @@ -28,6 +29,7 @@ "CRC", "MHIST", "PANDA", + "PANDATiny", "Camelyon16", "PatchCamelyon", "WsiClassificationDataset", diff --git a/src/eva/vision/data/datasets/classification/__init__.py b/src/eva/vision/data/datasets/classification/__init__.py index c9daabbe..76b9241b 100644 --- a/src/eva/vision/data/datasets/classification/__init__.py +++ b/src/eva/vision/data/datasets/classification/__init__.py @@ -4,7 +4,7 @@ from eva.vision.data.datasets.classification.camelyon16 import Camelyon16 from eva.vision.data.datasets.classification.crc import CRC from eva.vision.data.datasets.classification.mhist import MHIST -from eva.vision.data.datasets.classification.panda import PANDA +from eva.vision.data.datasets.classification.panda import PANDA, PANDATiny from eva.vision.data.datasets.classification.patch_camelyon import PatchCamelyon from eva.vision.data.datasets.classification.wsi import WsiClassificationDataset @@ -15,5 +15,6 @@ "PatchCamelyon", "WsiClassificationDataset", "PANDA", + "PANDATiny", "Camelyon16", ] diff --git a/src/eva/vision/data/datasets/classification/panda.py b/src/eva/vision/data/datasets/classification/panda.py index a7e180f6..fa5ddc74 100644 --- a/src/eva/vision/data/datasets/classification/panda.py +++ b/src/eva/vision/data/datasets/classification/panda.py @@ -182,3 +182,16 @@ def _get_target_from_path(self, file_path: str) -> int: def _get_id_from_path(self, file_path: str) -> str: return os.path.basename(file_path).replace(".tiff", "") + + +class PANDATiny(PANDA): + """Tiny version of the PANDA dataset for quicker benchmarking.""" + + _train_split_ratio: float = 0.1 + """Train split ratio.""" + + _val_split_ratio: float = 0.05 + """Validation split ratio.""" + + _test_split_ratio: float = 0.05 + """Test split ratio.""" From 8edee37ecabb810f1d5569f6e34804e40a12d7e1 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 11:26:07 +0000 Subject: [PATCH 03/11] updated random split logic & test --- .../advanced/replicate_evaluations.md | 6 ++--- src/eva/core/data/splitting/random.py | 11 +++++--- tests/eva/core/data/splitting/test_random.py | 26 ++++++++++++++++--- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/docs/user-guide/advanced/replicate_evaluations.md b/docs/user-guide/advanced/replicate_evaluations.md index eb3bcb58..6a47135a 100644 --- a/docs/user-guide/advanced/replicate_evaluations.md +++ b/docs/user-guide/advanced/replicate_evaluations.md @@ -144,10 +144,10 @@ were released on [HuggingFace](https://huggingface.co/bioptimus/H-optimus-0). 
``` MODEL_NAME=pathology/bioptimus_h_optimus_0 \ -NORMALIZE_MEAN=[0.707223, 0.578729, 0.703617] \ -NORMALIZE_STD=[0.211883, 0.230117, 0.177517] \ +NORMALIZE_MEAN=[0.707223,0.578729,0.703617] \ +NORMALIZE_STD=[0.211883,0.230117,0.177517] \ IN_FEATURES=1024 \ -eva predict_fit --config configs/vision/pathology/offline/.yaml +eva predict_fit --config configs/vision/pathology/offline/panda_tiny.yaml ``` diff --git a/src/eva/core/data/splitting/random.py b/src/eva/core/data/splitting/random.py index 274a1412..922716bb 100644 --- a/src/eva/core/data/splitting/random.py +++ b/src/eva/core/data/splitting/random.py @@ -24,12 +24,15 @@ def random_split( Returns: The indices of the train, validation, and test sets as lists. """ - if train_ratio + val_ratio + (test_ratio or 0) != 1: - raise ValueError("The sum of the ratios must be equal to 1.") + total_ratio = train_ratio + val_ratio + test_ratio + if total_ratio > 1.0: + raise ValueError("The sum of the ratios must be lower or equal to 1.") random_generator = np.random.default_rng(seed) - n_samples = len(samples) - indices = random_generator.permutation(n_samples) + n_samples = int(total_ratio*len(samples)) + indices = random_generator.permutation(len(samples))[:n_samples] + + n_samples = int(total_ratio*len(samples)) n_train = int(np.floor(train_ratio * n_samples)) n_val = n_samples - n_train if test_ratio == 0.0 else int(np.floor(val_ratio * n_samples)) or 1 diff --git a/tests/eva/core/data/splitting/test_random.py b/tests/eva/core/data/splitting/test_random.py index e31396f3..21e57e9b 100644 --- a/tests/eva/core/data/splitting/test_random.py +++ b/tests/eva/core/data/splitting/test_random.py @@ -1,6 +1,7 @@ """Tests for the random split function.""" import pytest +from typing import List from eva.core.data import splitting @@ -32,11 +33,11 @@ def test_split_ratios(n_samples: int, train_ratio: float, val_ratio: float, test assert len(train_indices) + len(val_indices) + len(test_indices or []) == n_samples -@pytest.mark.parametrize("train_ratio, val_ratio, test_ratio", [(0.6, 0.3, 0.0), (0.6, 0.4, 0.3)]) +@pytest.mark.parametrize("train_ratio, val_ratio, test_ratio", [(0.6, 0.7, 0.0), (0.6, 0.4, 0.3)]) def test_invalid_ratio_sums(train_ratio: float, val_ratio: float, test_ratio: float): """Tests if the function raises an error when the ratios do not sum to 1.""" samples = list(range(100)) - expected_error = "The sum of the ratios must be equal to 1." 
+ expected_error = "The sum of the ratios must be lower or equal to 1" with pytest.raises(ValueError, match=expected_error): splitting.random_split(samples, train_ratio, val_ratio, test_ratio) @@ -53,8 +54,20 @@ def test_different_seeds_produce_different_outputs(seed1, seed2): assert test1 != test2, "Different seeds should produce different test indices" -@pytest.mark.parametrize("seed", [42, 123, 999]) -def test_same_seed_produces_same_outputs(seed): +@pytest.mark.parametrize( + "seed, train_expected_indices, val_expected_indices, test_expected_indices", + [ + (42, [59, 21, 56, 18], [69, 15, 48, 55], [49, 6, 90, 11]), + (123, [21, 71, 92, 23], [89, 14, 64, 4], [45, 75, 62, 6]), + (999, [47, 42, 57, 50], [41, 3, 81, 61], [45, 6, 56, 67]), + ], +) +def test_same_seed_produces_same_outputs( + seed: int, + train_expected_indices: List[int], + val_expected_indices: List[int], + test_expected_indices: List[int], +): """Tests if the same seed produces the same train, validation, and test indices.""" samples = list(range(100)) train1, val1, test1 = splitting.random_split(samples, 0.6, 0.2, 0.2, seed=seed) @@ -63,6 +76,11 @@ def test_same_seed_produces_same_outputs(seed): assert train1 == train2, "Same seed should produce the same train indices" assert val1 == val2, "Same seed should produce the same validation indices" assert test1 == test2, "Same seed should produce the same test indices" + assert isinstance(test1, list) + + assert train1[: len(train_expected_indices)] == train_expected_indices, "Unexpected indices" + assert val1[: len(val_expected_indices)] == val_expected_indices, "Unexpected indices" + assert test1[: len(test_expected_indices)] == test_expected_indices, "Unexpected indices" def test_no_test_set(): From ad4b53987f6aa2496d13442aca2efd1d82ba6f4b Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 12:15:06 +0000 Subject: [PATCH 04/11] fix linting --- src/eva/core/data/splitting/random.py | 4 +--- tests/eva/core/data/splitting/test_random.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/eva/core/data/splitting/random.py b/src/eva/core/data/splitting/random.py index 922716bb..101b9315 100644 --- a/src/eva/core/data/splitting/random.py +++ b/src/eva/core/data/splitting/random.py @@ -29,11 +29,9 @@ def random_split( raise ValueError("The sum of the ratios must be lower or equal to 1.") random_generator = np.random.default_rng(seed) - n_samples = int(total_ratio*len(samples)) + n_samples = int(total_ratio * len(samples)) indices = random_generator.permutation(len(samples))[:n_samples] - n_samples = int(total_ratio*len(samples)) - n_train = int(np.floor(train_ratio * n_samples)) n_val = n_samples - n_train if test_ratio == 0.0 else int(np.floor(val_ratio * n_samples)) or 1 diff --git a/tests/eva/core/data/splitting/test_random.py b/tests/eva/core/data/splitting/test_random.py index 21e57e9b..21a376c7 100644 --- a/tests/eva/core/data/splitting/test_random.py +++ b/tests/eva/core/data/splitting/test_random.py @@ -1,8 +1,9 @@ """Tests for the random split function.""" -import pytest from typing import List +import pytest + from eva.core.data import splitting From 2403486fb7070664542c0528643a24794db826e0 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 13:19:40 +0000 Subject: [PATCH 05/11] fixed docs --- docs/user-guide/advanced/replicate_evaluations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/replicate_evaluations.md b/docs/user-guide/advanced/replicate_evaluations.md index 
6a47135a..b080b15e 100644 --- a/docs/user-guide/advanced/replicate_evaluations.md +++ b/docs/user-guide/advanced/replicate_evaluations.md @@ -147,7 +147,7 @@ MODEL_NAME=pathology/bioptimus_h_optimus_0 \ NORMALIZE_MEAN=[0.707223,0.578729,0.703617] \ NORMALIZE_STD=[0.211883,0.230117,0.177517] \ IN_FEATURES=1024 \ -eva predict_fit --config configs/vision/pathology/offline/panda_tiny.yaml +eva predict_fit --config configs/vision/pathology/offline/.yaml ``` From 09d4c08de220ac4e81972ae7e424180af85fa954 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 13:30:21 +0000 Subject: [PATCH 06/11] renamed tiny to small --- .../classification/{panda_tiny.yaml => panda_small.yaml} | 6 +++--- src/eva/vision/data/datasets/__init__.py | 4 ++-- src/eva/vision/data/datasets/classification/__init__.py | 4 ++-- src/eva/vision/data/datasets/classification/panda.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) rename configs/vision/pathology/offline/classification/{panda_tiny.yaml => panda_small.yaml} (96%) diff --git a/configs/vision/pathology/offline/classification/panda_tiny.yaml b/configs/vision/pathology/offline/classification/panda_small.yaml similarity index 96% rename from configs/vision/pathology/offline/classification/panda_tiny.yaml rename to configs/vision/pathology/offline/classification/panda_small.yaml index 94db80d1..74a0e438 100644 --- a/configs/vision/pathology/offline/classification/panda_tiny.yaml +++ b/configs/vision/pathology/offline/classification/panda_small.yaml @@ -96,7 +96,7 @@ data: <<: *DATASET_ARGS split: test predict: - - class_path: eva.vision.datasets.PANDATiny + - class_path: eva.vision.datasets.PANDASmall init_args: &PREDICT_DATASET_ARGS root: ${oc.env:DATA_ROOT, ./data/panda/prostate-cancer-grade-assessment} sampler: @@ -113,11 +113,11 @@ data: size: ${oc.env:RESIZE_DIM, 224} mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]} std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]} - - class_path: eva.vision.datasets.PANDATiny + - class_path: eva.vision.datasets.PANDASmall init_args: <<: *PREDICT_DATASET_ARGS split: val - - class_path: eva.vision.datasets.PANDATiny + - class_path: eva.vision.datasets.PANDASmall init_args: <<: *PREDICT_DATASET_ARGS split: test diff --git a/src/eva/vision/data/datasets/__init__.py b/src/eva/vision/data/datasets/__init__.py index a70a5477..5c31edc8 100644 --- a/src/eva/vision/data/datasets/__init__.py +++ b/src/eva/vision/data/datasets/__init__.py @@ -6,7 +6,7 @@ MHIST, PANDA, Camelyon16, - PANDATiny, + PANDASmall, PatchCamelyon, WsiClassificationDataset, ) @@ -29,7 +29,7 @@ "CRC", "MHIST", "PANDA", - "PANDATiny", + "PANDASmall", "Camelyon16", "PatchCamelyon", "WsiClassificationDataset", diff --git a/src/eva/vision/data/datasets/classification/__init__.py b/src/eva/vision/data/datasets/classification/__init__.py index 76b9241b..33b4c775 100644 --- a/src/eva/vision/data/datasets/classification/__init__.py +++ b/src/eva/vision/data/datasets/classification/__init__.py @@ -4,7 +4,7 @@ from eva.vision.data.datasets.classification.camelyon16 import Camelyon16 from eva.vision.data.datasets.classification.crc import CRC from eva.vision.data.datasets.classification.mhist import MHIST -from eva.vision.data.datasets.classification.panda import PANDA, PANDATiny +from eva.vision.data.datasets.classification.panda import PANDA, PANDASmall from eva.vision.data.datasets.classification.patch_camelyon import PatchCamelyon from eva.vision.data.datasets.classification.wsi import WsiClassificationDataset @@ -15,6 +15,6 @@ "PatchCamelyon", 
"WsiClassificationDataset", "PANDA", - "PANDATiny", + "PANDASmall", "Camelyon16", ] diff --git a/src/eva/vision/data/datasets/classification/panda.py b/src/eva/vision/data/datasets/classification/panda.py index fa5ddc74..df65aede 100644 --- a/src/eva/vision/data/datasets/classification/panda.py +++ b/src/eva/vision/data/datasets/classification/panda.py @@ -184,7 +184,7 @@ def _get_id_from_path(self, file_path: str) -> str: return os.path.basename(file_path).replace(".tiff", "") -class PANDATiny(PANDA): +class PANDASmall(PANDA): """Tiny version of the PANDA dataset for quicker benchmarking.""" _train_split_ratio: float = 0.1 From e36f0053f2c200a524071eff18deee3c9b3d04b0 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 13:30:56 +0000 Subject: [PATCH 07/11] tiny -> small --- src/eva/vision/data/datasets/classification/panda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eva/vision/data/datasets/classification/panda.py b/src/eva/vision/data/datasets/classification/panda.py index df65aede..ffa00ab3 100644 --- a/src/eva/vision/data/datasets/classification/panda.py +++ b/src/eva/vision/data/datasets/classification/panda.py @@ -185,7 +185,7 @@ def _get_id_from_path(self, file_path: str) -> str: class PANDASmall(PANDA): - """Tiny version of the PANDA dataset for quicker benchmarking.""" + """Small version of the PANDA dataset for quicker benchmarking.""" _train_split_ratio: float = 0.1 """Train split ratio.""" From f4c6510d8eb6afd8ed11c365b7f0ddb381611997 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 13:54:32 +0000 Subject: [PATCH 08/11] use local random generator in samplers --- src/eva/vision/data/wsi/patching/samplers/_utils.py | 10 ++-------- src/eva/vision/data/wsi/patching/samplers/random.py | 6 ++++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/eva/vision/data/wsi/patching/samplers/_utils.py b/src/eva/vision/data/wsi/patching/samplers/_utils.py index af8418df..f1fa3b7e 100644 --- a/src/eva/vision/data/wsi/patching/samplers/_utils.py +++ b/src/eva/vision/data/wsi/patching/samplers/_utils.py @@ -1,14 +1,8 @@ -import random from typing import Tuple import numpy as np -def set_seed(seed: int) -> None: - random.seed(seed) - np.random.seed(seed) - - def get_grid_coords_and_indices( layer_shape: Tuple[int, int], width: int, @@ -33,8 +27,8 @@ def get_grid_coords_and_indices( indices = list(range(len(x_y))) if shuffle: - set_seed(seed) - np.random.shuffle(indices) + random_generator = np.random.default_rng(seed) + random_generator.shuffle(indices) return x_y, indices diff --git a/src/eva/vision/data/wsi/patching/samplers/random.py b/src/eva/vision/data/wsi/patching/samplers/random.py index 09ae5729..b37a3a3e 100644 --- a/src/eva/vision/data/wsi/patching/samplers/random.py +++ b/src/eva/vision/data/wsi/patching/samplers/random.py @@ -18,6 +18,7 @@ def __init__(self, n_samples: int = 1, seed: int = 42): """Initializes the sampler.""" self.seed = seed self.n_samples = n_samples + self.random_generator = random.Random(seed) # nosec def sample( self, @@ -33,9 +34,10 @@ def sample( layer_shape: The shape of the layer. 
""" _utils.validate_dimensions(width, height, layer_shape) - _utils.set_seed(self.seed) x_max, y_max = layer_shape[0], layer_shape[1] for _ in range(self.n_samples): - x, y = random.randint(0, x_max - width), random.randint(0, y_max - height) # nosec + x, y = self.random_generator.randint(0, x_max - width), self.random_generator.randint( + 0, y_max - height + ) yield x, y From 0532e3a33a76137ad6e2776ded85178e6978880a Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 14:10:27 +0000 Subject: [PATCH 09/11] updated grid sampler unit tests --- .../patching/samplers/test_foreground_grid.py | 20 +++++++++----- .../data/wsi/patching/samplers/test_grid.py | 26 +++++++++++++++++-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py index 9a5510ac..c87ee8f1 100644 --- a/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py +++ b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py @@ -38,11 +38,17 @@ def test_length(min_foreground_ratio: float, max_samples: int, expected_n_sample assert len(x_y) == expected_n_samples -@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)]) -def test_same_seed(n_samples: int, seed: int) -> None: +@pytest.mark.parametrize( + "max_samples, seed, x_y_expected", + [ + (7, 42, [(12, 0), (24, 12), (12, 12), (0, 12), (12, 24)]), + (10, 8, [(12, 0), (12, 24), (24, 12), (0, 12), (12, 12)]), + ], +) +def test_same_seed(max_samples: int, seed: int, x_y_expected: list) -> None: """Tests if the sampler returns the same samples for the same seed.""" sampler = samplers.ForegroundGridSampler( - max_samples=n_samples, seed=seed, min_foreground_ratio=0.5 + max_samples=max_samples, seed=seed, min_foreground_ratio=0.5 ) x_y_1 = list(sampler.sample(**TEST_ARGS)) @@ -51,11 +57,11 @@ def test_same_seed(n_samples: int, seed: int) -> None: assert x_y_1 == x_y_2 -@pytest.mark.parametrize("n_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)]) -def test_different_seed(n_samples: int, seed_1: int, seed_2: int) -> None: +@pytest.mark.parametrize("max_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)]) +def test_different_seed(max_samples: int, seed_1: int, seed_2: int) -> None: """Tests if the sampler returns different samples for different seeds.""" - sampler_1 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_1) - sampler_2 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_2) + sampler_1 = samplers.ForegroundGridSampler(max_samples=max_samples, seed=seed_1) + sampler_2 = samplers.ForegroundGridSampler(max_samples=max_samples, seed=seed_2) x_y_1 = list(sampler_1.sample(**TEST_ARGS)) x_y_2 = list(sampler_2.sample(**TEST_ARGS)) diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py index efeecf54..d88ea0e3 100644 --- a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py +++ b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py @@ -19,8 +19,29 @@ def test_length(max_samples: int, expected_n_samples: int) -> None: assert len(x_y) == expected_n_samples -@pytest.mark.parametrize("max_samples, seed", [(10, 8), (22, 42)]) -def test_same_seed(max_samples: int, seed: int) -> None: +@pytest.mark.parametrize( + "max_samples, seed, x_y_expected", + [ + (7, 42, [(50, 90), (20, 10), (50, 60), (10, 80), (30, 30), (40, 20), (50, 0)]), + ( + 10, + 8, + [ + (10, 50), + (20, 60), + (40, 20), + (90, 30), 
+ (10, 60), + (0, 40), + (90, 40), + (70, 20), + (80, 0), + (60, 30), + ], + ), + ], +) +def test_same_seed(max_samples: int, seed: int, x_y_expected: list) -> None: """Tests if the sampler returns the same samples for the same seed.""" sampler = samplers.GridSampler(max_samples=max_samples, seed=seed) @@ -28,6 +49,7 @@ def test_same_seed(max_samples: int, seed: int) -> None: x_y_2 = list(sampler.sample(**TEST_ARGS)) assert x_y_1 == x_y_2 + assert x_y_1 == x_y_expected @pytest.mark.parametrize("max_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)]) From adbd2498815be7bd11448df5523469c41ce824a4 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 14:13:49 +0000 Subject: [PATCH 10/11] fixed sampler unit tests --- .../data/wsi/patching/samplers/test_grid.py | 7 ++-- .../data/wsi/patching/samplers/test_random.py | 32 ++++++++++++++++--- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py index d88ea0e3..42cbdbf8 100644 --- a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py +++ b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py @@ -43,10 +43,11 @@ def test_length(max_samples: int, expected_n_samples: int) -> None: ) def test_same_seed(max_samples: int, seed: int, x_y_expected: list) -> None: """Tests if the sampler returns the same samples for the same seed.""" - sampler = samplers.GridSampler(max_samples=max_samples, seed=seed) + sampler_1 = samplers.GridSampler(max_samples=max_samples, seed=seed) + sampler_2 = samplers.GridSampler(max_samples=max_samples, seed=seed) - x_y_1 = list(sampler.sample(**TEST_ARGS)) - x_y_2 = list(sampler.sample(**TEST_ARGS)) + x_y_1 = list(sampler_1.sample(**TEST_ARGS)) + x_y_2 = list(sampler_2.sample(**TEST_ARGS)) assert x_y_1 == x_y_2 assert x_y_1 == x_y_expected diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_random.py b/tests/eva/vision/data/wsi/patching/samplers/test_random.py index 85110a6c..09c1d279 100644 --- a/tests/eva/vision/data/wsi/patching/samplers/test_random.py +++ b/tests/eva/vision/data/wsi/patching/samplers/test_random.py @@ -17,13 +17,35 @@ def test_length(n_samples: int) -> None: assert len(x_y) == n_samples -@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)]) -def test_same_seed(n_samples: int, seed: int) -> None: +@pytest.mark.parametrize( + "n_samples, seed, x_y_expected", + [ + (7, 42, [(81, 14), (3, 35), (31, 28), (17, 13), (86, 69), (11, 75), (54, 4)]), + ( + 10, + 8, + [ + (29, 47), + (48, 16), + (24, 90), + (5, 10), + (17, 31), + (64, 26), + (51, 82), + (3, 58), + (62, 58), + (49, 63), + ], + ), + ], +) +def test_same_seed(n_samples: int, seed: int, x_y_expected: int) -> None: """Tests if the sampler returns the same samples for the same seed.""" - sampler = samplers.RandomSampler(n_samples=n_samples, seed=seed) + sampler_1 = samplers.RandomSampler(n_samples=n_samples, seed=seed) + sampler_2 = samplers.RandomSampler(n_samples=n_samples, seed=seed) - x_y_1 = list(sampler.sample(**TEST_ARGS)) - x_y_2 = list(sampler.sample(**TEST_ARGS)) + x_y_1 = list(sampler_1.sample(**TEST_ARGS)) + x_y_2 = list(sampler_2.sample(**TEST_ARGS)) assert x_y_1 == x_y_2 From e2762e0c5671a8eaaf4d51bdf00c84376387b66f Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Mon, 7 Oct 2024 14:30:11 +0000 Subject: [PATCH 11/11] updated panda unittest --- .../data/datasets/classification/test_panda.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git 
a/tests/eva/vision/data/datasets/classification/test_panda.py b/tests/eva/vision/data/datasets/classification/test_panda.py index 6b901344..783cc341 100644 --- a/tests/eva/vision/data/datasets/classification/test_panda.py +++ b/tests/eva/vision/data/datasets/classification/test_panda.py @@ -58,10 +58,16 @@ def test_filenames(root: str, split: Literal["train", "val", "test"]): assert len(filenames) == len(dataset.datasets) -def test_same_split_same_seed(root: str): +def test_same_split_same_seed(root: str, seed: int = 42): """Test that the generated split is deterministic when using the same seed.""" - dataset1 = datasets.PANDA(root=root, split="train", seed=42, **DEFAULT_ARGS) - dataset2 = datasets.PANDA(root=root, split="train", seed=42, **DEFAULT_ARGS) + sampler1 = samplers.GridSampler(seed=seed) + sampler2 = samplers.GridSampler(seed=seed) + dataset1 = datasets.PANDA( + root=root, split="train", seed=seed, **(DEFAULT_ARGS | {"sampler": sampler1}) + ) + dataset2 = datasets.PANDA( + root=root, split="train", seed=seed, **(DEFAULT_ARGS | {"sampler": sampler2}) + ) _setup_datasets(dataset1, dataset2) assert len(dataset1) == len(dataset2) @@ -70,6 +76,10 @@ def test_same_split_same_seed(root: str): for i in range(len(dataset1)): assert np.allclose(dataset1[i][1], dataset2[i][1]) + expected_coords = [[(96, 160), (160, 64), (64, 64), (96, 0), (0, 224)]] * len(dataset1.datasets) + for i in range(len(dataset1.datasets)): + assert dataset1.datasets[i]._coords.x_y[: len(expected_coords[i])] == expected_coords[i] + def test_different_seed_different_split(root: str): """Test that the generated split is different when using a different seed."""
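For reference, the idea shared by the `random_split` and `stratified_split` changes in this series is: validate that the ratios sum to at most 1, draw a seeded permutation from a local `np.random.default_rng` generator, and keep only the first `total_ratio * n` indices, so ratios below 1 subsample the dataset (this is what the `PANDASmall` variant relies on, with the stratified version applying the same logic per class). The sketch below is a minimal, self-contained illustration of that behaviour using plain NumPy; `subsample_split` and its exact handling of the test remainder are illustrative only and not part of eva's API.

```python
# Standalone sketch of the seeded, ratio-based subsampling split introduced above.
# Function name and the handling of the test remainder are illustrative.
from typing import List, Optional, Sequence, Tuple

import numpy as np


def subsample_split(
    samples: Sequence,
    train_ratio: float,
    val_ratio: float,
    test_ratio: float = 0.0,
    seed: int = 42,
) -> Tuple[List[int], List[int], Optional[List[int]]]:
    """Splits `samples` into train/val/test index lists.

    The ratios may sum to less than 1, in which case only the corresponding
    fraction of the data is used at all.
    """
    total_ratio = train_ratio + val_ratio + test_ratio
    if total_ratio > 1.0:
        raise ValueError("The sum of the ratios must be lower or equal to 1.")

    # A local Generator keeps the split reproducible without touching global state.
    generator = np.random.default_rng(seed)
    n_used = int(total_ratio * len(samples))
    indices = generator.permutation(len(samples))[:n_used]

    n_train = int(np.floor(train_ratio * n_used))
    n_val = n_used - n_train if test_ratio == 0.0 else int(np.floor(val_ratio * n_used)) or 1

    train = indices[:n_train].tolist()
    val = indices[n_train : n_train + n_val].tolist()
    test = indices[n_train + n_val :].tolist() if test_ratio > 0.0 else None
    return train, val, test


# Same seed -> same indices; with ratios 0.1/0.05/0.05 only 20 of 100 samples are used.
train_a, val_a, test_a = subsample_split(list(range(100)), 0.1, 0.05, 0.05, seed=42)
train_b, val_b, test_b = subsample_split(list(range(100)), 0.1, 0.05, 0.05, seed=42)
assert (train_a, val_a, test_a) == (train_b, val_b, test_b)
assert len(train_a) + len(val_a) + len(test_a) == 20
```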