From 84d93d7fb03a4986fda507ec1aeb6814efd76a0e Mon Sep 17 00:00:00 2001 From: Shion Date: Fri, 1 Sep 2023 23:47:57 +0900 Subject: [PATCH 01/19] functional and metric class initial commit --- src/torchmetrics/clustering/__init__.py | 2 + src/torchmetrics/clustering/dunn_index.py | 120 ++++++++++++++++++ .../functional/clustering/__init__.py | 2 + .../functional/clustering/dunn_index.py | 83 ++++++++++++ .../functional/clustering/utils.py | 26 ++-- 5 files changed, 221 insertions(+), 12 deletions(-) create mode 100644 src/torchmetrics/clustering/dunn_index.py create mode 100644 src/torchmetrics/functional/clustering/dunn_index.py diff --git a/src/torchmetrics/clustering/__init__.py b/src/torchmetrics/clustering/__init__.py index 483c1347a6b..d84cb403526 100644 --- a/src/torchmetrics/clustering/__init__.py +++ b/src/torchmetrics/clustering/__init__.py @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from torchmetrics.clustering.dunn_index import DunnIndex from torchmetrics.clustering.mutual_info_score import MutualInfoScore from torchmetrics.clustering.normalized_mutual_info_score import NormalizedMutualInfoScore from torchmetrics.clustering.rand_score import RandScore __all__ = [ + "DunnIndex", "MutualInfoScore", "NormalizedMutualInfoScore", "RandScore", diff --git a/src/torchmetrics/clustering/dunn_index.py b/src/torchmetrics/clustering/dunn_index.py new file mode 100644 index 00000000000..206eac20e63 --- /dev/null +++ b/src/torchmetrics/clustering/dunn_index.py @@ -0,0 +1,120 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Sequence, Union + +from torch import Tensor + +from torchmetrics.functional.clustering.dunn_index import dunn_index +from torchmetrics.metric import Metric +from torchmetrics.utilities.data import dim_zero_cat +from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE +from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE + +if not _MATPLOTLIB_AVAILABLE: + __doctest_skip__ = ["DunnIndex.plot"] + + +class DunnIndex(Metric): + r"""Compute `Dunn Index`_. + + .. math:: + DI_m = \frac{\min_{1\leq i>> import torch + >>> from torchmetrics.clustering import DunnIndex + >>> preds = torch.tensor([2, 1, 0, 1, 0]) + >>> target = torch.tensor([0, 2, 1, 1, 0]) + >>> dun_index = DunnIndex() + >>> dunn_index(preds, target) + tensor(0.5004) + + """ + + is_differentiable: bool = True + higher_is_better: bool = True + full_state_update: bool = True + plot_lower_bound: float = 0.0 + x: List[Tensor] + labels: List[Tensor] + contingency: Tensor + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + + self.add_state("x", default=[], dist_reduce_fx="cat") + self.add_state("labels", default=[], dist_reduce_fx="cat") + + def update(self, x: Tensor, labels: Tensor) -> None: + """Update state with predictions and targets.""" + self.x.append(x) + self.labels.append(labels) + + def compute(self) -> Tensor: + """Compute mutual information over state.""" + return dunn_index(dim_zero_cat(self.x), dim_zero_cat(self.labels)) + + def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: 
Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: + """Plot a single or multiple values from the metric. + + Args: + val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results. + If no value is provided, will automatically call `metric.compute` and plot that result. + ax: An matplotlib axis object. If provided will add plot to that axis + + Returns: + Figure and Axes object + + Raises: + ModuleNotFoundError: + If `matplotlib` is not installed + + .. plot:: + :scale: 75 + + >>> # Example plotting a single value + >>> import torch + >>> from torchmetrics.clustering import DunnIndex + >>> metric = DunnIndex() + >>> metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,))) + >>> fig_, ax_ = metric.plot(metric.compute()) + + .. plot:: + :scale: 75 + + >>> # Example plotting multiple values + >>> import torch + >>> from torchmetrics.clustering import DunnIndex + >>> metric = DunnIndex() + >>> for _ in range(10): + ... metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,))) + >>> fig_, ax_ = metric.plot(metric.compute()) + + """ + return self._plot(val, ax) diff --git a/src/torchmetrics/functional/clustering/__init__.py b/src/torchmetrics/functional/clustering/__init__.py index 93dd9ebc973..0d7fb2cbdca 100644 --- a/src/torchmetrics/functional/clustering/__init__.py +++ b/src/torchmetrics/functional/clustering/__init__.py @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from torchmetrics.functional.clustering.dunn_index import dunn_index from torchmetrics.functional.clustering.mutual_info_score import mutual_info_score from torchmetrics.functional.clustering.normalized_mutual_info_score import normalized_mutual_info_score from torchmetrics.functional.clustering.rand_score import rand_score __all__ = [ + "dunn_index", "mutual_info_score", "normalized_mutual_info_score", "rand_score", diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py new file mode 100644 index 00000000000..2674cda81c5 --- /dev/null +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -0,0 +1,83 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from itertools import combinations +from typing import Tuple + +import torch +from torch import Tensor + + +def _dunn_index_update(x: Tensor, labels: Tensor, p: float) -> Tuple[Tensor, Tensor]: + """Update and return variables required to compute the Dunn index. 
+ + Args: + x: feature vectors of shape (n_samples, n_features) + labels: cluster labels + p: p-norm (distance metric) + + Returns: + intercluster_distance: intercluster distances + max_intracluster_distance: max intracluster distances + + """ + unique_labels, inverse_indices = labels.unique(return_inverse=True) + clusters = [x[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] + centroids = [c.mean(dim=0) for c in clusters] + + intercluster_distance = torch.linalg.vector_norm( + torch.stack([a - b for a, b in combinations(centroids, 2)], dim=0), ord=p, dim=1 + ) + + max_intracluster_distance = torch.stack( + [torch.linalg.vector_norm(ci - mu, ord=p, dim=1).max() for ci, mu in zip(clusters, centroids)] + ) + + return intercluster_distance, max_intracluster_distance + + +def _dunn_index_compute(intercluster_distance: Tensor, max_intracluster_distance: Tensor) -> Tensor: + """Compute the Dunn index based on updated state. + + Args: + intercluster_distance: intercluster distances + max_intracluster_distance: max intracluster distances + + Returns: + dunn_index: Dunn index + + """ + return intercluster_distance.min() / max_intracluster_distance.max() + + +def dunn_index(x: Tensor, labels: Tensor, p: float = 2) -> Tensor: + """Compute the Dunn index. 
+ + Args: + x: feature vectors + labels: cluster labels + p: p-norm used for distance metric + + Returns: + dunn_index: Dunn index + + Example: + >>> from torchmetrics.functional.clustering import dunn_index + >>> x = torch.tensor([0, 3, 2, 2, 1]) + >>> labels = torch.tensor([1, 3, 2, 0, 1]) + >>> dunn_index(preds, target) + tensor(1.0) + + """ + pairwise_distance, max_distance = _dunn_index_update(x, labels, p) + return _dunn_index_compute(pairwise_distance, max_distance) diff --git a/src/torchmetrics/functional/clustering/utils.py b/src/torchmetrics/functional/clustering/utils.py index 23ece71cbaf..39e0a399be8 100644 --- a/src/torchmetrics/functional/clustering/utils.py +++ b/src/torchmetrics/functional/clustering/utils.py @@ -151,6 +151,18 @@ def calculate_contingency_matrix( return contingency +def _is_real_discrete_label(x: Tensor) -> bool: + """Check if tensor of labels is real and discrete. + + Args: + x: tensor + + """ + if x.ndim != 1: + raise ValueError(f"Expected arguments to be 1-d tensors but got {x.ndim}-d tensors.") + return not (torch.is_floating_point(x) or torch.is_complex(x)) + + def check_cluster_labels(preds: Tensor, target: Tensor) -> None: """Check shape of input tensors and if they are real, discrete tensors. @@ -160,18 +172,8 @@ def check_cluster_labels(preds: Tensor, target: Tensor) -> None: """ _check_same_shape(preds, target) - if preds.ndim != 1: - raise ValueError(f"Expected arguments to be 1d tensors but got {preds.ndim} and {target.ndim}") - if ( - torch.is_floating_point(preds) - or torch.is_complex(preds) - or torch.is_floating_point(target) - or torch.is_complex(target) - ): - raise ValueError( - f"Expected real, discrete values but received {preds.dtype} for" - f"predictions and {target.dtype} for target labels instead." 
- ) + if not (_is_real_discrete_label(preds) and _is_real_discrete_label(target)): + raise ValueError(f"Expected real, discrete values for x but received {preds.dtype} and {target.dtype}.") def calcualte_pair_cluster_confusion_matrix( From edb4b2289f09f084119c17347f169b59a69e2c1f Mon Sep 17 00:00:00 2001 From: Shion Date: Fri, 1 Sep 2023 23:48:44 +0900 Subject: [PATCH 02/19] docs initial commit --- docs/source/clustering/dunn_index.rst | 21 +++++++++++++++++++++ docs/source/links.rst | 1 + 2 files changed, 22 insertions(+) create mode 100644 docs/source/clustering/dunn_index.rst diff --git a/docs/source/clustering/dunn_index.rst b/docs/source/clustering/dunn_index.rst new file mode 100644 index 00000000000..69246661a60 --- /dev/null +++ b/docs/source/clustering/dunn_index.rst @@ -0,0 +1,21 @@ +.. customcarditem:: + :header: Dunn Index + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg + :tags: Clustering + +.. include:: ../links.rst + +########## +Dunn Index +########## + +Module Interface +________________ + +.. autoclass:: torchmetrics.clustering.DunnIndex + :exclude-members: update, compute + +Functional Interface +____________________ + +.. autofunction:: torchmetrics.functional.clustering.dunn_index diff --git a/docs/source/links.rst b/docs/source/links.rst index 78a2b34d764..e6c85b2994a 100644 --- a/docs/source/links.rst +++ b/docs/source/links.rst @@ -154,3 +154,4 @@ .. _Normalized Mutual Information Score: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.normalized_mutual_info_score.html .. _pycocotools: https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools .. _Rand Score: https://link.springer.com/article/10.1007/BF01908075 +.. 
_Dunn Index: https://en.wikipedia.org/wiki/Dunn_index From 38daa944292632f118df00d496751b0a4e27d29e Mon Sep 17 00:00:00 2001 From: Shion Date: Sun, 3 Sep 2023 23:17:25 +0900 Subject: [PATCH 03/19] euclidean functional passing --- CHANGELOG.md | 2 + tests/unittests/clustering/inputs.py | 26 ++++++ tests/unittests/clustering/test_dunn_index.py | 79 +++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 tests/unittests/clustering/inputs.py create mode 100644 tests/unittests/clustering/test_dunn_index.py diff --git a/CHANGELOG.md b/CHANGELOG.md index bb2856d5501..15e9b4eabb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029) +- Added `DunnIndex` metric to cluster package ([#200]()) + ### Changed - diff --git a/tests/unittests/clustering/inputs.py b/tests/unittests/clustering/inputs.py new file mode 100644 index 00000000000..7f909426db2 --- /dev/null +++ b/tests/unittests/clustering/inputs.py @@ -0,0 +1,26 @@ +from collections import namedtuple + +import torch +from sklearn.datasets import make_blobs + +Input = namedtuple("Input", ["x", "labels"]) + +NUM_BATCHES = 4 +NUM_SAMPLES = 50 +NUM_FEATURES = 2 +NUM_CLASSES = 3 + + +def _batch_blobs(num_batches, num_samples, num_features, num_classes): + x = [] + labels = [] + + for _ in range(num_batches): + _x, _labels = make_blobs(num_samples, num_features, centers=num_classes) + x.append(torch.tensor(_x)) + labels.append(torch.tensor(_labels)) + + return Input(x=torch.stack(x), labels=torch.stack(labels)) + + +_input_blobs = _batch_blobs(NUM_BATCHES, NUM_SAMPLES, NUM_FEATURES, NUM_CLASSES) diff --git a/tests/unittests/clustering/test_dunn_index.py b/tests/unittests/clustering/test_dunn_index.py new file mode 100644 index 00000000000..cf4cde9a677 --- /dev/null +++ 
b/tests/unittests/clustering/test_dunn_index.py @@ -0,0 +1,79 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial +from itertools import combinations + +import numpy as np +import pytest +from torchmetrics.clustering.dunn_index import DunnIndex +from torchmetrics.functional.clustering.dunn_index import dunn_index + +from unittests.clustering.inputs import _input_blobs +from unittests.helpers import seed_all +from unittests.helpers.testers import MetricTester + +seed_all(42) + + +def _np_dunn_index(x, labels, p): + unique_labels, inverse_indices = np.unique(labels, return_inverse=True) + clusters = [x[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] + centroids = [c.mean(axis=0) for c in clusters] + + intercluster_distance = np.linalg.norm( + np.stack([a - b for a, b in combinations(centroids, 2)], axis=0), ord=p, axis=1 + ) + + max_intracluster_distance = np.stack( + [np.linalg.norm(ci - mu, ord=p, axis=1).max() for ci, mu in zip(clusters, centroids)] + ) + + return intercluster_distance.min() / max_intracluster_distance.max() + + +@pytest.mark.parametrize( + "x, labels", + [ + (_input_blobs.x, _input_blobs.labels), + ], +) +@pytest.mark.parametrize( + "p", + [1, 2], +) +class TestDunnIndex(MetricTester): + """Test class for `DunnIndex` metric.""" + + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [True, False]) + def test_dunn_index(self, x, labels, p, ddp): + """Test class 
implementation of metric.""" + self.run_class_metric_test( + ddp=ddp, + preds=x, + target=labels, + metric_class=DunnIndex, + reference_metric=partial(_np_dunn_index, p=p), + metric_args={"p": p}, + ) + + def test_dunn_index_functional(self, x, labels, p): + """Test functional implementation of metric.""" + self.run_functional_metric_test( + preds=x, + target=labels, + metric_functional=dunn_index, + reference_metric=partial(_np_dunn_index, p=p), + ) From 23d2e27b07d61b8b83b0e7e9205393ff938edeaf Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Mon, 4 Sep 2023 21:13:31 +0900 Subject: [PATCH 04/19] Apply suggestions from code review Co-authored-by: Nicki Skafte Detlefsen --- src/torchmetrics/clustering/dunn_index.py | 12 ++++++------ src/torchmetrics/functional/clustering/dunn_index.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/torchmetrics/clustering/dunn_index.py b/src/torchmetrics/clustering/dunn_index.py index 206eac20e63..6a683928a66 100644 --- a/src/torchmetrics/clustering/dunn_index.py +++ b/src/torchmetrics/clustering/dunn_index.py @@ -36,8 +36,8 @@ class DunnIndex(Metric): As input to ``forward`` and ``update`` the metric accepts the following input: - - ``preds`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with predicted cluster labels - - ``target`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with ground truth cluster labels + - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. ``d`` is the dimensionality of the embedding space. 
+ - ``labels`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with cluster labels As output of ``forward`` and ``compute`` the metric returns the following output: @@ -68,17 +68,17 @@ class DunnIndex(Metric): def __init__(self, **kwargs: Any) -> None: super().__init__(**kwargs) - self.add_state("x", default=[], dist_reduce_fx="cat") + self.add_state("data", default=[], dist_reduce_fx="cat") self.add_state("labels", default=[], dist_reduce_fx="cat") - def update(self, x: Tensor, labels: Tensor) -> None: + def update(self, data: Tensor, labels: Tensor) -> None: """Update state with predictions and targets.""" - self.x.append(x) + self.data.append(data) self.labels.append(labels) def compute(self) -> Tensor: """Compute mutual information over state.""" - return dunn_index(dim_zero_cat(self.x), dim_zero_cat(self.labels)) + return dunn_index(dim_zero_cat(self.data), dim_zero_cat(self.labels)) def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: """Plot a single or multiple values from the metric. diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py index 2674cda81c5..bfe0052cbba 100644 --- a/src/torchmetrics/functional/clustering/dunn_index.py +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -60,11 +60,11 @@ def _dunn_index_compute(intercluster_distance: Tensor, max_intracluster_distance return intercluster_distance.min() / max_intracluster_distance.max() -def dunn_index(x: Tensor, labels: Tensor, p: float = 2) -> Tensor: +def dunn_index(data: Tensor, labels: Tensor, p: float = 2) -> Tensor: """Compute the Dunn index. 
Args: - x: feature vectors + data: feature vectors labels: cluster labels p: p-norm used for distance metric @@ -79,5 +79,5 @@ def dunn_index(x: Tensor, labels: Tensor, p: float = 2) -> Tensor: tensor(1.0) """ - pairwise_distance, max_distance = _dunn_index_update(x, labels, p) + pairwise_distance, max_distance = _dunn_index_update(data, labels, p) return _dunn_index_compute(pairwise_distance, max_distance) From b2188cba779624e4ebed931da341595688209767 Mon Sep 17 00:00:00 2001 From: Shion Date: Mon, 4 Sep 2023 21:54:49 +0900 Subject: [PATCH 05/19] Create inputs.py for clustering tests (#2045) Create inputs.py for clustering tests --- src/torchmetrics/clustering/dunn_index.py | 13 +++-- .../functional/clustering/dunn_index.py | 8 +-- tests/unittests/clustering/inputs.py | 55 +++++++++++++++---- tests/unittests/clustering/test_dunn_index.py | 18 +++--- .../clustering/test_mutual_info_score.py | 39 +++---------- .../test_normalized_mutual_info_score.py | 30 ++-------- tests/unittests/clustering/test_rand_score.py | 36 +++--------- 7 files changed, 86 insertions(+), 113 deletions(-) diff --git a/src/torchmetrics/clustering/dunn_index.py b/src/torchmetrics/clustering/dunn_index.py index 6a683928a66..d85c368ef2b 100644 --- a/src/torchmetrics/clustering/dunn_index.py +++ b/src/torchmetrics/clustering/dunn_index.py @@ -36,7 +36,8 @@ class DunnIndex(Metric): As input to ``forward`` and ``update`` the metric accepts the following input: - - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. ``d`` is the dimensionality of the embedding space. + - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. + ``d`` is the dimensionality of the embedding space. 
- ``labels`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with cluster labels As output of ``forward`` and ``compute`` the metric returns the following output: @@ -49,10 +50,10 @@ class DunnIndex(Metric): Example: >>> import torch >>> from torchmetrics.clustering import DunnIndex - >>> preds = torch.tensor([2, 1, 0, 1, 0]) - >>> target = torch.tensor([0, 2, 1, 1, 0]) - >>> dun_index = DunnIndex() - >>> dunn_index(preds, target) + >>> data = torch.tensor([2, 1, 0, 1, 0]) + >>> labels = torch.tensor([0, 2, 1, 1, 0]) + >>> dunn_index = DunnIndex() + >>> dunn_index(data, labels) tensor(0.5004) """ @@ -61,7 +62,7 @@ class DunnIndex(Metric): higher_is_better: bool = True full_state_update: bool = True plot_lower_bound: float = 0.0 - x: List[Tensor] + data: List[Tensor] labels: List[Tensor] contingency: Tensor diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py index bfe0052cbba..f6eaad0b721 100644 --- a/src/torchmetrics/functional/clustering/dunn_index.py +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -18,11 +18,11 @@ from torch import Tensor -def _dunn_index_update(x: Tensor, labels: Tensor, p: float) -> Tuple[Tensor, Tensor]: +def _dunn_index_update(data: Tensor, labels: Tensor, p: float) -> Tuple[Tensor, Tensor]: """Update and return variables required to compute the Dunn index. 
Args: - x: feature vectors of shape (n_samples, n_features) + data: feature vectors of shape (n_samples, n_features) labels: cluster labels p: p-norm (distance metric) @@ -32,7 +32,7 @@ def _dunn_index_update(x: Tensor, labels: Tensor, p: float) -> Tuple[Tensor, Ten """ unique_labels, inverse_indices = labels.unique(return_inverse=True) - clusters = [x[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] + clusters = [data[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] centroids = [c.mean(dim=0) for c in clusters] intercluster_distance = torch.linalg.vector_norm( @@ -73,7 +73,7 @@ def dunn_index(data: Tensor, labels: Tensor, p: float = 2) -> Tensor: Example: >>> from torchmetrics.functional.clustering import dunn_index - >>> x = torch.tensor([0, 3, 2, 2, 1]) + >>> data = torch.tensor([0, 3, 2, 2, 1]) >>> labels = torch.tensor([1, 3, 2, 0, 1]) >>> dunn_index(preds, target) tensor(1.0) diff --git a/tests/unittests/clustering/inputs.py b/tests/unittests/clustering/inputs.py index 7f909426db2..a6cea481576 100644 --- a/tests/unittests/clustering/inputs.py +++ b/tests/unittests/clustering/inputs.py @@ -1,26 +1,57 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from collections import namedtuple import torch from sklearn.datasets import make_blobs -Input = namedtuple("Input", ["x", "labels"]) +from unittests import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES +from unittests.helpers import seed_all -NUM_BATCHES = 4 -NUM_SAMPLES = 50 -NUM_FEATURES = 2 -NUM_CLASSES = 3 +seed_all(42) -def _batch_blobs(num_batches, num_samples, num_features, num_classes): - x = [] - labels = [] +# extrinsic input for clustering metrics that requires predicted clustering labels and target clustering labels +ExtrinsicInput = namedtuple("ExtrinsicInput", ["preds", "target"]) + +# intrinsic input for clustering metrics that requires only predicted clustering labels and the cluster embeddings +IntrinsicInput = namedtuple("IntinsicInput", ["data", "labels"]) + +def _batch_blobs(num_batches, num_samples, num_features, num_classes): + data, labels = [], [] for _ in range(num_batches): - _x, _labels = make_blobs(num_samples, num_features, centers=num_classes) - x.append(torch.tensor(_x)) + _data, _labels = make_blobs(num_samples, num_features, centers=num_classes) + data.append(torch.tensor(_data)) labels.append(torch.tensor(_labels)) - return Input(x=torch.stack(x), labels=torch.stack(labels)) + return IntrinsicInput(data=torch.stack(data), labels=torch.stack(labels)) + + +_single_target_extrinsic1 = ExtrinsicInput( + preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) + +_single_target_extrinsic2 = ExtrinsicInput( + preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), + target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), +) +_float_inputs_extrinsic = ExtrinsicInput( + preds=torch.rand((NUM_BATCHES, BATCH_SIZE)), target=torch.rand((NUM_BATCHES, BATCH_SIZE)) +) -_input_blobs = _batch_blobs(NUM_BATCHES, NUM_SAMPLES, NUM_FEATURES, NUM_CLASSES) +_single_target_intrinsic1 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, 
EXTRA_DIM, NUM_CLASSES) +_single_target_intrinsic2 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) diff --git a/tests/unittests/clustering/test_dunn_index.py b/tests/unittests/clustering/test_dunn_index.py index cf4cde9a677..d881ff991d3 100644 --- a/tests/unittests/clustering/test_dunn_index.py +++ b/tests/unittests/clustering/test_dunn_index.py @@ -19,7 +19,10 @@ from torchmetrics.clustering.dunn_index import DunnIndex from torchmetrics.functional.clustering.dunn_index import dunn_index -from unittests.clustering.inputs import _input_blobs +from unittests.clustering.inputs import ( + _single_target_intrinsic1, + _single_target_intrinsic2, +) from unittests.helpers import seed_all from unittests.helpers.testers import MetricTester @@ -43,9 +46,10 @@ def _np_dunn_index(x, labels, p): @pytest.mark.parametrize( - "x, labels", + "data, labels", [ - (_input_blobs.x, _input_blobs.labels), + (_single_target_intrinsic1.data, _single_target_intrinsic1.labels), + (_single_target_intrinsic2.data, _single_target_intrinsic2.labels), ], ) @pytest.mark.parametrize( @@ -58,21 +62,21 @@ class TestDunnIndex(MetricTester): atol = 1e-5 @pytest.mark.parametrize("ddp", [True, False]) - def test_dunn_index(self, x, labels, p, ddp): + def test_dunn_index(self, data, labels, p, ddp): """Test class implementation of metric.""" self.run_class_metric_test( ddp=ddp, - preds=x, + preds=data, target=labels, metric_class=DunnIndex, reference_metric=partial(_np_dunn_index, p=p), metric_args={"p": p}, ) - def test_dunn_index_functional(self, x, labels, p): + def test_dunn_index_functional(self, data, labels, p): """Test functional implementation of metric.""" self.run_functional_metric_test( - preds=x, + preds=data, target=labels, metric_functional=dunn_index, reference_metric=partial(_np_dunn_index, p=p), diff --git a/tests/unittests/clustering/test_mutual_info_score.py b/tests/unittests/clustering/test_mutual_info_score.py index c4e0e56f38e..49522d50ce9 100644 --- 
a/tests/unittests/clustering/test_mutual_info_score.py +++ b/tests/unittests/clustering/test_mutual_info_score.py @@ -11,44 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections import namedtuple - import pytest import torch from sklearn.metrics import mutual_info_score as sklearn_mutual_info_score from torchmetrics.clustering.mutual_info_score import MutualInfoScore from torchmetrics.functional.clustering.mutual_info_score import mutual_info_score -from unittests import BATCH_SIZE, NUM_BATCHES +from unittests import BATCH_SIZE, NUM_CLASSES +from unittests.clustering.inputs import _float_inputs_extrinsic, _single_target_extrinsic1, _single_target_extrinsic2 from unittests.helpers import seed_all from unittests.helpers.testers import MetricTester seed_all(42) -Input = namedtuple("Input", ["preds", "target"]) -NUM_CLASSES = 10 - -_single_target_inputs1 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_single_target_inputs2 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_float_inputs = Input( - preds=torch.rand((NUM_BATCHES, BATCH_SIZE)), - target=torch.rand((NUM_BATCHES, BATCH_SIZE)), -) - @pytest.mark.parametrize( "preds, target", [ - (_single_target_inputs1.preds, _single_target_inputs1.target), - (_single_target_inputs2.preds, _single_target_inputs2.target), + (_single_target_extrinsic1.preds, _single_target_extrinsic1.target), + (_single_target_extrinsic2.preds, _single_target_extrinsic2.target), ], ) class TestMutualInfoScore(MetricTester): @@ -87,18 +68,14 @@ def test_mutual_info_score_functional_single_cluster(): def test_mutual_info_score_functional_raises_invalid_task(): """Check that 
metric rejects continuous-valued inputs.""" - preds, target = _float_inputs + preds, target = _float_inputs_extrinsic with pytest.raises(ValueError, match=r"Expected *"): mutual_info_score(preds, target) -@pytest.mark.parametrize( - ("preds", "target"), - [ - (_single_target_inputs1.preds, _single_target_inputs1.target), - ], -) -def test_mutual_info_score_functional_is_symmetric(preds, target): +def test_mutual_info_score_functional_is_symmetric( + preds=_single_target_extrinsic1.preds, target=_single_target_extrinsic1.target +): """Check that the metric funtional is symmetric.""" for p, t in zip(preds, target): assert torch.allclose(mutual_info_score(p, t), mutual_info_score(t, p)) diff --git a/tests/unittests/clustering/test_normalized_mutual_info_score.py b/tests/unittests/clustering/test_normalized_mutual_info_score.py index 97d40d2e66a..095bc5963d2 100644 --- a/tests/unittests/clustering/test_normalized_mutual_info_score.py +++ b/tests/unittests/clustering/test_normalized_mutual_info_score.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from collections import namedtuple from functools import partial import pytest @@ -20,36 +19,19 @@ from torchmetrics.clustering import NormalizedMutualInfoScore from torchmetrics.functional.clustering import normalized_mutual_info_score -from unittests import BATCH_SIZE, NUM_BATCHES +from unittests import BATCH_SIZE, NUM_CLASSES +from unittests.clustering.inputs import _float_inputs_extrinsic, _single_target_extrinsic1, _single_target_extrinsic2 from unittests.helpers import seed_all from unittests.helpers.testers import MetricTester seed_all(42) -Input = namedtuple("Input", ["preds", "target"]) -NUM_CLASSES = 10 - -_single_target_inputs1 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_single_target_inputs2 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_float_inputs = Input( - preds=torch.rand((NUM_BATCHES, BATCH_SIZE)), - target=torch.rand((NUM_BATCHES, BATCH_SIZE)), -) - @pytest.mark.parametrize( "preds, target", [ - (_single_target_inputs1.preds, _single_target_inputs1.target), - (_single_target_inputs2.preds, _single_target_inputs2.target), + (_single_target_extrinsic1.preds, _single_target_extrinsic1.target), + (_single_target_extrinsic2.preds, _single_target_extrinsic2.target), ], ) @pytest.mark.parametrize( @@ -96,7 +78,7 @@ def test_normalized_mutual_info_score_functional_single_cluster(average_method): @pytest.mark.parametrize("average_method", ["min", "geometric", "arithmetic", "max"]) def test_normalized_mutual_info_score_functional_raises_invalid_task(average_method): """Check that metric rejects continuous-valued inputs.""" - preds, target = _float_inputs + preds, target = _float_inputs_extrinsic with pytest.raises(ValueError, match=r"Expected *"): normalized_mutual_info_score(preds, target, average_method) @@ -106,7 +88,7 
@@ def test_normalized_mutual_info_score_functional_raises_invalid_task(average_met ["min", "geometric", "arithmetic", "max"], ) def test_normalized_mutual_info_score_functional_is_symmetric( - average_method, preds=_single_target_inputs1.preds, target=_single_target_inputs1.target + average_method, preds=_single_target_extrinsic1.preds, target=_single_target_extrinsic1.target ): """Check that the metric funtional is symmetric.""" for p, t in zip(preds, target): diff --git a/tests/unittests/clustering/test_rand_score.py b/tests/unittests/clustering/test_rand_score.py index d00fd421d34..08df4ff5e5e 100644 --- a/tests/unittests/clustering/test_rand_score.py +++ b/tests/unittests/clustering/test_rand_score.py @@ -11,44 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections import namedtuple - import pytest import torch from sklearn.metrics import rand_score as sklearn_rand_score from torchmetrics.clustering.rand_score import RandScore from torchmetrics.functional.clustering.rand_score import rand_score -from unittests import BATCH_SIZE, NUM_BATCHES +from unittests.clustering.inputs import _float_inputs_extrinsic, _single_target_extrinsic1, _single_target_extrinsic2 from unittests.helpers import seed_all from unittests.helpers.testers import MetricTester seed_all(42) -Input = namedtuple("Input", ["preds", "target"]) -NUM_CLASSES = 10 - -_single_target_inputs1 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_single_target_inputs2 = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), -) - -_float_inputs = Input( - preds=torch.rand((NUM_BATCHES, BATCH_SIZE)), - target=torch.rand((NUM_BATCHES, BATCH_SIZE)), -) - 
@pytest.mark.parametrize( "preds, target", [ - (_single_target_inputs1.preds, _single_target_inputs1.target), - (_single_target_inputs2.preds, _single_target_inputs2.target), + (_single_target_extrinsic1.preds, _single_target_extrinsic1.target), + (_single_target_extrinsic2.preds, _single_target_extrinsic2.target), ], ) class TestRandScore(MetricTester): @@ -79,16 +59,14 @@ def test_rand_score_functional(self, preds, target): def test_rand_score_functional_raises_invalid_task(): """Check that metric rejects continuous-valued inputs.""" - preds, target = _float_inputs + preds, target = _float_inputs_extrinsic with pytest.raises(ValueError, match=r"Expected *"): rand_score(preds, target) -@pytest.mark.parametrize( - ("preds", "target"), - [(_single_target_inputs1.preds, _single_target_inputs1.target)], -) -def test_rand_score_functional_is_symmetric(preds, target): +def test_rand_score_functional_is_symmetric( + preds=_single_target_extrinsic1.preds, target=_single_target_extrinsic1.target +): """Check that the metric funtional is symmetric.""" for p, t in zip(preds, target): assert torch.allclose(rand_score(p, t), rand_score(t, p)) From 954f12fb27bd2581baf27f1d2cc990671265231f Mon Sep 17 00:00:00 2001 From: Shion Date: Mon, 4 Sep 2023 22:00:39 +0900 Subject: [PATCH 06/19] euclidean functional passing --- tests/unittests/clustering/inputs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unittests/clustering/inputs.py b/tests/unittests/clustering/inputs.py index a6cea481576..1ba29ecf0d3 100644 --- a/tests/unittests/clustering/inputs.py +++ b/tests/unittests/clustering/inputs.py @@ -53,5 +53,6 @@ def _batch_blobs(num_batches, num_samples, num_features, num_classes): preds=torch.rand((NUM_BATCHES, BATCH_SIZE)), target=torch.rand((NUM_BATCHES, BATCH_SIZE)) ) +# intrinsic input for clustering metrics that requires only predicted clustering labels and the cluster embeddings _single_target_intrinsic1 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) 
_single_target_intrinsic2 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) From d00b52689d4dc5d6c619abe591884740131356d1 Mon Sep 17 00:00:00 2001 From: Shion Date: Mon, 4 Sep 2023 22:32:39 +0900 Subject: [PATCH 07/19] fix docstring examples --- src/torchmetrics/clustering/dunn_index.py | 31 +++++++------------ .../functional/clustering/dunn_index.py | 8 ++--- tests/unittests/clustering/inputs.py | 2 +- tests/unittests/clustering/test_dunn_index.py | 3 +- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/src/torchmetrics/clustering/dunn_index.py b/src/torchmetrics/clustering/dunn_index.py index d85c368ef2b..e4f80b390c3 100644 --- a/src/torchmetrics/clustering/dunn_index.py +++ b/src/torchmetrics/clustering/dunn_index.py @@ -50,11 +50,11 @@ class DunnIndex(Metric): Example: >>> import torch >>> from torchmetrics.clustering import DunnIndex - >>> data = torch.tensor([2, 1, 0, 1, 0]) - >>> labels = torch.tensor([0, 2, 1, 1, 0]) - >>> dunn_index = DunnIndex() + >>> data = torch.tensor([[0, 0], [0.5, 0], [1, 0], [0.5, 1]]) + >>> labels = torch.tensor([0, 0, 0, 1]) + >>> dunn_index = DunnIndex(p=2) >>> dunn_index(data, labels) - tensor(0.5004) + tensor(2.) 
""" @@ -64,10 +64,10 @@ class DunnIndex(Metric): plot_lower_bound: float = 0.0 data: List[Tensor] labels: List[Tensor] - contingency: Tensor - def __init__(self, **kwargs: Any) -> None: + def __init__(self, p: float = 2, **kwargs: Any) -> None: super().__init__(**kwargs) + self.p = p self.add_state("data", default=[], dist_reduce_fx="cat") self.add_state("labels", default=[], dist_reduce_fx="cat") @@ -79,7 +79,7 @@ def update(self, data: Tensor, labels: Tensor) -> None: def compute(self) -> Tensor: """Compute mutual information over state.""" - return dunn_index(dim_zero_cat(self.data), dim_zero_cat(self.labels)) + return dunn_index(dim_zero_cat(self.data), dim_zero_cat(self.labels), self.p) def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: """Plot a single or multiple values from the metric. @@ -102,19 +102,10 @@ def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_ >>> # Example plotting a single value >>> import torch >>> from torchmetrics.clustering import DunnIndex - >>> metric = DunnIndex() - >>> metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,))) - >>> fig_, ax_ = metric.plot(metric.compute()) - - .. plot:: - :scale: 75 - - >>> # Example plotting multiple values - >>> import torch - >>> from torchmetrics.clustering import DunnIndex - >>> metric = DunnIndex() - >>> for _ in range(10): - ... 
metric.update(torch.randint(0, 4, (10,)), torch.randint(0, 4, (10,))) + >>> data = torch.tensor([[0, 0], [0.5, 0], [1, 0], [0.5, 1]]) + >>> labels = torch.tensor([0, 0, 0, 1]) + >>> metric = DunnIndex(p=2) + >>> metric.update(data, labels) >>> fig_, ax_ = metric.plot(metric.compute()) """ diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py index f6eaad0b721..0f6c7f5346b 100644 --- a/src/torchmetrics/functional/clustering/dunn_index.py +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -73,10 +73,10 @@ def dunn_index(data: Tensor, labels: Tensor, p: float = 2) -> Tensor: Example: >>> from torchmetrics.functional.clustering import dunn_index - >>> data = torch.tensor([0, 3, 2, 2, 1]) - >>> labels = torch.tensor([1, 3, 2, 0, 1]) - >>> dunn_index(preds, target) - tensor(1.0) + >>> data = torch.tensor([[0, 0], [0.5, 0], [1, 0], [0.5, 1]]) + >>> labels = torch.tensor([0, 0, 0, 1]) + >>> dunn_index(data, labels) + tensor(2.) 
""" pairwise_distance, max_distance = _dunn_index_update(data, labels, p) diff --git a/tests/unittests/clustering/inputs.py b/tests/unittests/clustering/inputs.py index 1ba29ecf0d3..97fc1533dad 100644 --- a/tests/unittests/clustering/inputs.py +++ b/tests/unittests/clustering/inputs.py @@ -26,7 +26,7 @@ ExtrinsicInput = namedtuple("ExtrinsicInput", ["preds", "target"]) # intrinsic input for clustering metrics that requires only predicted clustering labels and the cluster embeddings -IntrinsicInput = namedtuple("IntinsicInput", ["data", "labels"]) +IntrinsicInput = namedtuple("IntrinsicInput", ["data", "labels"]) def _batch_blobs(num_batches, num_samples, num_features, num_classes): diff --git a/tests/unittests/clustering/test_dunn_index.py b/tests/unittests/clustering/test_dunn_index.py index d881ff991d3..3170382d696 100644 --- a/tests/unittests/clustering/test_dunn_index.py +++ b/tests/unittests/clustering/test_dunn_index.py @@ -54,7 +54,7 @@ def _np_dunn_index(x, labels, p): ) @pytest.mark.parametrize( "p", - [1, 2], + [0, 1, 2], ) class TestDunnIndex(MetricTester): """Test class for `DunnIndex` metric.""" @@ -80,4 +80,5 @@ def test_dunn_index_functional(self, data, labels, p): target=labels, metric_functional=dunn_index, reference_metric=partial(_np_dunn_index, p=p), + p=p, ) From b62a8b2641de8f5d32d455a518cb81c579888832 Mon Sep 17 00:00:00 2001 From: Shion Date: Mon, 4 Sep 2023 22:53:41 +0900 Subject: [PATCH 08/19] New metric: Calinski Harabasz Score (#2036) * docs * functional * module * tests * changelog * try another link * mypy * remove broken link * change image * use new inputs * fix * fix flaky tests --------- Co-authored-by: Daniel Stancl <46073029+stancld@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- CHANGELOG.md | 10 +- .../clustering/calinski_harabasz_score.rst | 21 +++ docs/source/clustering/mutual_info_score.rst | 2 +- .../normalized_mutual_info_score.rst | 2 +- 
docs/source/clustering/rand_score.rst | 2 +- src/torchmetrics/clustering/__init__.py | 2 + .../clustering/calinski_harabasz_score.py | 126 ++++++++++++++++++ src/torchmetrics/detection/giou.py | 2 +- .../functional/clustering/__init__.py | 2 + .../clustering/calinski_harabasz_score.py | 73 ++++++++++ .../test_calinski_harabasz_score.py | 56 ++++++++ .../image/test_perceptual_path_length.py | 2 + tests/unittests/utilities/test_plot.py | 3 +- 13 files changed, 295 insertions(+), 8 deletions(-) create mode 100644 docs/source/clustering/calinski_harabasz_score.rst create mode 100644 src/torchmetrics/clustering/calinski_harabasz_score.py create mode 100644 src/torchmetrics/functional/clustering/calinski_harabasz_score.py create mode 100644 tests/unittests/clustering/test_calinski_harabasz_score.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d3c6a15ed1..68ce6b2c64b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,13 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008) +- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008)) -- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025) +- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025)) -- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029) +- Added `CalinskiHarabaszScore` metric to cluster package ([#2036](https://github.com/Lightning-AI/torchmetrics/pull/2036)) + + +- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029)) + - Added `DunnIndex` metric to cluster package ([#200]()) diff --git 
a/docs/source/clustering/calinski_harabasz_score.rst b/docs/source/clustering/calinski_harabasz_score.rst new file mode 100644 index 00000000000..dbf9d40c404 --- /dev/null +++ b/docs/source/clustering/calinski_harabasz_score.rst @@ -0,0 +1,21 @@ +.. customcarditem:: + :header: Calinski Harabasz Score + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg + :tags: Clustering + +.. include:: ../links.rst + +####################### +Calinski Harabasz Score +####################### + +Module Interface +________________ + +.. autoclass:: torchmetrics.clustering.CalinskiHarabaszScore + :exclude-members: update, compute + +Functional Interface +____________________ + +.. autofunction:: torchmetrics.functional.clustering.calinski_harabasz_score diff --git a/docs/source/clustering/mutual_info_score.rst b/docs/source/clustering/mutual_info_score.rst index e5adf06eaa9..ea67c1f21e7 100644 --- a/docs/source/clustering/mutual_info_score.rst +++ b/docs/source/clustering/mutual_info_score.rst @@ -1,6 +1,6 @@ .. customcarditem:: :header: Mutual Information Score - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg :tags: Clustering .. include:: ../links.rst diff --git a/docs/source/clustering/normalized_mutual_info_score.rst b/docs/source/clustering/normalized_mutual_info_score.rst index 45a196a9187..0b2856d833c 100644 --- a/docs/source/clustering/normalized_mutual_info_score.rst +++ b/docs/source/clustering/normalized_mutual_info_score.rst @@ -1,6 +1,6 @@ .. customcarditem:: :header: Normalized Mutual Information Score - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg :tags: Clustering .. 
include:: ../links.rst diff --git a/docs/source/clustering/rand_score.rst b/docs/source/clustering/rand_score.rst index 62650c2d454..ca4df3543a5 100644 --- a/docs/source/clustering/rand_score.rst +++ b/docs/source/clustering/rand_score.rst @@ -1,6 +1,6 @@ .. customcarditem:: :header: Rand Score - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg + :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg :tags: Clustering .. include:: ../links.rst diff --git a/src/torchmetrics/clustering/__init__.py b/src/torchmetrics/clustering/__init__.py index d84cb403526..6f4e67e1197 100644 --- a/src/torchmetrics/clustering/__init__.py +++ b/src/torchmetrics/clustering/__init__.py @@ -11,12 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore from torchmetrics.clustering.dunn_index import DunnIndex from torchmetrics.clustering.mutual_info_score import MutualInfoScore from torchmetrics.clustering.normalized_mutual_info_score import NormalizedMutualInfoScore from torchmetrics.clustering.rand_score import RandScore __all__ = [ + "CalinskiHarabaszScore", "DunnIndex", "MutualInfoScore", "NormalizedMutualInfoScore", diff --git a/src/torchmetrics/clustering/calinski_harabasz_score.py b/src/torchmetrics/clustering/calinski_harabasz_score.py new file mode 100644 index 00000000000..f7c78af990d --- /dev/null +++ b/src/torchmetrics/clustering/calinski_harabasz_score.py @@ -0,0 +1,126 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List, Optional, Sequence, Union + +from torch import Tensor + +from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score +from torchmetrics.metric import Metric +from torchmetrics.utilities.data import dim_zero_cat +from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE +from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE + +if not _MATPLOTLIB_AVAILABLE: + __doctest_skip__ = ["CalinskiHarabaszScore.plot"] + + +class CalinskiHarabaszScore(Metric): + r"""Compute Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms. + + .. math:: + CHS(X, L) = \frac{B(X, L) \cdot (n_\text{samples} - n_\text{labels})}{W(X, L) \cdot (n_\text{labels} - 1)} + + where :math:`B(X, L)` is the between-cluster dispersion, which is the squared distance between the cluster centers + and the dataset mean, weighted by the size of the clusters, :math:`n_\text{samples}` is the number of samples, + :math:`n_\text{labels}` is the number of labels, and :math:`W(X, L)` is the within-cluster dispersion i.e. the + sum of squared distances between each sample and its cluster center. + + This clustering metric is an intrinsic measure, because it does not rely on ground truth labels for the evaluation. + Instead it examines how well the clusters are separated from each other. The score is higher when clusters are dense + and well separated, which relates to a standard concept of a cluster. 
+ + As input to ``forward`` and ``update`` the metric accepts the following input: + + - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. ``d`` is the + dimensionality of the embedding space. + - ``labels`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with cluster labels + + As output of ``forward`` and ``compute`` the metric returns the following output: + + - ``chs`` (:class:`~torch.Tensor`): A tensor with the Calinski Harabasz Score + + Args: + kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. + + Example: + >>> import torch + >>> from torchmetrics.clustering import CalinskiHarabaszScore + >>> _ = torch.manual_seed(42) + >>> data = torch.randn(10, 3) + >>> labels = torch.randint(3, (10,)) + >>> metric = CalinskiHarabaszScore() + >>> metric(data, labels) + tensor(3.0053) + + """ + is_differentiable: bool = True + higher_is_better: bool = True + full_state_update: bool = False + plot_lower_bound: float = 0.0 + data: List[Tensor] + labels: List[Tensor] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + + self.add_state("data", default=[], dist_reduce_fx="cat") + self.add_state("labels", default=[], dist_reduce_fx="cat") + + def update(self, data: Tensor, labels: Tensor) -> None: + """Update metric state with new data and labels.""" + self.data.append(data) + self.labels.append(labels) + + def compute(self) -> Tensor: + """Compute the Calinski Harabasz Score over all data and labels.""" + return calinski_harabasz_score(dim_zero_cat(self.data), dim_zero_cat(self.labels)) + + def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE: + """Plot a single or multiple values from the metric. + + Args: + val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results. + If no value is provided, will automatically call `metric.compute` and plot that result. 
+ ax: A matplotlib axis object. If provided will add plot to that axis + + Returns: + Figure and Axes object + + Raises: + ModuleNotFoundError: + If `matplotlib` is not installed + + .. plot:: + :scale: 75 + + >>> # Example plotting a single value + >>> import torch + >>> from torchmetrics.clustering import CalinskiHarabaszScore + >>> metric = CalinskiHarabaszScore() + >>> metric.update(torch.randn(10, 3), torch.randint(0, 4, (10,))) + >>> fig_, ax_ = metric.plot(metric.compute()) + + .. plot:: + :scale: 75 + + >>> # Example plotting multiple values + >>> import torch + >>> from torchmetrics.clustering import CalinskiHarabaszScore + >>> metric = CalinskiHarabaszScore() + >>> for _ in range(10): + ... metric.update(torch.randn(10, 3), torch.randint(0, 4, (10,))) + >>> fig_, ax_ = metric.plot(metric.compute()) + + """ + return self._plot(val, ax) diff --git a/src/torchmetrics/detection/giou.py b/src/torchmetrics/detection/giou.py index 43edd76c0c5..4c69f0842e2 100644 --- a/src/torchmetrics/detection/giou.py +++ b/src/torchmetrics/detection/giou.py @@ -174,7 +174,7 @@ def plot( ... ] >>> target = lambda : [ ... { - ... "boxes": torch.tensor([[300.00, 100.00, 315.00, 150.00]]) + torch.randint(-10, 10, (1, 4)), + ... "boxes": torch.tensor([[300.00, 100.00, 335.00, 150.00]]) + torch.randint(-10, 10, (1, 4)), ... "labels": torch.tensor([5]), ... } ... ] diff --git a/src/torchmetrics/functional/clustering/__init__.py b/src/torchmetrics/functional/clustering/__init__.py index 0d7fb2cbdca..08656e9e5e4 100644 --- a/src/torchmetrics/functional/clustering/__init__.py +++ b/src/torchmetrics/functional/clustering/__init__.py @@ -11,12 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score from torchmetrics.functional.clustering.dunn_index import dunn_index from torchmetrics.functional.clustering.mutual_info_score import mutual_info_score from torchmetrics.functional.clustering.normalized_mutual_info_score import normalized_mutual_info_score from torchmetrics.functional.clustering.rand_score import rand_score __all__ = [ + "calinski_harabasz_score", "dunn_index", "mutual_info_score", "normalized_mutual_info_score", diff --git a/src/torchmetrics/functional/clustering/calinski_harabasz_score.py b/src/torchmetrics/functional/clustering/calinski_harabasz_score.py new file mode 100644 index 00000000000..244a657f42e --- /dev/null +++ b/src/torchmetrics/functional/clustering/calinski_harabasz_score.py @@ -0,0 +1,73 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +from torch import Tensor + + +def _calinski_harabasz_score_validate_input(data: Tensor, labels: Tensor) -> None: + """Validate that the input data and labels have correct shape and type.""" + if data.ndim != 2: + raise ValueError(f"Expected 2D data, got {data.ndim}D data instead") + if not data.is_floating_point(): + raise ValueError(f"Expected floating point data, got {data.dtype} data instead") + if labels.ndim != 1: + raise ValueError(f"Expected 1D labels, got {labels.ndim}D labels instead") + + +def calinski_harabasz_score(data: Tensor, labels: Tensor) -> Tensor: + """Compute the Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms. + + Args: + data: float tensor with shape ``(N,d)`` with the embedded data. + labels: single integer tensor with shape ``(N,)`` with cluster labels + + Returns: + Scalar tensor with the Calinski Harabasz Score + + Example: + >>> import torch + >>> from torchmetrics.functional.clustering import calinski_harabasz_score + >>> _ = torch.manual_seed(42) + >>> data = torch.randn(10, 3) + >>> labels = torch.randint(0, 2, (10,)) + >>> calinski_harabasz_score(data, labels) + tensor(3.4998) + + """ + _calinski_harabasz_score_validate_input(data, labels) + + # convert to zero indexed labels + unique_labels, labels = torch.unique(labels, return_inverse=True) + n_labels = len(unique_labels) + + n_samples = data.shape[0] + + if not 1 < n_labels < n_samples: + raise ValueError( + "Number of detected clusters must be greater than one and less than the number of samples." + f"Got {n_labels} clusters and {n_samples} samples." 
+ ) + + mean = data.mean(dim=0) + between_cluster_dispersion = torch.tensor(0.0, device=data.device) + within_cluster_dispersion = torch.tensor(0.0, device=data.device) + for k in range(n_labels): + cluster_k = data[labels == k, :] + mean_k = cluster_k.mean(dim=0) + between_cluster_dispersion += ((mean_k - mean) ** 2).sum() * cluster_k.shape[0] + within_cluster_dispersion += ((cluster_k - mean_k) ** 2).sum() + + if within_cluster_dispersion == 0: + return torch.tensor(1.0, device=data.device, dtype=torch.float32) + return between_cluster_dispersion * (n_samples - n_labels) / (within_cluster_dispersion * (n_labels - 1.0)) diff --git a/tests/unittests/clustering/test_calinski_harabasz_score.py b/tests/unittests/clustering/test_calinski_harabasz_score.py new file mode 100644 index 00000000000..cc063d1ebb5 --- /dev/null +++ b/tests/unittests/clustering/test_calinski_harabasz_score.py @@ -0,0 +1,56 @@ +# Copyright The Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from sklearn.metrics import calinski_harabasz_score as sklearn_calinski_harabasz_score +from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore +from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score + +from unittests.clustering.inputs import _single_target_intrinsic1, _single_target_intrinsic2 +from unittests.helpers import seed_all +from unittests.helpers.testers import MetricTester + +seed_all(42) + + +@pytest.mark.parametrize( + "preds, target", + [ + (_single_target_intrinsic1.preds, _single_target_intrinsic1.target), + (_single_target_intrinsic2.preds, _single_target_intrinsic2.target), + ], +) +class TestCalinskiHarabaszScore(MetricTester): + """Test class for `CalinskiHarabaszScore` metric.""" + + atol = 1e-5 + + @pytest.mark.parametrize("ddp", [True, False]) + def test_calinski_harabasz_score(self, preds, target, ddp): + """Test class implementation of metric.""" + self.run_class_metric_test( + ddp=ddp, + preds=preds, + target=target, + metric_class=CalinskiHarabaszScore, + reference_metric=sklearn_calinski_harabasz_score, + ) + + def test_calinski_harabasz_score_functional(self, preds, target): + """Test functional implementation of metric.""" + self.run_functional_metric_test( + preds=preds, + target=target, + metric_functional=calinski_harabasz_score, + reference_metric=sklearn_calinski_harabasz_score, + ) diff --git a/tests/unittests/image/test_perceptual_path_length.py b/tests/unittests/image/test_perceptual_path_length.py index 0f76ce51372..1f16a8c71f2 100644 --- a/tests/unittests/image/test_perceptual_path_length.py +++ b/tests/unittests/image/test_perceptual_path_length.py @@ -164,6 +164,7 @@ def num_classes(self): ), ], ) +@skip_on_running_out_of_memory() def test_raises_error_on_wrong_generator(generator, errortype, match): """Test that appropriate errors are raised on wrong generator.""" with pytest.raises(errortype, match=match): @@ -176,6 +177,7 @@ def 
test_raises_error_on_wrong_generator(generator, errortype, match): @pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity") @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") +@skip_on_running_out_of_memory() def test_compare(): """Test against torch_fidelity. diff --git a/tests/unittests/utilities/test_plot.py b/tests/unittests/utilities/test_plot.py index f03cdab50ed..926c7082bc0 100644 --- a/tests/unittests/utilities/test_plot.py +++ b/tests/unittests/utilities/test_plot.py @@ -91,7 +91,7 @@ MultilabelROC, MultilabelSpecificity, ) -from torchmetrics.clustering import MutualInfoScore, NormalizedMutualInfoScore, RandScore +from torchmetrics.clustering import CalinskiHarabaszScore, MutualInfoScore, NormalizedMutualInfoScore, RandScore from torchmetrics.detection import PanopticQuality from torchmetrics.detection.mean_ap import MeanAveragePrecision from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio @@ -617,6 +617,7 @@ pytest.param(TranslationEditRate, _text_input_3, _text_input_4, id="translation edit rate"), pytest.param(MutualInfoScore, _nominal_input, _nominal_input, id="mutual info score"), pytest.param(RandScore, _nominal_input, _nominal_input, id="rand score"), + pytest.param(CalinskiHarabaszScore, lambda: torch.randn(100, 3), _nominal_input, id="calinski harabasz score"), pytest.param(NormalizedMutualInfoScore, _nominal_input, _nominal_input, id="normalized mutual info score"), ], ) From 979b8a69f6cba8bf1f3af245c5fe609a0be94410 Mon Sep 17 00:00:00 2001 From: Shion Date: Mon, 4 Sep 2023 23:04:03 +0900 Subject: [PATCH 09/19] fix inputs to calinski harabasz --- .../clustering/test_calinski_harabasz_score.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unittests/clustering/test_calinski_harabasz_score.py b/tests/unittests/clustering/test_calinski_harabasz_score.py index cc063d1ebb5..98c86218b04 100644 --- 
a/tests/unittests/clustering/test_calinski_harabasz_score.py +++ b/tests/unittests/clustering/test_calinski_harabasz_score.py @@ -24,10 +24,10 @@ @pytest.mark.parametrize( - "preds, target", + "data, labels", [ - (_single_target_intrinsic1.preds, _single_target_intrinsic1.target), - (_single_target_intrinsic2.preds, _single_target_intrinsic2.target), + (_single_target_intrinsic1.data, _single_target_intrinsic1.labels), + (_single_target_intrinsic2.data, _single_target_intrinsic2.labels), ], ) class TestCalinskiHarabaszScore(MetricTester): @@ -36,21 +36,21 @@ class TestCalinskiHarabaszScore(MetricTester): atol = 1e-5 @pytest.mark.parametrize("ddp", [True, False]) - def test_calinski_harabasz_score(self, preds, target, ddp): + def test_calinski_harabasz_score(self, data, labels, ddp): """Test class implementation of metric.""" self.run_class_metric_test( ddp=ddp, - preds=preds, - target=target, + preds=data, + target=labels, metric_class=CalinskiHarabaszScore, reference_metric=sklearn_calinski_harabasz_score, ) - def test_calinski_harabasz_score_functional(self, preds, target): + def test_calinski_harabasz_score_functional(self, data, labels): """Test functional implementation of metric.""" self.run_functional_metric_test( - preds=preds, - target=target, + preds=data, + target=labels, metric_functional=calinski_harabasz_score, reference_metric=sklearn_calinski_harabasz_score, ) From 83f2f91d8e0df86d9b46971c488ac7593b6a17d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Sep 2023 14:16:02 +0000 Subject: [PATCH 10/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unittests/clustering/inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/clustering/inputs.py b/tests/unittests/clustering/inputs.py index 983ae0edb1a..15b24298f7d 100644 --- a/tests/unittests/clustering/inputs.py +++ 
b/tests/unittests/clustering/inputs.py @@ -54,4 +54,4 @@ def _batch_blobs(num_batches, num_samples, num_features, num_classes): ) _single_target_intrinsic1 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) -_single_target_intrinsic2 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) \ No newline at end of file +_single_target_intrinsic2 = _batch_blobs(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, NUM_CLASSES) From 7813a2d134080d004ca51fc0262fd9d37bd99062 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Wed, 6 Sep 2023 07:58:09 +0200 Subject: [PATCH 11/19] add to docs --- src/torchmetrics/clustering/dunn_index.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/torchmetrics/clustering/dunn_index.py b/src/torchmetrics/clustering/dunn_index.py index e4f80b390c3..11b8b5cf3f5 100644 --- a/src/torchmetrics/clustering/dunn_index.py +++ b/src/torchmetrics/clustering/dunn_index.py @@ -34,6 +34,10 @@ class DunnIndex(Metric): Where :math:`C_i` is a cluster of tensors, :math:`C_j` is a cluster of tensors, and :math:`\delta(C_i,C_j)` is the intercluster distance metric for :math:`m` clusters. + This clustering metric is an intrinsic measure, because it does not rely on ground truth labels for the evaluation. + Instead it examines how well the clusters are separated from each other. The score is higher when clusters are dense + and well separated, which relates to a standard concept of a cluster. + As input to ``forward`` and ``update`` the metric accepts the following input: - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. 
From 9d31052107342c77f21e0e73ca4234cf01c5dd9c Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Wed, 6 Sep 2023 08:00:12 +0200 Subject: [PATCH 12/19] add plot testing --- tests/unittests/utilities/test_plot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unittests/utilities/test_plot.py b/tests/unittests/utilities/test_plot.py index 926c7082bc0..24ba6743e48 100644 --- a/tests/unittests/utilities/test_plot.py +++ b/tests/unittests/utilities/test_plot.py @@ -91,7 +91,13 @@ MultilabelROC, MultilabelSpecificity, ) -from torchmetrics.clustering import CalinskiHarabaszScore, MutualInfoScore, NormalizedMutualInfoScore, RandScore +from torchmetrics.clustering import ( + CalinskiHarabaszScore, + DunnIndex, + MutualInfoScore, + NormalizedMutualInfoScore, + RandScore, +) from torchmetrics.detection import PanopticQuality from torchmetrics.detection.mean_ap import MeanAveragePrecision from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio @@ -619,6 +625,7 @@ pytest.param(RandScore, _nominal_input, _nominal_input, id="rand score"), pytest.param(CalinskiHarabaszScore, lambda: torch.randn(100, 3), _nominal_input, id="calinski harabasz score"), pytest.param(NormalizedMutualInfoScore, _nominal_input, _nominal_input, id="normalized mutual info score"), + pytest.param(DunnIndex, lambda: torch.randn(100, 3), _nominal_input, id="dunn index"), ], ) @pytest.mark.parametrize("num_vals", [1, 3]) From 9b31de063fc3477ab13b503f6423a2f5d5a00e9e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 06:00:47 +0000 Subject: [PATCH 13/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unittests/utilities/test_plot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unittests/utilities/test_plot.py b/tests/unittests/utilities/test_plot.py index 
24ba6743e48..54298a8f68d 100644 --- a/tests/unittests/utilities/test_plot.py +++ b/tests/unittests/utilities/test_plot.py @@ -92,10 +92,10 @@ MultilabelSpecificity, ) from torchmetrics.clustering import ( - CalinskiHarabaszScore, - DunnIndex, - MutualInfoScore, - NormalizedMutualInfoScore, + CalinskiHarabaszScore, + DunnIndex, + MutualInfoScore, + NormalizedMutualInfoScore, RandScore, ) from torchmetrics.detection import PanopticQuality From 90bb263e7aced91619a5d3247db91bba67a0cf05 Mon Sep 17 00:00:00 2001 From: SkafteNicki Date: Wed, 6 Sep 2023 08:11:55 +0200 Subject: [PATCH 14/19] replace vector_norm with norm --- src/torchmetrics/functional/clustering/dunn_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py index 0f6c7f5346b..4fc90bd1b5d 100644 --- a/src/torchmetrics/functional/clustering/dunn_index.py +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -35,12 +35,12 @@ def _dunn_index_update(data: Tensor, labels: Tensor, p: float) -> Tuple[Tensor, clusters = [data[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] centroids = [c.mean(dim=0) for c in clusters] - intercluster_distance = torch.linalg.vector_norm( + intercluster_distance = torch.linalg.norm( torch.stack([a - b for a, b in combinations(centroids, 2)], dim=0), ord=p, dim=1 ) max_intracluster_distance = torch.stack( - [torch.linalg.vector_norm(ci - mu, ord=p, dim=1).max() for ci, mu in zip(clusters, centroids)] + [torch.linalg.norm(ci - mu, ord=p, dim=1).max() for ci, mu in zip(clusters, centroids)] ) return intercluster_distance, max_intracluster_distance From d9a2b83a945345a8fb36872d06b935c1c728565d Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Wed, 6 Sep 2023 08:56:32 +0200 Subject: [PATCH 15/19] fix doc reference --- src/torchmetrics/functional/clustering/dunn_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/torchmetrics/functional/clustering/dunn_index.py b/src/torchmetrics/functional/clustering/dunn_index.py index 4fc90bd1b5d..b3b8d5df50c 100644 --- a/src/torchmetrics/functional/clustering/dunn_index.py +++ b/src/torchmetrics/functional/clustering/dunn_index.py @@ -54,7 +54,7 @@ def _dunn_index_compute(intercluster_distance: Tensor, max_intracluster_distance max_intracluster_distance: max intracluster distances Returns: - dunn_index: Dunn index + scalar tensor with the dunn index """ return intercluster_distance.min() / max_intracluster_distance.max() @@ -69,7 +69,7 @@ def dunn_index(data: Tensor, labels: Tensor, p: float = 2) -> Tensor: p: p-norm used for distance metric Returns: - dunn_index: Dunn index + scalar tensor with the dunn index Example: >>> from torchmetrics.functional.clustering import dunn_index From 9b5bd37579919d8eafd221fae010b71ad6005dcf Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Wed, 6 Sep 2023 21:53:47 +0900 Subject: [PATCH 16/19] Update src/torchmetrics/functional/clustering/utils.py Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- src/torchmetrics/functional/clustering/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/torchmetrics/functional/clustering/utils.py b/src/torchmetrics/functional/clustering/utils.py index 39e0a399be8..910350acf3e 100644 --- a/src/torchmetrics/functional/clustering/utils.py +++ b/src/torchmetrics/functional/clustering/utils.py @@ -153,10 +153,6 @@ def calculate_contingency_matrix( def _is_real_discrete_label(x: Tensor) -> bool: """Check if tensor of labels is real and discrete. 
- - Args: - x: tensor - """ if x.ndim != 1: raise ValueError(f"Expected arguments to be 1-d tensors but got {x.ndim}-d tensors.") From 655ff744af0076200b9e3d53eb4d9cc3c2fc37d3 Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Wed, 6 Sep 2023 21:53:56 +0900 Subject: [PATCH 17/19] Update tests/unittests/clustering/test_dunn_index.py Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- tests/unittests/clustering/test_dunn_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/clustering/test_dunn_index.py b/tests/unittests/clustering/test_dunn_index.py index 3170382d696..e7d4d31d136 100644 --- a/tests/unittests/clustering/test_dunn_index.py +++ b/tests/unittests/clustering/test_dunn_index.py @@ -29,7 +29,7 @@ seed_all(42) -def _np_dunn_index(x, labels, p): +def _np_dunn_index(data, labels, ord): unique_labels, inverse_indices = np.unique(labels, return_inverse=True) clusters = [x[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] centroids = [c.mean(axis=0) for c in clusters] From d43f70928d1d45dfd92320ffaaca7f4b8878e54e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 12:54:24 +0000 Subject: [PATCH 18/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/torchmetrics/functional/clustering/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/torchmetrics/functional/clustering/utils.py b/src/torchmetrics/functional/clustering/utils.py index 910350acf3e..f5581ac3429 100644 --- a/src/torchmetrics/functional/clustering/utils.py +++ b/src/torchmetrics/functional/clustering/utils.py @@ -152,8 +152,7 @@ def calculate_contingency_matrix( def _is_real_discrete_label(x: Tensor) -> bool: - """Check if tensor of labels is real and discrete. 
- """ + """Check if tensor of labels is real and discrete.""" if x.ndim != 1: raise ValueError(f"Expected arguments to be 1-d tensors but got {x.ndim}-d tensors.") return not (torch.is_floating_point(x) or torch.is_complex(x)) From c8eb436017df47c50f25e19e3a6840d8dcd4e724 Mon Sep 17 00:00:00 2001 From: Shion Date: Wed, 6 Sep 2023 22:03:25 +0900 Subject: [PATCH 19/19] switch ord to p. ord is python builtin --- tests/unittests/clustering/test_dunn_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unittests/clustering/test_dunn_index.py b/tests/unittests/clustering/test_dunn_index.py index e7d4d31d136..b035dc3d48a 100644 --- a/tests/unittests/clustering/test_dunn_index.py +++ b/tests/unittests/clustering/test_dunn_index.py @@ -29,9 +29,9 @@ seed_all(42) -def _np_dunn_index(data, labels, ord): +def _np_dunn_index(data, labels, p): unique_labels, inverse_indices = np.unique(labels, return_inverse=True) - clusters = [x[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] + clusters = [data[inverse_indices == label_idx] for label_idx in range(len(unique_labels))] centroids = [c.mean(axis=0) for c in clusters] intercluster_distance = np.linalg.norm(