Fix tie breaking in ndcg metric (#2031)
* fix implementation

* add tests

* chlog

---------

Co-authored-by: Jirka Borovec <[email protected]>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
(cherry picked from commit 1caaf28)
SkafteNicki authored and Borda committed Sep 9, 2023
1 parent 39c40e3 commit 4eccd40
Showing 3 changed files with 71 additions and 15 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.md
@@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Fixed performance issues in `RecallAtFixedPrecision` for large batch sizes ([#2042](https://github.com/Lightning-AI/torchmetrics/pull/2042))
- Fixed tie breaking in ndcg metric ([#2031](https://github.com/Lightning-AI/torchmetrics/pull/2031))


- Fixed bug in `BootStrapper` when very few samples were evaluated that could lead to crash ([#2052](https://github.com/Lightning-AI/torchmetrics/pull/2052))
@@ -24,11 +24,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed bug when creating multiple plots that lead to not all plots being shown ([#2060](https://github.com/Lightning-AI/torchmetrics/pull/2060))


- Fixed performance issues in `RecallAtFixedPrecision` for large batch sizes ([#2042](https://github.com/Lightning-AI/torchmetrics/pull/2042))


## [1.1.1] - 2023-08-29

### Added

- Added `average` argument to `MeanAveragePrecision` ([#2018](https://github.com/Lightning-AI/torchmetrics/pull/2018)
- Added `average` argument to `MeanAveragePrecision` ([#2018](https://github.com/Lightning-AI/torchmetrics/pull/2018))

### Fixed

66 changes: 53 additions & 13 deletions src/torchmetrics/functional/retrieval/ndcg.py
@@ -19,10 +19,53 @@
from torchmetrics.utilities.checks import _check_retrieval_functional_inputs


def _dcg(target: Tensor) -> Tensor:
    """Compute Discounted Cumulative Gain for input tensor."""
    denom = torch.log2(torch.arange(target.shape[-1], device=target.device) + 2.0)
    return (target / denom).sum(dim=-1)
def _tie_average_dcg(target: Tensor, preds: Tensor, discount_cumsum: Tensor) -> Tensor:
    """Translated version of sklearn's `_tie_average_dcg` function.

    Args:
        target: ground truth about each document relevance.
        preds: estimated probabilities of each document to be relevant.
        discount_cumsum: cumulative sum of the discount.

    Returns:
        The cumulative gain of the tied elements.

    """
    # group documents by identical prediction score and average their relevance per group
    _, inv, counts = torch.unique(-preds, return_inverse=True, return_counts=True)
    ranked = torch.zeros_like(counts, dtype=torch.float32)
    ranked.scatter_add_(0, inv, target.to(dtype=ranked.dtype))
    ranked = ranked / counts
    # each tie group receives the summed discount of the rank positions it occupies
    groups = counts.cumsum(dim=0) - 1
    discount_sums = torch.zeros_like(counts, dtype=torch.float32)
    discount_sums[0] = discount_cumsum[groups[0]]
    discount_sums[1:] = discount_cumsum[groups].diff()
    return (ranked * discount_sums).sum()
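
# Illustrative trace of the tie averaging above (not part of the PR, values assumed):
# with preds = [0.1, 0.0, 0.0, 0.0, 0.1] and target = [10, 0, 0, 1, 5], `torch.unique(-preds, ...)`
# forms two tie groups (score 0.1 -> documents 0 and 4, score 0.0 -> documents 1-3) with
# counts [2, 3]. The scatter-add averages the relevances per group to [7.5, 1/3], `groups`
# marks the last rank index of each group ([1, 4]), and `discount_sums` assigns every group
# the total discount of the rank positions it spans, so tied documents share one averaged
# gain instead of depending on the arbitrary order a sort would give them.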


def _dcg_sample_scores(target: Tensor, preds: Tensor, top_k: int, ignore_ties: bool) -> Tensor:
    """Translated version of sklearn's `_dcg_sample_scores` function.

    Args:
        target: ground truth about each document relevance.
        preds: estimated probabilities of each document to be relevant.
        top_k: consider only the top k elements.
        ignore_ties: If True, ties are ignored. If False, ties are averaged.

    Returns:
        The cumulative gain.

    """
    # logarithmic discount 1 / log2(rank + 2); ranks beyond top_k contribute nothing
    discount = 1.0 / (torch.log2(torch.arange(target.shape[-1], device=target.device) + 2.0))
    discount[top_k:] = 0.0

    if ignore_ties:
        ranking = preds.argsort(descending=True)
        ranked = target[ranking]
        cumulative_gain = (discount * ranked).sum()
    else:
        discount_cumsum = discount.cumsum(dim=-1)
        cumulative_gain = _tie_average_dcg(target, preds, discount_cumsum)
    return cumulative_gain


def retrieval_normalized_dcg(preds: Tensor, target: Tensor, top_k: Optional[int] = None) -> Tensor:
@@ -59,15 +102,12 @@ def retrieval_normalized_dcg(preds: Tensor, target: Tensor, top_k: Optional[int]
    if not (isinstance(top_k, int) and top_k > 0):
        raise ValueError("`top_k` has to be a positive integer or None")

    sorted_target = target[torch.argsort(preds, dim=-1, descending=True)][:top_k]
    ideal_target = torch.sort(target, descending=True)[0][:top_k]

    ideal_dcg = _dcg(ideal_target)
    target_dcg = _dcg(sorted_target)
    gain = _dcg_sample_scores(target, preds, top_k, ignore_ties=False)
    normalized_gain = _dcg_sample_scores(target, target, top_k, ignore_ties=True)

    # filter undefined scores
    all_irrelevant = ideal_dcg == 0
    target_dcg[all_irrelevant] = 0
    target_dcg[~all_irrelevant] /= ideal_dcg[~all_irrelevant]
    all_irrelevant = normalized_gain == 0
    gain[all_irrelevant] = 0
    gain[~all_irrelevant] /= normalized_gain[~all_irrelevant]

    return target_dcg.mean()
    return gain.mean()
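
A minimal sketch of the behaviour this change targets, using the functional API from this file; the tensors are illustrative (not from the PR) and the assertion relies on the tie averaging above, i.e. the score should no longer depend on which of two equally scored documents a sort happens to place first:

import torch
from torchmetrics.functional.retrieval.ndcg import retrieval_normalized_dcg

# documents 0 and 1 receive identical prediction scores; only the position of the
# single relevant document within that tied pair differs between the two targets
preds = torch.tensor([0.1, 0.1, 0.0])
target_a = torch.tensor([1, 0, 0])
target_b = torch.tensor([0, 1, 0])

# with tie averaging both orderings yield the same NDCG@1, whereas a plain argsort-based
# ranking could score them differently depending on how the sort breaks the tie
assert torch.allclose(
    retrieval_normalized_dcg(preds, target_a, top_k=1),
    retrieval_normalized_dcg(preds, target_b, top_k=1),
)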
13 changes: 13 additions & 0 deletions tests/unittests/retrieval/test_ndcg.py
@@ -15,6 +15,7 @@

import numpy as np
import pytest
import torch
from sklearn.metrics import ndcg_score
from torch import Tensor
from torchmetrics.functional.retrieval.ndcg import retrieval_normalized_dcg
@@ -185,3 +186,15 @@ def test_arguments_functional_metric(self, preds: Tensor, target: Tensor, messag
            exception_type=ValueError,
            kwargs_update=metric_args,
        )


def test_corner_case_with_tied_scores():
    """See issue: https://github.com/Lightning-AI/torchmetrics/issues/2022."""
    target = torch.tensor([[10, 0, 0, 1, 5]])
    preds = torch.tensor([[0.1, 0, 0, 0, 0.1]])

    for k in [1, 3, 5]:
        assert torch.allclose(
            retrieval_normalized_dcg(preds, target, top_k=k),
            torch.tensor([ndcg_score(target, preds, k=k)], dtype=torch.float32),
        )
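
For completeness, a usage sketch with the module-based wrapper (assuming the `RetrievalNormalizedDCG` class from `torchmetrics.retrieval`, which groups documents into queries via `indexes`); the values below are illustrative:

import torch
from torchmetrics.retrieval import RetrievalNormalizedDCG

# two queries; the first query contains a pair of tied prediction scores
indexes = torch.tensor([0, 0, 0, 1, 1])
preds = torch.tensor([0.1, 0.1, 0.0, 0.7, 0.2])
target = torch.tensor([10, 0, 1, 2, 0])

metric = RetrievalNormalizedDCG(top_k=3)
score = metric(preds, target, indexes=indexes)  # tie-averaged NDCG, averaged over queries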
