Added the ndcg metric [WIP] #2632
Draft: kamalojasv181 wants to merge 38 commits into pytorch:master from kamalojasv181:ndcg (diff shown from 13 of 38 commits)
Commits (38, all by kamalojasv181; messages verbatim, some truncated by the page):

cd6ec7f  Added the ndcg metric [WIP]
6421879  Merge branch 'master' into ndcg
4535af1  added GPU support, corrected mypy errors, and minor fixes
eb73c99  Merge branch 'ndcg' of https://github.com/kamalojasv181/ignite into ndcg
70d06e5  Incorporated the suggested changes
6a86f5f  Fixed mypy error
7b7ed6f  Fixed bugs in NDCG and added tests for output and reset
2c87ee1  Fixed mypy error
f4c628a  Added the exponential form on https://en.wikipedia.org/wiki/Discounte…
e72b59e  Corrected true, pred order and corresponding tests
ef63d85  Added ties case, exponential tests, log_base tests, corresponding tes…
115501b  Added GPU check on top
189b579  Put tensors on GPU inside the function to pervent error
c509456  Improved tests and minor bugfixes
84900f0  Removed device hyperparam from _ndcg_smaple_scores
9bfc06e  Skipped GPU tests for CPU only systems
477e096  Changed Error message
44329d7  Merge branch 'master' of https://github.com/pytorch/ignite into ndcg
5ba7fb7  Merge branch 'pytorch:master' into ndcg
ac800ff  Made tests randomised from deterministic and introduced ignore_ties_f…
691e89a  Merge branch 'ndcg' of https://github.com/kamalojasv181/ignite into ndcg
962bcef  Merge branch 'master' of https://github.com/pytorch/ignite into ndcg
79979cc  Merge branch 'pytorch:master' into ndcg
0c1d6fd  Changed test name to test_output_cuda from test_output_gpu
85cdcaf  Merge branch 'ndcg' of https://github.com/kamalojasv181/ignite into ndcg
fdf7877  Merge branch 'master' into ndcg
2931d20  Changed variable names to replacement and ignore_ties and removed red…
c308e41  Changed variable names to replacement and ignore_ties and removed red…
95ede6c  Merge branch 'master' of https://github.com/pytorch/ignite into ndcg
3a4d2af  Merge branch 'ndcg' of https://github.com/kamalojasv181/ignite into ndcg
6e66273  Removed redundant test cases and removed the redundant if statement
eb75afa  Added distributed tests, added multiple test cases corresponding to o…
dcf276d  Made the tests wsork on in ddp configuration
cb273e7  Merge branch 'master' of https://github.com/pytorch/ignite into ndcg
b0f449b  Merge branch 'pytorch:master' into ndcg
388db23  Merge branch 'ndcg' of https://github.com/kamalojasv181/ignite into ndcg
b3b6b28  Merge branch 'pytorch:master' into ndcg
6dcf3b2  Returning tuple of two tensors instead of tuple of list of tensors
New file, +5 lines (the path is not shown in the capture; from its contents this is the `recsys` package `__init__`):

```python
from ignite.metrics.recsys.ndcg import NDCG

__all__ = [
    "NDCG",
]
```
New file, +115 lines — the NDCG metric implementation (imported in the tests as `ignite.metrics.recsys.ndcg`). Cleaned up below: the original draft's `assert log_base != 1 or log_base <= 0` accepted invalid bases (the condition is true for e.g. `log_base=0`), `update` never forwarded `self.ignore_ties`, and the error message read "NGCD":

```python
from typing import Callable, Optional, Sequence, Union

import torch

from ignite.exceptions import NotComputableError
from ignite.metrics.metric import Metric

__all__ = ["NDCG"]


def _tie_averaged_dcg(
    y_pred: torch.Tensor,
    y_true: torch.Tensor,
    discount_cumsum: torch.Tensor,
    device: Union[str, torch.device] = torch.device("cpu"),
) -> torch.Tensor:
    # Same tie handling as sklearn.metrics.ndcg_score: items tied on the
    # predicted score form a group; each group contributes its mean relevance,
    # weighted by the summed discounts of the positions the group spans.
    _, inv, counts = torch.unique(-y_pred, return_inverse=True, return_counts=True)
    ranked = torch.zeros(counts.shape[0]).to(device)
    ranked.index_put_([inv], y_true, accumulate=True)
    ranked /= counts
    groups = torch.cumsum(counts, dim=-1) - 1
    discount_sums = torch.empty(counts.shape[0]).to(device)
    discount_sums[0] = discount_cumsum[groups[0]]
    discount_sums[1:] = torch.diff(discount_cumsum[groups])

    return torch.sum(torch.mul(ranked, discount_sums))


def _dcg_sample_scores(
    y_pred: torch.Tensor,
    y_true: torch.Tensor,
    k: Optional[int] = None,
    log_base: Union[int, float] = 2,
    ignore_ties: bool = False,
    device: Union[str, torch.device] = torch.device("cpu"),
) -> torch.Tensor:
    # Discount at 0-indexed position i is 1 / log_base(i + 2),
    # written via the change-of-base identity.
    discount = torch.log(torch.tensor(log_base)) / torch.log(torch.arange(y_true.shape[1]) + 2)
    discount = discount.to(device)

    if k is not None:
        discount[k:] = 0.0

    if ignore_ties:
        ranking = torch.argsort(y_pred, descending=True)
        ranked = y_true[torch.arange(ranking.shape[0]).reshape(-1, 1), ranking].to(device)
        discounted_gains = torch.mm(ranked, discount.reshape(-1, 1))
    else:
        discount_cumsum = torch.cumsum(discount, dim=-1)
        discounted_gains = torch.tensor(
            [_tie_averaged_dcg(y_p, y_t, discount_cumsum, device) for y_p, y_t in zip(y_pred, y_true)], device=device
        )

    return discounted_gains


def _ndcg_sample_scores(
    y_pred: torch.Tensor,
    y_true: torch.Tensor,
    k: Optional[int] = None,
    log_base: Union[int, float] = 2,
    ignore_ties: bool = False,
    device: Union[str, torch.device] = torch.device("cpu"),
) -> torch.Tensor:
    gain = _dcg_sample_scores(y_pred, y_true, k=k, log_base=log_base, ignore_ties=ignore_ties, device=device)
    if not ignore_ties:
        gain = gain.unsqueeze(dim=-1)
    # Normalize by the DCG of the ideal ordering; samples with an all-zero
    # ideal DCG are dropped from the result.
    normalizing_gain = _dcg_sample_scores(y_true, y_true, k=k, log_base=log_base, ignore_ties=True, device=device)
    all_relevant = normalizing_gain != 0
    normalized_gain = gain[all_relevant] / normalizing_gain[all_relevant]
    return normalized_gain


class NDCG(Metric):
    def __init__(
        self,
        output_transform: Callable = lambda x: x,
        device: Union[str, torch.device] = torch.device("cpu"),
        k: Optional[int] = None,
        log_base: Union[int, float] = 2,
        exponential: bool = False,
        ignore_ties: bool = False,
    ):
        assert log_base > 0 and log_base != 1, f"Illegal value {log_base} for log_base"
        self.log_base = log_base
        self.k = k
        self.exponential = exponential
        super(NDCG, self).__init__(output_transform=output_transform, device=device)
        self.ignore_ties = ignore_ties

    def reset(self) -> None:
        self.num_examples = 0
        self.ndcg = torch.tensor(0.0, device=self._device)

    def update(self, output: Sequence[torch.Tensor]) -> None:
        y_pred, y_true = output[0].detach(), output[1].detach()

        if self.exponential:
            y_true = 2 ** y_true - 1

        gain = _ndcg_sample_scores(
            y_pred, y_true, k=self.k, log_base=self.log_base, ignore_ties=self.ignore_ties, device=self._device
        )
        self.ndcg += torch.sum(gain)
        self.num_examples += y_pred.shape[0]

    def compute(self) -> float:
        if self.num_examples == 0:
            raise NotComputableError("NDCG must have at least one example before it can be computed.")

        return (self.ndcg / self.num_examples).item()
```
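For intuition, the quantity the module computes can be sketched without torch: DCG discounts the true relevance at 0-indexed position i by 1 / log2(i + 2), and NDCG divides by the DCG of the ideal ordering. A minimal, dependency-free illustration of the tie-free, linear-gain case (function names here are illustrative, not part of the PR):

```python
import math

def dcg(relevances, k=None):
    # relevances are listed in ranked order (best-predicted item first);
    # position i (0-indexed) is discounted by 1 / log2(i + 2)
    if k is not None:
        relevances = relevances[:k]
    return sum(rel / math.log2(i + 2) for i, rel in enumerate(relevances))

def ndcg(y_pred, y_true, k=None):
    # rank the true relevances by descending predicted score,
    # then normalize by the DCG of the ideal (descending-relevance) order
    order = sorted(range(len(y_pred)), key=lambda i: -y_pred[i])
    ranked = [y_true[i] for i in order]
    ideal = sorted(y_true, reverse=True)
    return dcg(ranked, k) / dcg(ideal, k)

# A perfect ranking scores exactly 1.0; a worse ordering scores below 1.0.
print(ndcg([0.1, 0.2, 0.3, 0.4, 0.5], [1.0, 2.0, 3.0, 4.0, 5.0]))  # 1.0
print(ndcg([0.5, 0.4, 0.3, 0.2, 0.1], [1.0, 2.0, 3.0, 4.0, 5.0]))
```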
New test file, +187 lines (path not shown in the capture; the NDCG test suite). Cleaned up below: the `NotComputableError` match string is aligned with the fixed "NDCG" message, and `ndcg_score_with_log_base` now actually forwards `log_base` (the original draft accepted the argument but never used it):

```python
import numpy as np
import pytest
import torch
from sklearn.metrics import ndcg_score
from sklearn.metrics._ranking import _dcg_sample_scores

from ignite.exceptions import NotComputableError
from ignite.metrics.recsys.ndcg import NDCG


@pytest.mark.parametrize(
    "y_pred, y_true",
    [
        (torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]]), torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])),
        (
            torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5], [3.7, 4.8, 3.9, 4.3, 4.9], [3.7, 4.8, 3.9, 4.3, 4.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.2, 4.5, 8.9, 5.6, 7.2], [2.9, 5.6, 3.8, 7.9, 6.2]]),
        ),
        (torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (
            torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7], [3.7, 3.7, 3.7, 3.7, 3.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.0, 2.0, 3.0, 4.0, 5.0]]),
        ),
    ],
)
@pytest.mark.parametrize("k", [None, 2, 3])
def test_output_cpu(y_pred, y_true, k):
    device = "cpu"

    ndcg = NDCG(k=k, device=device)
    ndcg.update([y_pred, y_true])
    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score(y_true.numpy(), y_pred.numpy(), k=k)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)


@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
@pytest.mark.parametrize(
    "y_pred, y_true",
    [
        (torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]]), torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])),
        (
            torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5], [3.7, 4.8, 3.9, 4.3, 4.9], [3.7, 4.8, 3.9, 4.3, 4.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.2, 4.5, 8.9, 5.6, 7.2], [2.9, 5.6, 3.8, 7.9, 6.2]]),
        ),
        (torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (
            torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7], [3.7, 3.7, 3.7, 3.7, 3.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.0, 2.0, 3.0, 4.0, 5.0]]),
        ),
    ],
)
@pytest.mark.parametrize("k", [None, 2, 3])
def test_output_gpu(y_pred, y_true, k):
    device = "cuda"
    y_pred = y_pred.to(device)
    y_true = y_true.to(device)
    ndcg = NDCG(k=k, device=device)
    ndcg.update([y_pred, y_true])
    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), k=k)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)


def test_reset():
    y_true = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])
    y_pred = torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]])
    ndcg = NDCG()
    ndcg.update([y_pred, y_true])
    ndcg.reset()

    with pytest.raises(NotComputableError, match=r"NDCG must have at least one example before it can be computed."):
        ndcg.compute()


@pytest.mark.parametrize(
    "y_pred, y_true",
    [
        (torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]]), torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])),
        (
            torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5], [3.7, 4.8, 3.9, 4.3, 4.9], [3.7, 4.8, 3.9, 4.3, 4.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.2, 4.5, 8.9, 5.6, 7.2], [2.9, 5.6, 3.8, 7.9, 6.2]]),
        ),
        (torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (
            torch.tensor([[3.7, 3.7, 3.7, 3.7, 3.7], [3.7, 3.7, 3.7, 3.7, 3.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.0, 2.0, 3.0, 4.0, 5.0]]),
        ),
    ],
)
@pytest.mark.parametrize("k", [None, 2, 3])
def test_exponential(y_pred, y_true, k):
    device = "cpu"

    ndcg = NDCG(k=k, device=device, exponential=True)
    ndcg.update([y_pred, y_true])
    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score(2 ** y_true.numpy() - 1, y_pred.numpy(), k=k)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)


@pytest.mark.parametrize(
    "y_pred, y_true",
    [
        (torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]]), torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])),
        (
            torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5], [3.7, 4.8, 3.9, 4.3, 4.9], [3.7, 4.8, 3.9, 4.3, 4.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.2, 4.5, 8.9, 5.6, 7.2], [2.9, 5.6, 3.8, 7.9, 6.2]]),
        ),
    ],
)
@pytest.mark.parametrize("k", [None, 2, 3])
def test_output_cpu_ignore_ties(y_pred, y_true, k):
    device = "cpu"

    ndcg = NDCG(k=k, device=device, ignore_ties=True)
    ndcg.update([y_pred, y_true])
    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score(y_true.numpy(), y_pred.numpy(), k=k)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)


@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
@pytest.mark.parametrize(
    "y_pred, y_true",
    [
        (torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5]]), torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])),
        (torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]]), torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])),
        (
            torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5], [3.7, 4.8, 3.9, 4.3, 4.9], [3.7, 4.8, 3.9, 4.3, 4.9]]),
            torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0], [1.2, 4.5, 8.9, 5.6, 7.2], [2.9, 5.6, 3.8, 7.9, 6.2]]),
        ),
    ],
)
@pytest.mark.parametrize("k", [None, 2, 3])
def test_output_gpu_ignore_ties(y_pred, y_true, k):
    device = "cuda"
    y_pred = y_pred.to(device)
    y_true = y_true.to(device)
    ndcg = NDCG(k=k, device=device, ignore_ties=True)
    ndcg.update([y_pred, y_true])
    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), k=k)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)


@pytest.mark.parametrize("log_base", [2, 3, 10])
def test_log_base(log_base):
    def _ndcg_sample_scores(y_true, y_score, k=None, log_base=2, ignore_ties=False):
        gain = _dcg_sample_scores(y_true, y_score, k, log_base=log_base, ignore_ties=ignore_ties)
        normalizing_gain = _dcg_sample_scores(y_true, y_true, k, log_base=log_base, ignore_ties=True)
        all_irrelevant = normalizing_gain == 0
        gain[all_irrelevant] = 0
        gain[~all_irrelevant] /= normalizing_gain[~all_irrelevant]
        return gain

    def ndcg_score_with_log_base(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False, log_base=2):
        # forward log_base to the sample-score helper
        gain = _ndcg_sample_scores(y_true, y_score, k=k, log_base=log_base, ignore_ties=ignore_ties)
        return np.average(gain, weights=sample_weight)

    y_true = torch.tensor([[3.7, 4.8, 3.9, 4.3, 4.9]])
    y_pred = torch.tensor([[2.9, 5.6, 3.8, 7.9, 6.2]])

    ndcg = NDCG(log_base=log_base)
    ndcg.update([y_pred, y_true])

    result_ignite = ndcg.compute()
    result_sklearn = ndcg_score_with_log_base(y_true.numpy(), y_pred.numpy(), log_base=log_base)

    np.testing.assert_allclose(np.array(result_ignite), result_sklearn, rtol=2e-7)
```
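A side observation on `test_log_base`: because the discount is log(log_base) / log(rank + 2), changing `log_base` rescales every per-sample DCG by the same constant log(log_base), and that constant cancels in the DCG / IDCG ratio. NDCG is therefore mathematically independent of `log_base` (with or without truncation at k). A small standalone check of this claim (pure Python, no ignite dependency; names are illustrative):

```python
import math

def ndcg_with_base(y_pred, y_true, log_base):
    # discount at 0-indexed position i: log(log_base) / log(i + 2),
    # mirroring the discount used in the PR's _dcg_sample_scores
    def dcg(rels):
        return sum(rel * math.log(log_base) / math.log(i + 2) for i, rel in enumerate(rels))

    order = sorted(range(len(y_pred)), key=lambda i: -y_pred[i])
    ranked = [y_true[i] for i in order]
    return dcg(ranked) / dcg(sorted(y_true, reverse=True))

y_pred = [2.9, 5.6, 3.8, 7.9, 6.2]
y_true = [3.7, 4.8, 3.9, 4.3, 4.9]
# The base-2 and base-10 scores agree to floating-point precision.
print(abs(ndcg_with_base(y_pred, y_true, 2) - ndcg_with_base(y_pred, y_true, 10)) < 1e-9)  # True
```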
Review discussion (on the per-sample for-loop in `_dcg_sample_scores`):

Reviewer: So, there is no way to make it vectorized, i.e. without a for-loop?

Author: I haven't checked yet. For now I have added this implementation. It's a TODO.
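For context, the loop in question calls `_tie_averaged_dcg` once per sample. What that helper computes can be rendered in plain Python roughly as follows (illustrative only, not the PR's code): items tied on the predicted score form a group, each member contributes the group's mean relevance, and the group is weighted by the summed discounts of the positions it spans.

```python
import math
from itertools import groupby

def tie_averaged_dcg(y_pred, y_true):
    # sort (score, relevance) pairs by descending predicted score
    pairs = sorted(zip(y_pred, y_true), key=lambda p: -p[0])
    total, pos = 0.0, 0
    for _, group in groupby(pairs, key=lambda p: p[0]):
        rels = [rel for _, rel in group]
        n = len(rels)
        # this tie group occupies positions pos .. pos + n - 1; each member
        # contributes the group's mean relevance at each of those positions
        discounts = sum(1.0 / math.log2(i + 2) for i in range(pos, pos + n))
        total += (sum(rels) / n) * discounts
        pos += n
    return total

# With no ties this reduces to ordinary DCG; with all scores tied, every
# position receives the mean relevance.
print(tie_averaged_dcg([3.0, 2.0, 1.0], [3.0, 2.0, 1.0]))
print(tie_averaged_dcg([1.0, 1.0, 1.0], [3.0, 0.0, 0.0]))
```

Vectorizing across samples would require batching these variable-sized tie groups, which is why the draft keeps the per-sample loop for now.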