feat: improve how device switch is handled between the metric device and the input tensors device #3043

Merged · 24 commits · Aug 25, 2023

Changes from 11 commits

Commits
750a6ca  refactor: remove outdated code and issue a warning if two tensors are… (MarcBresson, Aug 23, 2023)
78a4c78  feat: prioritize computation on GPU devices over CPUs (MarcBresson, Aug 24, 2023)
85eebd5  fix: use a temp var that will be moved with y_pred (MarcBresson, Aug 24, 2023)
9125e60  test: add metric and y_pred with different devices test (MarcBresson, Aug 24, 2023)
a4c2f7c  feat: move self._kernel directly and issue a warning only when not al… (MarcBresson, Aug 24, 2023)
1908fff  feat: adapt test to new behaviour (MarcBresson, Aug 24, 2023)
2547e70  feat: keep the accumulation on the same device as self._kernel (MarcBresson, Aug 24, 2023)
3269955  feat: move accumulation along side self._kernel (MarcBresson, Aug 24, 2023)
04af090  feat: allow different channel number (MarcBresson, Aug 24, 2023)
7922ec9  style: format using the run_code_style script (MarcBresson, Aug 25, 2023)
b0625e4  style: add line brak to conform to E501 (MarcBresson, Aug 25, 2023)
6817316  fix: use torch.empty to avoid type incompatibility between None and T… (MarcBresson, Aug 25, 2023)
d2aa8c8  feat: only operate on self._kernel, keep the accumulation on user's s… (MarcBresson, Aug 25, 2023)
c6bf8f8  test: add variable channel test and factorize the code (MarcBresson, Aug 25, 2023)
f6f82fe  Merge branch 'master' into refactor-_update (MarcBresson, Aug 25, 2023)
99c3469  refactor: remove redundant line between init and reset (MarcBresson, Aug 25, 2023)
eba6f68  refactor: elif comparison and replace RuntimeWarning by UserWarning (MarcBresson, Aug 25, 2023)
91ae235  refactor: set _kernel in __init__ and manually format to pass E501 (MarcBresson, Aug 25, 2023)
7284b01  test: adapt test to new UserWarning (MarcBresson, Aug 25, 2023)
d96255c  test: remove skips (MarcBresson, Aug 25, 2023)
2807f28  refactor: use None instead of torch.empty (MarcBresson, Aug 25, 2023)
526234c  style: reorder imports (MarcBresson, Aug 25, 2023)
b6f1a21  refactor: rename channel to nb_channel (MarcBresson, Aug 25, 2023)
0a38aa5  Fixed failing test_distrib_accumulator_device (vfdev-5, Aug 25, 2023)
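Taken together, these commits converge on a single device-resolution rule inside SSIM.update. Below is a paraphrased, standalone sketch of that rule (not the exact merged code; the function name and warning text are illustrative):

```python
import warnings

import torch


def resolve_devices(kernel: torch.Tensor, y_pred: torch.Tensor, y: torch.Tensor):
    # Prefer the accelerator: a CPU-resident kernel follows the inputs,
    # otherwise the inputs follow the kernel (with a warning when they start on CPU).
    if y_pred.device != kernel.device:
        if kernel.device == torch.device("cpu"):
            kernel = kernel.to(y_pred.device)
        elif y_pred.device == torch.device("cpu"):
            warnings.warn(
                "Metric device and update tensor device differ; moving the "
                "update tensors incurs a copy on every update.",
                UserWarning,
            )
        y_pred = y_pred.to(kernel.device)
        y = y.to(kernel.device)
    return kernel, y_pred, y
```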
28 changes: 23 additions & 5 deletions ignite/metrics/ssim.py
@@ -1,3 +1,4 @@
import warnings
from typing import Callable, Sequence, Union

import torch
@@ -103,6 +104,7 @@
self.pad_h = (self.kernel_size[0] - 1) // 2
self.pad_w = (self.kernel_size[1] - 1) // 2
self._kernel = self._gaussian_or_uniform_kernel(kernel_size=self.kernel_size, sigma=self.sigma)
self._expanded_kernel = None

@reinit__is_reduced
def reset(self) -> None:
@@ -157,18 +159,34 @@
f"Expected y_pred and y to have BxCxHxW shape. Got y_pred: {y_pred.shape} and y: {y.shape}."
)

channel = y_pred.size(1)
if len(self._kernel.shape) < 4:
self._kernel = self._kernel.expand(channel, 1, -1, -1).to(device=y_pred.device)
if y_pred.device != self._kernel.device:
if self._kernel.device == torch.device("cpu"):
self._kernel = self._kernel.to(device=y_pred.device)

self._sum_of_ssim = self._sum_of_ssim.to(device=y_pred.device)
self._expanded_kernel = None

if y_pred.device == torch.device("cpu"):
warnings.warn(
"The metric device or one of the previous update tensor was set on another device than this "
"update tensor, which is on CPU. To avoid having a performance hit, ensure that your metric "
"device and all of your update tensors are on the same device.",
RuntimeWarning,
)

y_pred = y_pred.to(device=self._kernel.device)
y = y.to(device=self._kernel.device)

y_pred = F.pad(y_pred, [self.pad_w, self.pad_w, self.pad_h, self.pad_h], mode="reflect")
y = F.pad(y, [self.pad_w, self.pad_w, self.pad_h, self.pad_h], mode="reflect")

if y_pred.dtype != self._kernel.dtype:
self._kernel = self._kernel.to(dtype=y_pred.dtype)

channel = y_pred.size(1)
if self._expanded_kernel is None or self._expanded_kernel.shape[0] != channel:
self._expanded_kernel = self._kernel.expand(channel, 1, -1, -1)

input_list = [y_pred, y, y_pred * y_pred, y * y, y_pred * y]
outputs = F.conv2d(torch.cat(input_list), self._kernel, groups=channel)
outputs = F.conv2d(torch.cat(input_list), self._expanded_kernel, groups=channel)
batch_size = y_pred.size(0)
output_list = [outputs[x * batch_size : (x + 1) * batch_size] for x in range(len(input_list))]

@@ -186,7 +204,7 @@
b2 = sigma_pred_sq + sigma_target_sq + self.c2

ssim_idx = (a1 * a2) / (b1 * b2)
self._sum_of_ssim += torch.mean(ssim_idx, (1, 2, 3), dtype=torch.float64).sum().to(self._device)
self._sum_of_ssim += torch.mean(ssim_idx, (1, 2, 3), dtype=torch.float64).sum()

self._num_examples += y.shape[0]

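For context, a minimal usage sketch of the behaviour the hunk above implements, assuming a CUDA device is available (shapes and values are illustrative only):

```python
import torch
from ignite.metrics import SSIM

metric = SSIM(data_range=1.0, device="cpu")  # the kernel starts on the metric device (CPU)

y_pred = torch.rand(4, 3, 64, 64, device="cuda")
y = y_pred * 0.9

# CUDA inputs against a CPU kernel: the kernel is silently moved to CUDA.
metric.update((y_pred, y))

# CPU inputs against the (now CUDA) kernel: a warning is raised and the
# inputs are moved to the kernel's device before the convolution.
metric.update((y_pred.cpu(), y.cpu()))

print(metric.compute())
```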
37 changes: 37 additions & 0 deletions tests/ignite/metrics/test_ssim.py
@@ -102,6 +102,43 @@ def test_ssim(
assert np.allclose(ignite_ssim, skimg_ssim, atol=precision)


@pytest.mark.parametrize(
"metric_device, y_pred_device",
[
[torch.device("cpu"), torch.device("cpu")],
[torch.device("cpu"), torch.device("cuda")],
[torch.device("cuda"), torch.device("cpu")],
[torch.device("cuda"), torch.device("cuda")],
],
)
def test_ssim_device(available_device, metric_device, y_pred_device):
if available_device == "cpu":
pytest.skip("This test requires a cuda device.")

data_range = 1.0
sigma = 1.5
shape = (12, 5, 256, 256)

ssim = SSIM(data_range=data_range, sigma=sigma, device=metric_device)

y_pred = torch.rand(shape, device=y_pred_device)
y = y_pred * 0.8

if metric_device == torch.device("cuda") and y_pred_device == torch.device("cpu"):
with pytest.warns(RuntimeWarning):
ssim.update((y_pred, y))
else:
ssim.update((y_pred, y))

if metric_device == torch.device("cuda") or y_pred_device == torch.device("cuda"):
# A tensor will always have the device index set
excepted_device = torch.device("cuda:0")
else:
excepted_device = torch.device("cpu")

assert ssim._kernel.device == excepted_device


def test_ssim_variable_batchsize(available_device):
# Checks https://github.com/pytorch/ignite/issues/2532
sigma = 1.5
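A quick aside on the expected-device comparison in the test above (assuming a CUDA machine): a tensor reports its device with an explicit index, so comparing against torch.device("cuda") without an index would fail.

```python
import torch

t = torch.empty(1, device="cuda")
print(t.device)                             # cuda:0 -- tensors always carry the device index
print(t.device == torch.device("cuda"))     # False: no index on the right-hand side
print(t.device == torch.device("cuda:0"))   # True
```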