hmorimitsu · hmorimitsu · Feb 21, 2024 · Feb 18, 2024 · Feb 19, 2024 · Feb 19, 2024
diff --git a/ptlflow/models/flowformer/flowformer.py b/ptlflow/models/flowformer/flowformer.py
@@ -177,7 +177,8 @@ def forward_tile(self, inputs, train_size):
         input_size = inputs["images"].shape[-2:]
         image_size = (max(self.args.tile_height, input_size[-2]), input_size[-1])
         hws = compute_grid_indices(image_size, train_size)
-        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma)
+        device = inputs["images"].device
+        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma, device=device)
 
         images, image_resizer = self.preprocess_images(
             inputs["images"],

diff --git a/ptlflow/models/flowformer/utils.py b/ptlflow/models/flowformer/utils.py
@@ -136,24 +136,51 @@ def compute_grid_indices(image_shape, patch_size, min_overlap=20):
     return [(h, w) for h in hs for w in ws]
 
 
-def compute_weight(hws, image_shape, patch_size, sigma=1.0, wtype="gaussian"):
+def compute_weight(
+    hws,
+    image_shape,
+    patch_size,
+    sigma=1.0,
+    wtype="gaussian",
+    device: torch.device = torch.device("cpu"),
+):
+    """Build one Gaussian weight map per tile.
+
+    Each tile is weighted by a Gaussian of the distance to the tile center,
+    measured in coordinates normalized by the tile size.
+
+    Args:
+        hws: (h, w) top-left index of every tile.
+        image_shape: (height, width) of the image the tiles are cut from.
+        patch_size: (height, width) of a tile.
+        sigma: standard deviation of the Gaussian, in normalized coordinates.
+        wtype: unused; only the Gaussian weighting is implemented.
+        device: device on which the weight tensors are created.
+
+    Returns:
+        A list with one (1, 1, patch_size[0], patch_size[1]) tensor per entry
+        of ``hws``, each a view into a shared buffer allocated on ``device``.
+    """
     patch_num = len(hws)
-    h, w = torch.meshgrid(torch.arange(patch_size[0]), torch.arange(patch_size[1]))
+    h, w = torch.meshgrid(
+        torch.arange(patch_size[0], device=device),
+        torch.arange(patch_size[1], device=device),
+        indexing="ij",
+    )
     h, w = h / float(patch_size[0]), w / float(patch_size[1])
     c_h, c_w = 0.5, 0.5
     h, w = h - c_h, w - c_w
     weights_hw = (h**2 + w**2) ** 0.5 / sigma
     denorm = 1 / (sigma * math.sqrt(2 * math.pi))
     weights_hw = denorm * torch.exp(-0.5 * (weights_hw) ** 2)
 
-    weights = torch.zeros(1, patch_num, *image_shape)
+    weights = torch.zeros(1, patch_num, *image_shape, device=device)
     for idx, (h, w) in enumerate(hws):
         weights[:, idx, h : h + patch_size[0], w : w + patch_size[1]] = weights_hw
-    weights = weights.cuda()
     patch_weights = []
     for idx, (h, w) in enumerate(hws):
         patch_weights.append(
             weights[:, idx : idx + 1, h : h + patch_size[0], w : w + patch_size[1]]
         )
 
     return patch_weights
diff --git a/ptlflow/models/flowformerplusplus/flowformerplusplus.py b/ptlflow/models/flowformerplusplus/flowformerplusplus.py
@@ -197,7 +197,8 @@ def forward_tile(self, inputs, train_size):
         input_size = inputs["images"].shape[-2:]
         image_size = (max(self.args.tile_height, input_size[-2]), input_size[-1])
         hws = compute_grid_indices(image_size, train_size)
-        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma)
+        device = inputs["images"].device
+        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma,device=device)
 
         images, image_resizer = self.preprocess_images(
             inputs["images"],

diff --git a/ptlflow/models/flowformerplusplus/utils.py b/ptlflow/models/flowformerplusplus/utils.py
@@ -135,24 +135,51 @@ def compute_grid_indices(image_shape, patch_size, min_overlap=20):
     return [(h, w) for h in hs for w in ws]
 
 
-def compute_weight(hws, image_shape, patch_size, sigma=1.0, wtype="gaussian"):
+def compute_weight(
+    hws,
+    image_shape,
+    patch_size,
+    sigma=1.0,
+    wtype="gaussian",
+    device: torch.device = torch.device("cpu"),
+):
+    """Build one Gaussian weight map per tile.
+
+    Each tile is weighted by a Gaussian of the distance to the tile center,
+    measured in coordinates normalized by the tile size.
+
+    Args:
+        hws: (h, w) top-left index of every tile.
+        image_shape: (height, width) of the image the tiles are cut from.
+        patch_size: (height, width) of a tile.
+        sigma: standard deviation of the Gaussian, in normalized coordinates.
+        wtype: unused; only the Gaussian weighting is implemented.
+        device: device on which the weight tensors are created.
+
+    Returns:
+        A list with one (1, 1, patch_size[0], patch_size[1]) tensor per entry
+        of ``hws``, each a view into a shared buffer allocated on ``device``.
+    """
     patch_num = len(hws)
-    h, w = torch.meshgrid(torch.arange(patch_size[0]), torch.arange(patch_size[1]))
+    h, w = torch.meshgrid(
+        torch.arange(patch_size[0], device=device),
+        torch.arange(patch_size[1], device=device),
+        indexing="ij",
+    )
     h, w = h / float(patch_size[0]), w / float(patch_size[1])
     c_h, c_w = 0.5, 0.5
     h, w = h - c_h, w - c_w
     weights_hw = (h**2 + w**2) ** 0.5 / sigma
     denorm = 1 / (sigma * math.sqrt(2 * math.pi))
     weights_hw = denorm * torch.exp(-0.5 * (weights_hw) ** 2)
 
-    weights = torch.zeros(1, patch_num, *image_shape)
+    weights = torch.zeros(1, patch_num, *image_shape, device=device)
     for idx, (h, w) in enumerate(hws):
         weights[:, idx, h : h + patch_size[0], w : w + patch_size[1]] = weights_hw
-    weights = weights.cuda()
     patch_weights = []
     for idx, (h, w) in enumerate(hws):
         patch_weights.append(
             weights[:, idx : idx + 1, h : h + patch_size[0], w : w + patch_size[1]]
         )
 
     return patch_weights
diff --git a/ptlflow/models/matchflow/matchflow.py b/ptlflow/models/matchflow/matchflow.py
@@ -222,7 +222,8 @@ def forward_tile(self, inputs, train_size):
         input_size = inputs["images"].shape[-2:]
         image_size = (max(self.args.tile_height, input_size[-2]), input_size[-1])
         hws = compute_grid_indices(image_size, train_size)
-        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma)
+        device = inputs["images"].device
+        weights = compute_weight(hws, image_size, train_size, self.args.tile_sigma, device=device)
 
         images, image_resizer = self.preprocess_images(
             inputs["images"],

diff --git a/ptlflow/models/matchflow/utils.py b/ptlflow/models/matchflow/utils.py
@@ -263,24 +263,51 @@ def compute_grid_indices(image_shape, patch_size, min_overlap=20):
     return [(h, w) for h in hs for w in ws]
 
 
-def compute_weight(hws, image_shape, patch_size, sigma=1.0, wtype="gaussian"):
+def compute_weight(
+    hws,
+    image_shape,
+    patch_size,
+    sigma=1.0,
+    wtype="gaussian",
+    device: torch.device = torch.device("cpu"),
+):
+    """Build one Gaussian weight map per tile.
+
+    Each tile is weighted by a Gaussian of the distance to the tile center,
+    measured in coordinates normalized by the tile size.
+
+    Args:
+        hws: (h, w) top-left index of every tile.
+        image_shape: (height, width) of the image the tiles are cut from.
+        patch_size: (height, width) of a tile.
+        sigma: standard deviation of the Gaussian, in normalized coordinates.
+        wtype: unused; only the Gaussian weighting is implemented.
+        device: device on which the weight tensors are created.
+
+    Returns:
+        A list with one (1, 1, patch_size[0], patch_size[1]) tensor per entry
+        of ``hws``, each a view into a shared buffer allocated on ``device``.
+    """
     patch_num = len(hws)
-    h, w = torch.meshgrid(torch.arange(patch_size[0]), torch.arange(patch_size[1]))
+    h, w = torch.meshgrid(
+        torch.arange(patch_size[0], device=device),
+        torch.arange(patch_size[1], device=device),
+        indexing="ij",
+    )
     h, w = h / float(patch_size[0]), w / float(patch_size[1])
     c_h, c_w = 0.5, 0.5
     h, w = h - c_h, w - c_w
     weights_hw = (h**2 + w**2) ** 0.5 / sigma
     denorm = 1 / (sigma * math.sqrt(2 * math.pi))
     weights_hw = denorm * torch.exp(-0.5 * (weights_hw) ** 2)
 
-    weights = torch.zeros(1, patch_num, *image_shape)
+    weights = torch.zeros(1, patch_num, *image_shape, device=device)
     for idx, (h, w) in enumerate(hws):
         weights[:, idx, h : h + patch_size[0], w : w + patch_size[1]] = weights_hw
-    weights = weights.cuda()
     patch_weights = []
     for idx, (h, w) in enumerate(hws):
         patch_weights.append(
             weights[:, idx : idx + 1, h : h + patch_size[0], w : w + patch_size[1]]
         )
 
     return patch_weights
diff --git a/ptlflow/utils/io_adapter.py b/ptlflow/utils/io_adapter.py
@@ -127,6 +127,8 @@ def prepare_inputs(
                 del inputs[k]
             inputs = self.transform(inputs)
 
+        inputs = self._to_cuda(inputs)
+
         for k, v in inputs.items():
             if image_only and k != "images":
                 continue
@@ -138,8 +140,6 @@ def prepare_inputs(
                     v = self.scaler.fill(v, is_flow=k.startswith("flow"))
                 inputs[k] = v
 
-        inputs = self._to_cuda(inputs)
-
         return inputs
 
     def unscale(