From 684688a2877b07b0e1da98bb13f18a60ac73d426 Mon Sep 17 00:00:00 2001
From: binliunls <107988372+binliunls@users.noreply.github.com>
Date: Tue, 22 Oct 2024 20:18:02 +0800
Subject: [PATCH] Optimize VISTA3D (#8123)

Fixes #8122.

### Description
As shown in [this
PR](https://github.com/Project-MONAI/model-zoo/pull/671), two
bottlenecks cause slow VISTA3D inference: an unconditional memory
allocation (`x.clone()`) and the per-sample for-loop that computes the
mask embedding. This PR addresses both by cloning the tensor only when
both `with_point` and `with_label` are set, and by replacing the
for-loop with a single batched matrix multiplication.

### Types of changes
<!--- Put an `x` in all the boxes that apply, and remove the items that
are not applicable -->
- [x] Non-breaking change (fix or new feature that would not break
existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing
functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Integration tests passed locally by running `./runtests.sh -f -u
--net --coverage`.
- [ ] Quick tests passed locally by running `./runtests.sh --quick
--unittests --disttests`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated, tested `make html` command in the `docs/`
folder.

Signed-off-by: binliu <binliu@nvidia.com>
Co-authored-by: Yiheng Wang <68361391+yiheng-wang-nv@users.noreply.github.com>
Co-authored-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/networks/nets/segresnet_ds.py | 4 +++-
 monai/networks/nets/vista3d.py      | 8 +++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/monai/networks/nets/segresnet_ds.py b/monai/networks/nets/segresnet_ds.py
index 1ac5a79ee3..098e490511 100644
--- a/monai/networks/nets/segresnet_ds.py
+++ b/monai/networks/nets/segresnet_ds.py
@@ -508,8 +508,10 @@ def forward(  # type: ignore
 
         outputs: list[torch.Tensor] = []
         outputs_auto: list[torch.Tensor] = []
-        x_ = x.clone()
+        x_ = x
         if with_point:
+            if with_label:
+                x_ = x.clone()
             i = 0
             for level in self.up_layers:
                 x = level["upsample"](x)
diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 4215a9a594..6313b7812d 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -639,12 +639,10 @@ def forward(self, src: torch.Tensor, class_vector: torch.Tensor):
         if self.use_mlp:
             class_embedding = self.mlp(class_embedding)
         # [b,1,feat] @ [1,feat,dim], batch dimension become class_embedding batch dimension.
-        masks = []
-        for i in range(b):
-            mask = class_embedding @ src[[i]].view(1, c, h * w * d)
-            masks.append(mask.view(-1, 1, h, w, d))
+        masks_embedding = class_embedding.squeeze() @ src.view(b, c, h * w * d)
+        masks_embedding = masks_embedding.view(b, -1, h, w, d).transpose(0, 1)
 
-        return torch.cat(masks, 1), class_embedding
+        return masks_embedding, class_embedding
 
 
 class TwoWayTransformer(nn.Module):