From 13b96aedc48ad2da16149490b06a1a6bd8361335 Mon Sep 17 00:00:00 2001 From: Boris Fomitchev Date: Fri, 15 Nov 2024 00:48:02 -0800 Subject: [PATCH] Fixed fold_constants, test_handler switched to onnx (#8211) Fixed fold_constants: the result was not saved. test_handler switched to onnx as torch-tensorrt is causing issues with CI on various Torch versions and is not used anyway. ### Description In `monai/networks/utils.py`, `convert_to_onnx` called polygraphy's `fold_constants` but discarded its return value, so constant folding never took effect; the folded model is now assigned back to `onnx_model` and persisted with `save_onnx`. In `tests/test_trt_compile.py`, `test_handler` now uses the `onnx` method (with `dynamic_batchsize`) instead of `torch_trt`, because torch-tensorrt causes CI failures on various Torch versions and is not otherwise used. ### Types of changes - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [ ] New tests added to cover the changes. - [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [ ] In-line docstrings updated. - [ ] Documentation updated, tested `make html` command in the `docs/` folder. 
Signed-off-by: Boris Fomitchev --- monai/networks/utils.py | 5 +++-- tests/test_trt_compile.py | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/monai/networks/utils.py b/monai/networks/utils.py index 05627f9c00..1b4cb220ae 100644 --- a/monai/networks/utils.py +++ b/monai/networks/utils.py @@ -712,9 +712,10 @@ def convert_to_onnx( onnx_model = onnx.load(f) if do_constant_folding and polygraphy_imported: - from polygraphy.backend.onnx.loader import fold_constants + from polygraphy.backend.onnx.loader import fold_constants, save_onnx - fold_constants(onnx_model, size_threshold=constant_size_threshold) + onnx_model = fold_constants(onnx_model, size_threshold=constant_size_threshold) + save_onnx(onnx_model, f) if verify: if isinstance(inputs, dict): diff --git a/tests/test_trt_compile.py b/tests/test_trt_compile.py index 9716a4a715..e1323c201f 100644 --- a/tests/test_trt_compile.py +++ b/tests/test_trt_compile.py @@ -61,7 +61,7 @@ def tearDown(self): if current_device != self.gpu_device: torch.cuda.set_device(self.gpu_device) - @unittest.skipUnless(torch_trt_imported, "torch_tensorrt is required") + # @unittest.skipUnless(torch_trt_imported, "torch_tensorrt is required") def test_handler(self): from ignite.engine import Engine @@ -74,7 +74,7 @@ def test_handler(self): with tempfile.TemporaryDirectory() as tempdir: engine = Engine(lambda e, b: None) - args = {"method": "torch_trt"} + args = {"method": "onnx", "dynamic_batchsize": [1, 4, 8]} TrtHandler(net1, tempdir + "/trt_handler", args=args).attach(engine) engine.run([0] * 8, max_epochs=1) self.assertIsNotNone(net1._trt_compiler) @@ -86,7 +86,11 @@ def test_lists(self): model = ListAdd().cuda() with torch.no_grad(), tempfile.TemporaryDirectory() as tmpdir: - args = {"output_lists": [[-1], [2], []], "export_args": {"dynamo": False, "verbose": True}} + args = { + "output_lists": [[-1], [2], []], + "export_args": {"dynamo": False, "verbose": True}, + "dynamic_batchsize": [1, 4, 8], + } x = 
torch.randn(1, 16).to("cuda") y = torch.randn(1, 16).to("cuda") z = torch.randn(1, 16).to("cuda")