
Commit

altered pytorch and onnx
Ishaan-Datta committed Sep 11, 2024
1 parent b1c948f commit 6a7d199
Showing 3 changed files with 43 additions and 27 deletions.
35 changes: 16 additions & 19 deletions conversion_tools/ONNX_TRT.py
@@ -1,9 +1,9 @@
+import argparse
 import onnx
 import tensorrt as trt
-import pycuda.driver as cuda
-import pycuda.autoinit
 import numpy as np
+# import pycuda.driver as cuda
+# import pycuda.autoinit

 def get_max_memory():
     total, free = cuda.mem_get_info()
@@ -14,15 +14,14 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

-# precision can be FP32, FP16, or INT8. The batch size is the maximum number of samples that can be processed in a single inference. The get_max_memory() function calculates the maximum memory that can be used by the TensorRT engine. The convert_onnx_to_engine() function converts the ONNX model to a TensorRT engine and saves it to a file. The engine is built with the specified precision and batch size.
-def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt", FP16_mode = True, batch_size=1, input_shape=(1, 3, 224, 224)):
-    print("Loading the ONNX model")
-    onnx_model = onnx.load(model_path)
-
+def convert_onnx_to_trt(model_path, output_path, batch_size):
+    # # Simplify the ONNX model (optional)
+    # print("Loading the ONNX model")
+    # onnx_model = onnx.load(model_path)
+    # graph = gs.import_onnx(onnx_model)
+    # graph.toposort()
+    # onnx_model = gs.export_onnx(graph)

     TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
     builder = trt.Builder(TRT_LOGGER)
     network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
@@ -35,14 +34,10 @@ def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt",
                print(parser.get_error(error))
            return

-    # config.max_workspace_size = 1 << 30 # Adjust as needed
-    # builder.max_workspace_size = get_max_memory()
-    # builder.fp16_mode = fp16_mode
-    # builder.max_batch_size = batch_size
     config = builder.create_builder_config()
-    config.fp16_mode = FP16_mode
+    # config.fp16_mode = FP16_mode
     config.max_batch_size = batch_size
-    config.max_workspace_size = get_max_memory()
+    config.max_workspace_size = 15

     print("Building TensorRT engine. This may take a few minutes.")
     engine = builder.build_cuda_engine(network, config)
@@ -81,9 +76,11 @@ def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt",
print("Usage: python3 Onnx_TensorRT.py <model_path> <output_path> FP16_mode batch_size input_shape")
print("Example: python3 Onnx_TensorRT.py ./model.onnx ./model_trt.trt True 1 (1, 3, 224, 224)")

if len(sys.argv) < 2:
convert_onnx_to_trt()
else:
for i in range(len(sys.argv), 6):
sys.argv.append(None)
convert_onnx_to_trt(*sys.argv[1:6])
parser = argparse.ArgumentParser(description='Convert Onnx model to TensorRT')
parser.add_argument('--modelpath', type=str, default="./model.onnx", required=False, help='Path to the PyTorch model file (.pt)')
parser.add_argument('--outputpath', type=str, default="model_trt.trt", required=False, help='Path to save the converted TensorRT model file (.trt)')
# parser.add_argument('--FP16_mode', type=bool, default=True, help='FP16 mode for TensorRT')
parser.add_argument('--batch_size', type=int, default=1, help='Batch size for TensorRT')
args = parser.parse_args()

convert_onnx_to_trt(args.modelpath, args.outputpath, args.batch_size)
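
A note on the diff above: the new configuration lines mix TensorRT API generations. fp16_mode and max_batch_size were attributes of the old implicit-batch builder, not of the builder config; build_cuda_engine() predates builder configs and takes only the network; and config.max_workspace_size = 15 requests 15 bytes, since the workspace is specified in bytes. The --modelpath help string also still describes a PyTorch .pt file, and get_max_memory() still calls cuda.mem_get_info() even though the pycuda imports are now commented out. For reference, a minimal sketch of the same conversion against the TensorRT 8+ API; the 1 GiB workspace limit and the FP16 flag are illustrative assumptions, not values from this commit:

import tensorrt as trt

def build_engine(onnx_path="./model.onnx", engine_path="model_trt.trt", fp16=True):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    # ONNX models require an explicit-batch network in TensorRT 7+.
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, "rb") as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            return None
    config = builder.create_builder_config()
    # Workspace is given in bytes; 1 GiB here is an assumed value.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)
    if fp16:
        config.set_flag(trt.BuilderFlag.FP16)  # successor to builder.fp16_mode
    # build_serialized_network() supersedes build_cuda_engine().
    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        return None
    with open(engine_path, "wb") as f:
        f.write(serialized)
    return serialized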
2 changes: 1 addition & 1 deletion conversion_tools/PT_ONNX.py
@@ -56,7 +56,7 @@ def checkConfidenceConsistency(predictions_original, converted_results, toleranc

print("All confidence percentages are consistent")

def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1,3,224,224), constant_folding=False):
def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1, 3, 448, 1024), constant_folding=False):
print("Loading the PyTorch model")
model = torch.load(model_path)
model.eval()
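
The only functional change in PT_ONNX.py is the default input_shape, from (1, 3, 224, 224) to (1, 3, 448, 1024), presumably matching the resolution the model now expects. For context, a hedged sketch of the export call a function like this typically wraps; the tensor names and opset version are assumptions, not values read from this file:

import torch

def export_to_onnx(model, output_path="./model.onnx",
                   input_shape=(1, 3, 448, 1024), constant_folding=False):
    model.eval()
    dummy_input = torch.randn(input_shape)  # one 448x1024 RGB frame
    torch.onnx.export(
        model,
        dummy_input,
        output_path,
        do_constant_folding=constant_folding,  # mirrors the constant_folding parameter
        input_names=["input"],    # assumed name
        output_names=["output"],  # assumed name
        opset_version=17,         # assumed opset
    )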
33 changes: 26 additions & 7 deletions conversion_tools/PT_TRT.py
@@ -1,9 +1,9 @@
+import argparse
 import torch
 from torch2trt import torch2trt
-import numpy as np
+import torch_tensorrt
 # import pycuda.driver as cuda
 # import pycuda.autoinit
+import numpy as np

 def get_max_memory():
     total, free = cuda.mem_get_info()
@@ -14,14 +14,33 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

-def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
+# def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
+#     print("Loading the PyTorch model")
+#     model = torch.load(model_path, weights_only=True)
+#     model.eval()
+
+#     input_data = torch.randn(input_shape).cuda()
+#     print("Building TensorRT engine. This may take a few minutes.")
+#     model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
+#     # torch.save(model_trt.state_dict(), output_file)
+
+#     with open(output_path, 'wb') as f:
+#         f.write(model_trt.engine.serialize())
+
+#     print("Engine built successfully")
+#     print(f"Converted TensorRT engine saved at {output_path}")
+#     return model_trt
+
+def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', batch_size=1, input_shape=(1, 3, 448, 1024)):
     print("Loading the PyTorch model")
-    model = torch.load(model_path)
-    model.eval()
+    model = torch.load(model_path, weights_only=True)
+    model = model().eval().cuda()

-    input_data = torch.randn(input_shape).cuda()
+    inputs = [torch.randn(input_shape).cuda()]
+    trt_gm = torch_tensorrt.compile(model, inputs, ir="dynamo")
+    torch_tensorrt.save(trt_gm, "trt.ep", inputs=inputs)
     print("Building TensorRT engine. This may take a few minutes.")
-    model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
+    model_trt = torch2trt(model, [input_data], max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
     # torch.save(model_trt.state_dict(), output_file)

     with open(output_path, 'wb') as f:
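
As committed, convert_pt_to_trt() now runs two conversion paths back to back: a torch_tensorrt dynamo compile that saves trt.ep, followed by the original torch2trt call, which still references input_data even though that variable was removed. Two further caveats: torch.load(..., weights_only=True) returns tensors/state dicts rather than a callable module, so the model().eval() line likely needs revisiting, and in recent torch_tensorrt releases ir is the second positional parameter, so the inputs list is usually passed by keyword. A minimal sketch of the dynamo path on its own, under those assumptions:

import torch
import torch_tensorrt

def compile_with_dynamo(model: torch.nn.Module,
                        input_shape=(1, 3, 448, 1024),
                        output_path="trt.ep"):
    # Assumes a fully constructed nn.Module; weights loaded with
    # weights_only=True would first need to be loaded into a model instance.
    model = model.eval().cuda()
    inputs = [torch.randn(input_shape).cuda()]
    # Pass inputs by keyword, since ir is the second positional parameter.
    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)
    torch_tensorrt.save(trt_gm, output_path, inputs=inputs)
    return trt_gm

# The saved exported program can be reloaded without recompiling (torch 2.1+):
# trt_gm = torch.export.load("trt.ep").module()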
