
Commit

altered pytorch and onnx
Ishaan-Datta committed Sep 11, 2024
1 parent b1c948f commit 6a7d199
Showing 3 changed files with 43 additions and 27 deletions.
35 changes: 16 additions & 19 deletions conversion_tools/ONNX_TRT.py
@@ -1,9 +1,9 @@
+import argparse
 import onnx
 import tensorrt as trt
-import pycuda.driver as cuda
-import pycuda.autoinit
 import numpy as np
+# import pycuda.driver as cuda
+# import pycuda.autoinit

 def get_max_memory():
     total, free = cuda.mem_get_info()
@@ -14,15 +14,14 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

-# precision can be FP32, FP16, or INT8. The batch size is the maximum number of samples that can be processed in a single inference. The get_max_memory() function calculates the maximum memory that can be used by the TensorRT engine. The convert_onnx_to_engine() function converts the ONNX model to a TensorRT engine and saves it to a file. The engine is built with the specified precision and batch size.
-def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt", FP16_mode = True, batch_size=1, input_shape=(1, 3, 224, 224)):
-    print("Loading the ONNX model")
-    onnx_model = onnx.load(model_path)
-
+def convert_onnx_to_trt(model_path, output_path, batch_size):
+    # # Simplify the ONNX model (optional)
+    # print("Loading the ONNX model")
+    # onnx_model = onnx.load(model_path)
+    # graph = gs.import_onnx(onnx_model)
+    # graph.toposort()
+    # onnx_model = gs.export_onnx(graph)

     TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
     builder = trt.Builder(TRT_LOGGER)
     network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
@@ -35,14 +34,10 @@ def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt",
                print(parser.get_error(error))
            return

-    # config.max_workspace_size = 1 << 30 # Adjust as needed
-    # builder.max_workspace_size = get_max_memory()
-    # builder.fp16_mode = fp16_mode
-    # builder.max_batch_size = batch_size
     config = builder.create_builder_config()
-    config.fp16_mode = FP16_mode
+    # config.fp16_mode = FP16_mode
     config.max_batch_size = batch_size
-    config.max_workspace_size = get_max_memory()
+    config.max_workspace_size = 15

     print("Building TensorRT engine. This may take a few minutes.")
     engine = builder.build_cuda_engine(network, config)
@@ -81,9 +76,11 @@ def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt",
print("Usage: python3 Onnx_TensorRT.py <model_path> <output_path> FP16_mode batch_size input_shape")
print("Example: python3 Onnx_TensorRT.py ./model.onnx ./model_trt.trt True 1 (1, 3, 224, 224)")

if len(sys.argv) < 2:
convert_onnx_to_trt()
else:
for i in range(len(sys.argv), 6):
sys.argv.append(None)
convert_onnx_to_trt(*sys.argv[1:6])
parser = argparse.ArgumentParser(description='Convert Onnx model to TensorRT')
parser.add_argument('--modelpath', type=str, default="./model.onnx", required=False, help='Path to the PyTorch model file (.pt)')
parser.add_argument('--outputpath', type=str, default="model_trt.trt", required=False, help='Path to save the converted TensorRT model file (.trt)')
# parser.add_argument('--FP16_mode', type=bool, default=True, help='FP16 mode for TensorRT')
parser.add_argument('--batch_size', type=int, default=1, help='Batch size for TensorRT')
args = parser.parse_args()

convert_onnx_to_trt(args.modelpath, args.outputpath, args.batch_size)
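
A note on the diff above: the new configuration lines mix TensorRT API generations. fp16_mode and max_batch_size were attributes of the old implicit-batch builder, not of the builder config; build_cuda_engine() predates builder configs and takes only the network; and config.max_workspace_size = 15 requests 15 bytes, since the workspace is specified in bytes. The --modelpath help string also still describes a PyTorch .pt file, and get_max_memory() still calls cuda.mem_get_info() even though the pycuda imports are now commented out. For reference, a minimal sketch of the same conversion against the TensorRT 8+ API; the 1 GiB workspace limit and the FP16 flag are illustrative assumptions, not values from this commit:

import tensorrt as trt

def build_engine(onnx_path="./model.onnx", engine_path="model_trt.trt", fp16=True):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    # ONNX models require an explicit-batch network in TensorRT 7+.
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, "rb") as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            return None
    config = builder.create_builder_config()
    # Workspace is given in bytes; 1 GiB here is an assumed value.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)
    if fp16:
        config.set_flag(trt.BuilderFlag.FP16)  # successor to builder.fp16_mode
    # build_serialized_network() supersedes build_cuda_engine().
    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        return None
    with open(engine_path, "wb") as f:
        f.write(serialized)
    return serialized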
2 changes: 1 addition & 1 deletion conversion_tools/PT_ONNX.py
@@ -56,7 +56,7 @@ def checkConfidenceConsistency(predictions_original, converted_results, toleranc

print("All confidence percentages are consistent")

def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1,3,224,224), constant_folding=False):
def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1, 3, 448, 1024), constant_folding=False):
print("Loading the PyTorch model")
model = torch.load(model_path)
model.eval()
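
The only functional change in PT_ONNX.py is the default input_shape, from (1, 3, 224, 224) to (1, 3, 448, 1024), presumably matching the resolution the model now expects. For context, a hedged sketch of the export call a function like this typically wraps; the tensor names and opset version are assumptions, not values read from this file:

import torch

def export_to_onnx(model, output_path="./model.onnx",
                   input_shape=(1, 3, 448, 1024), constant_folding=False):
    model.eval()
    dummy_input = torch.randn(input_shape)  # one 448x1024 RGB frame
    torch.onnx.export(
        model,
        dummy_input,
        output_path,
        do_constant_folding=constant_folding,  # mirrors the constant_folding parameter
        input_names=["input"],    # assumed name
        output_names=["output"],  # assumed name
        opset_version=17,         # assumed opset
    )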
33 changes: 26 additions & 7 deletions conversion_tools/PT_TRT.py
@@ -1,9 +1,9 @@
+import argparse
 import torch
 from torch2trt import torch2trt
-import numpy as np
+import torch_tensorrt
 # import pycuda.driver as cuda
 # import pycuda.autoinit
+import numpy as np

 def get_max_memory():
     total, free = cuda.mem_get_info()
@@ -14,14 +14,33 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

-def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
+# def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
+#     print("Loading the PyTorch model")
+#     model = torch.load(model_path, weights_only=True)
+#     model.eval()
+
+#     input_data = torch.randn(input_shape).cuda()
+#     print("Building TensorRT engine. This may take a few minutes.")
+#     model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
+#     # torch.save(model_trt.state_dict(), output_file)
+
+#     with open(output_path, 'wb') as f:
+#         f.write(model_trt.engine.serialize())
+
+#     print("Engine built successfully")
+#     print(f"Converted TensorRT engine saved at {output_path}")
+#     return model_trt
+
+def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', batch_size=1, input_shape=(1, 3, 448, 1024)):
     print("Loading the PyTorch model")
-    model = torch.load(model_path)
-    model.eval()
+    model = torch.load(model_path, weights_only=True)
+    model = model().eval().cuda()

-    input_data = torch.randn(input_shape).cuda()
+    inputs = [torch.randn(input_shape).cuda()]
+    trt_gm = torch_tensorrt.compile(model, inputs, ir="dynamo")
+    torch_tensorrt.save(trt_gm, "trt.ep", inputs=inputs)
     print("Building TensorRT engine. This may take a few minutes.")
-    model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
+    model_trt = torch2trt(model, [input_data], max_batch_size=batch_size, max_workspace_size=15000000000) # get_max_memory()
     # torch.save(model_trt.state_dict(), output_file)

     with open(output_path, 'wb') as f:
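
As committed, convert_pt_to_trt() now runs two conversion paths back to back: a torch_tensorrt dynamo compile that saves trt.ep, followed by the original torch2trt call, which still references input_data even though that variable was removed. Two further caveats: torch.load(..., weights_only=True) returns tensors/state dicts rather than a callable module, so the model().eval() line likely needs revisiting, and in recent torch_tensorrt releases ir is the second positional parameter, so the inputs list is usually passed by keyword. A minimal sketch of the dynamo path on its own, under those assumptions:

import torch
import torch_tensorrt

def compile_with_dynamo(model: torch.nn.Module,
                        input_shape=(1, 3, 448, 1024),
                        output_path="trt.ep"):
    # Assumes a fully constructed nn.Module; weights loaded with
    # weights_only=True would first need to be loaded into a model instance.
    model = model.eval().cuda()
    inputs = [torch.randn(input_shape).cuda()]
    # Pass inputs by keyword, since ir is the second positional parameter.
    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)
    torch_tensorrt.save(trt_gm, output_path, inputs=inputs)
    return trt_gm

# The saved exported program can be reloaded without recompiling (torch 2.1+):
# trt_gm = torch.export.load("trt.ep").module()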
