Commit 005c532
organized directories, cleaned up conversion tools, need testing
1 parent 47e47fc
Showing 116 changed files with 407 additions and 711 deletions.
5 files renamed without changes.
@@ -1,32 +1,103 @@
import sys

import torch
import torchvision
import torch.onnx

import onnx
import onnxruntime as ort

import numpy as np
import pycuda.driver as cuda

OPSET_VERS = 13

# network is the model
# network.eval()
# torch_out_load = network(example_data)
# mct.torch_to_onnx(network, example_data, "torch_model.onnx")
# ort_session = mct.loadOnnxModel("torch_model.onnx")
# ort_predictions = mct.predictOnnx(example_data.numpy(), session=ort_session)
# ort_predictions[0][0] = 3
# mct.checkPredictionConsistency(torch_out_load.detach().numpy(), ort_predictions)
# mct.checkConfidenceConsistency(torch_out_load.detach().numpy(), ort_predictions)
# given an array of test inputs and a path to an onnx model or a session, returns the predictions
def predictOnnx(x_test, session=None, dest_path=""):
    if session is None and dest_path == "":
        raise ValueError("No model or path provided, please specify one of them.")
    if session is None:
        session = loadOnnxModel(dest_path)

    results_ort = session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: x_test})
    return np.array(results_ort[0])

# given the predictions from the original model and the converted model, check if they are consistent
# shape of predictions_original and converted_results should be the same
# only checks the predicted class (aka the argmax)
# takes in two 2D arrays: the first dimension is the number of samples, the second dimension is the number of classes, and values correspond to confidence
def checkPredictionConsistency(predictions_original, converted_results):
    for n in range(predictions_original.shape[0]):
        if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]):
            print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}")
            print(f"{predictions_original[n]}, \n{converted_results[n]}")
            print("=====================================")
            raise ValueError("Predictions are not consistent")

    print("All predictions are consistent")

# given the predictions from the original model and the converted model, check if they are consistent
# shape of predictions_original and converted_results should be the same
# only checks the difference in confidence
# takes in two 2D arrays: the first dimension is the number of samples, the second dimension is the number of classes, and values correspond to confidence
# tolerance: the maximum difference in confidence that is allowed
def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5):
    np.testing.assert_allclose(predictions_original, converted_results, atol=tolerance)
    # for n in range(predictions_original.shape[0]):
    #     if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance):
    #         print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}")
    #         print("=====================================")
    #         return

    print("All confidence percentages are consistent")
def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1, 3, 224, 224), constant_folding=False):
    print("Loading the PyTorch model")
    model = torch.load(model_path)
    model.eval()
    # traced_model = torch.jit.trace(model, torch.randn(input_shape))

    input_data = torch.randn(input_shape).cuda()

    print("Exporting model to ONNX format")
    torch.onnx.export(model, input_data, output_path,
                      verbose=True,
                      opset_version=OPSET_VERS,
                      export_params=True,
                      do_constant_folding=constant_folding,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size'},
                                    'output': {0: 'batch_size'}})

    model = onnx.load(output_path)
    onnx.checker.check_model(model)
    print("Model converted successfully")
    print(model.graph)

    print(f"Converted ONNX model saved at {output_path}")
    # return loadOnnxModel(output_path)

def loadOnnxModel(path, providers=["CUDAExecutionProvider"]):
    return ort.InferenceSession(path, providers=providers)
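# Usage sketch (an assumption, mirroring the commented example at the top of this file):
# convert a saved classifier, then compare ONNX Runtime outputs against the original
# PyTorch outputs with the helpers above. `network` and `example_data` stand in for an
# in-memory model and a test batch.
#
# convert_pytorch_to_onnx("./model.pt", "./model.onnx")
# ort_session = loadOnnxModel("./model.onnx")
# ort_predictions = predictOnnx(example_data.numpy(), session=ort_session)
# torch_predictions = network(example_data).detach().numpy()
# checkPredictionConsistency(torch_predictions, ort_predictions)
# checkConfidenceConsistency(torch_predictions, ort_predictions)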
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python convert_to_trt.py <model_path>")
        sys.exit(1)

    model_path = sys.argv[1]
    convert_pytorch_to_onnx(model_path)

    model_path = input("Enter the full/absolute path to the model: ")
    model_name = input("Enter the name of the model: ")
    input_dimensions = (224, 224)
    model = torch.load(model_path)

    model.eval()
    x = torch.randn(1, 3, input_dimensions[0], input_dimensions[1])
    traced_model = torch.jit.trace(model, x)

    torch.onnx.export(traced_model,
                      x,
                      f"{model_name}.onnx",
                      export_params=True,
                      opset_version=10,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size'},
                                    'output': {0: 'batch_size'}})

    model = onnx.load(f"{model_name}.onnx")
    print("Model converted successfully")
    print(model.graph)
# improvements:
# Model Pruning: pruning removes the weights of less important neurons, which can reduce the size of the model and improve performance after conversion.
# Quantization: quantization reduces the numerical precision of the model's weights, which can significantly cut both the memory requirement and the computational cost of the model.
# Set the appropriate opset version: the ONNX opset version corresponds to the set of operators (and operator versions) supported. Newer opsets can include optimizations that older ones lack; it is set with the opset_version parameter of torch.onnx.export(). The latest opset as of ONNX 1.8.0 is 13.
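A minimal sketch of the quantization note above, assuming ONNX Runtime's post-training quantization tooling is available; the paths and weight type are illustrative assumptions, not taken from this commit:

from onnxruntime.quantization import quantize_dynamic, QuantType

# Dynamic (weight-only) quantization; no calibration data is needed.
quantize_dynamic(
    model_input="./model.onnx",         # model written by convert_pytorch_to_onnx
    model_output="./model.quant.onnx",  # smaller, weight-quantized copy
    weight_type=QuantType.QUInt8,       # store weights as 8-bit unsigned integers
)

The quantized copy can then be re-checked against the original PyTorch outputs with predictOnnx and the consistency helpers above.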
This file was deleted.
@@ -0,0 +1,43 @@
import sys
import torch
from torch2trt import torch2trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np

def get_max_memory():
    # pycuda's mem_get_info() returns (free, total) in bytes
    free, total = cuda.mem_get_info()
    max_mem = int(free * 0.95)

    print(f"Total GPU memory: {total / (1024**2)} MB")
    print(f"Free GPU memory: {free / (1024**2)} MB")
    print(f"Max memory to use: {max_mem / (1024**2)} MB")
    return max_mem
def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
    print("Loading the PyTorch model")
    model = torch.load(model_path)
    model.eval()

    input_data = torch.randn(input_shape).cuda()
    print("Building TensorRT engine. This may take a few minutes.")
    model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=get_max_memory())

    with open(output_path, 'wb') as f:
        f.write(model_trt.engine.serialize())

    print("Engine built successfully")
    print(f"Converted TensorRT engine saved at {output_path}")
    return model_trt
if __name__ == "__main__":
    print("Usage: python3 PyTorch_TensorRT.py <model_path> <output_path> <FP16_mode> <batch_size>")
    print("Example: python3 PyTorch_TensorRT.py ./model.pt ./model_trt.trt True 1")

    if len(sys.argv) < 2:
        convert_pt_to_trt()
    else:
        args = list(sys.argv[1:5])
        if len(args) >= 3:
            args[2] = args[2].lower() == "true"  # FP16_mode arrives as a string
        if len(args) >= 4:
            args[3] = int(args[3])               # batch_size arrives as a string
        convert_pt_to_trt(*args)
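For completeness, a hedged sketch of loading the serialized engine that convert_pt_to_trt writes, using the tensorrt Python bindings (assumed to be installed; the path is an example):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Deserialize the engine file written by convert_pt_to_trt.
with open("./model_trt.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

# The execution context runs inference; input/output device buffers still have to be
# allocated (e.g. with pycuda) before calling context.execute_v2(...).
context = engine.create_execution_context()

Alternatively, the TRTModule returned by convert_pt_to_trt can be called directly on CUDA tensors, like a regular torch.nn.Module.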
Empty file.