organized directories, cleaned up conversion tools, need testing
Ishaan-Datta committed Sep 5, 2024
1 parent 47e47fc commit 005c532
Showing 116 changed files with 407 additions and 711 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
88 changes: 56 additions & 32 deletions Conversion Tools/Onnx_TensorRT.py
@@ -1,38 +1,11 @@
import sys
import onnx
import tensorrt as trt
import pycuda.driver as cuda

def convert_onnx_to_engine(onnx_filename, engine_filename = None, max_batch_size = 32, max_workspace_size = 1 << 30, fp16_mode = True):
logger = trt.Logger(trt.Logger.WARNING)
with trt.Builder(logger) as builder, builder.create_network() as network, trt.OnnxParser(network, logger) as parser:
builder.max_workspace_size = max_workspace_size
builder.fp16_mode = fp16_mode
builder.max_batch_size = max_batch_size

print("Parsing ONNX file.")
with open(onnx_filename, 'rb') as model:
if not parser.parse(model.read()):
for error in range(parser.num_errors):
print(parser.get_error(error))

print("Completed parsing of ONNX file.")

print("Building TensorRT engine. This may take a few minutes.")
engine = builder.build_cuda_engine(network)

if engine_filename:
with open(engine_filename, 'wb') as f:
f.write(engine.serialize())

print("Completed building Engine.")
return engine, logger

model_name = input("Enter the name of the ONNX model file (without .onnx extension): ")
engine_name = input("Enter the name of the TensorRT engine file (without .engine extension): ")
precision = input("Enter the precision (FP32/FP16): ") # INT8 later
batch_size = int(input("Enter the maximum batch size: "))
import pycuda.autoinit
import numpy as np

def get_max_memory():
cuda.init()
total, free = cuda.mem_get_info()
max_mem = free * 0.95

@@ -41,7 +14,58 @@ def get_max_memory():
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

convert_onnx_to_engine(f"{model_name}.onnx", f"{engine_name}.engine", batch_size, get_max_memory(), precision == "FP16")
# precision can be FP32, FP16, or INT8.
# The batch size is the maximum number of samples that can be processed in a single inference.
# get_max_memory() calculates the maximum memory the TensorRT engine may use.
# convert_onnx_to_engine() converts the ONNX model to a TensorRT engine, built with the
# specified precision and batch size, and saves it to a file.
def convert_onnx_to_trt(model_path="./model.onnx", output_path="model_trt.trt", FP16_mode = True, batch_size=1, input_shape=(1, 3, 224, 224)):
print("Loading the ONNX model")
onnx_model = onnx.load(model_path)

# # Simplify the ONNX model (optional)
# graph = gs.import_onnx(onnx_model)
# graph.toposort()
# onnx_model = gs.export_onnx(graph)
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)

with open(model_path, 'rb') as model_file:
print("Parsing ONNX model")
if not parser.parse(model_file.read()):
for error in range(parser.num_errors):
print(parser.get_error(error))
return

# config.max_workspace_size = 1 << 30 # Adjust as needed
# builder.max_workspace_size = get_max_memory()
# builder.fp16_mode = fp16_mode
# builder.max_batch_size = batch_size
    config = builder.create_builder_config()
    if FP16_mode:
        config.set_flag(trt.BuilderFlag.FP16)  # fp16_mode is not an attribute of the builder config
    # the explicit-batch network takes its batch size from the input shapes,
    # so max_batch_size is not set here
    config.max_workspace_size = int(get_max_memory())

print("Building TensorRT engine. This may take a few minutes.")
    engine = builder.build_engine(network, config)
    if engine is None:
        print("Failed to build the TensorRT engine")
        return

# Serialize the TensorRT engine to a file
with open(output_path, 'wb') as f:
f.write(engine.serialize())

print("Engine built successfully")
print(f"Converted TensorRT engine saved at {output_path}")
return engine, TRT_LOGGER


if __name__ == "__main__":
print("Usage: python3 Onnx_TensorRT.py <model_path> <output_path> FP16_mode batch_size input_shape")
print("Example: python3 Onnx_TensorRT.py ./model.onnx ./model_trt.trt True 1 (1, 3, 224, 224)")

    if len(sys.argv) < 2:
        convert_onnx_to_trt()
    else:
        # pass only the arguments that were supplied so the remaining
        # parameters keep their defaults instead of being filled with None
        convert_onnx_to_trt(*sys.argv[1:6])

# improvements
# Use Dynamic Shapes: If your model supports it, using dynamic input shapes can improve the inference speed. This allows TensorRT to optimize the execution plan based on the actual input shapes at runtime.
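# A rough sketch (not part of this commit) of how dynamic shapes could be wired into
# convert_onnx_to_trt() via an optimization profile; the tensor name "input" and the
# min/opt/max shapes below are assumptions, not values taken from the repository:
#
# profile = builder.create_optimization_profile()
# profile.set_shape("input", (1, 3, 224, 224), (8, 3, 224, 224), (32, 3, 224, 224))
# config.add_optimization_profile(profile)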
127 changes: 99 additions & 28 deletions Conversion Tools/PyTorch_Onnx.py
@@ -1,32 +1,103 @@
import sys

import torch
import torchvision
import torch.onnx

import onnx
import onnxruntime as ort  # used by loadOnnxModel() and predictOnnx() below

import numpy as np
import pycuda.driver as cuda

OPSET_VERS = 13

# network is the model
# network.eval()
# torch_out_load = network(example_data)
# mct.torch_to_onnx(network, example_data, "torch_model.onnx")
# ort_session = mct.loadOnnxModel("torch_model.onnx")
# ort_predictions = mct.predictOnnx(example_data.numpy(), session=ort_session)
# ort_predictions[0][0] = 3
# mct.checkPredictionConsistency(torch_out_load.detach().numpy(), ort_predictions)
# mct.checkConfidenceConsistency(torch_out_load.detach().numpy(), ort_predictions)

# given an array of test inputs and a path to onnx model or a session returns the predictions
def predictOnnx(x_test,session=None,dest_path=""):
if session is None and dest_path == "":
raise ValueError("No model or path provided, please specifiy one of them.")
if session is None:
session = loadOnnxModel(dest_path)

results_ort = session.run([out.name for out in session.get_outputs()], {session.get_inputs()[0].name: x_test})
return np.array(results_ort[0])

# given the predictions from the original model and the converted model, check if they are consistent
# shape of predictions_original and converted_results should be the same
# only checks for the predicted class (aka the argmax)
# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence
def checkPredictionConsistency(predictions_original, converted_results):
for n in range(predictions_original.shape[0]):
if np.argmax(predictions_original[n]) != np.argmax(converted_results[n]):
print(f"Original: {np.argmax(predictions_original[n])}, ONNX: {np.argmax(converted_results[n])}")
print(f"{predictions_original[n]}, \n{converted_results[n]}")
print("=====================================")
raise ValueError("Predictions are not consistent")

print("All predictions are consistent")

# given the predictions from the original model and the converted model, check if they are consistent
# shape of predictions_original and converted_results should be the same
# only checks for the difference in confidence
# takes in two 2D arrays: first dimension is the number of samples, second dimension is the number of classes and values correspond to confidence
# tolerance: the maximum difference in confidence that is allowed
def checkConfidenceConsistency(predictions_original, converted_results, tolerance=1e-5):
np.testing.assert_allclose(predictions_original, converted_results,atol=tolerance)
# for n in range(predictions_original.shape[0]):
# if not np.allclose(predictions_original[n], converted_results[n], atol=tolerance):
# print(f"Original: \t {predictions_original[n]}, \nONNX: \t{converted_results[n]}")
# print("=====================================")
# return

print("All confidence percentages are consistent")

def convert_pytorch_to_onnx(model_path="./model.pt", output_path="./model.onnx", input_shape=(1,3,224,224), constant_folding=False):
print("Loading the PyTorch model")
    model = torch.load(model_path)
    model.eval()
    model.cuda()  # the dummy input below lives on the GPU, so the model must too
    # traced_model = torch.jit.trace(model, torch.randn(input_shape))

    input_data = torch.randn(input_shape).cuda()

print("Exporting model to ONNX format")
torch.onnx.export(model, input_data, output_path,
verbose=True,
opset_version=OPSET_VERS,
export_params=True,
do_constant_folding=constant_folding,
input_names = ['input'],
output_names = ['output'],
dynamic_axes={'input' : {0 : 'batch_size'},
'output' : {0 : 'batch_size'}}
)

model = onnx.load(output_path)
onnx.checker.check_model(model)
print("Model converted successfully")
print(model.graph)

print(f"Converted ONNX model saved at {output_path}")
# return loadOnnxModel(output_path)

def loadOnnxModel(path, providers=["CUDAExecutionProvider"]):
return ort.InferenceSession(path,providers=providers)
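# A possible end-to-end consistency check built from the helpers above (a sketch only;
# it assumes a model saved at ./model.pt and a NumPy batch x_test matching the model's
# input shape):
#
# convert_pytorch_to_onnx("./model.pt", "./model.onnx")
# session = loadOnnxModel("./model.onnx")
# onnx_preds = predictOnnx(x_test, session=session)
# model = torch.load("./model.pt").eval().cuda()
# torch_preds = model(torch.from_numpy(x_test).cuda()).detach().cpu().numpy()
# checkPredictionConsistency(torch_preds, onnx_preds)
# checkConfidenceConsistency(torch_preds, onnx_preds)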

if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python convert_to_trt.py <model_path>")
sys.exit(1)

model_path = sys.argv[1]
convert_pytorch_to_onnx(model_path)

model_path = input("Enter the full/absolute path to the model: ")
model_name = input("Enter the name of the model: ")
input_dimensions = (224, 224)
model = torch.load(model_path)

model.eval()
x = torch.randn(1, 3, input_dimensions[0], input_dimensions[1])
traced_model = torch.jit.trace(model, x)

torch.onnx.export(traced_model,
x,
f"{model_name}.onnx",
export_params=True,
opset_version=10,
do_constant_folding=True,
input_names = ['input'],
output_names = ['output'],
dynamic_axes={'input' : {0 : 'batch_size'},
'output' : {0 : 'batch_size'}})

model = onnx.load(f"{model_name}.onnx")
print("Model converted successfully")
print(model.graph)

# improvements:
# Model Pruning: Pruning removes weights or neurons that contribute little to the output, shrinking the model and speeding up conversion and inference.
# Quantization: Quantization reduces the numerical precision of the model's weights, which can significantly cut both the memory requirement and the computational cost of the model.
# Set the Appropriate Opset Version: The ONNX opset version corresponds to the set of operators and their versions supported. Newer opset versions can include optimizations that were not available in earlier ones. You can set the opset version with the opset_version parameter in torch.onnx.export(); the latest version as of ONNX 1.8.0 is 13.
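# A loose sketch of what the pruning and quantization suggestions above could look like
# in PyTorch before export; the layer name "fc" and the 30% sparsity are illustrative
# assumptions, not taken from the repository's models:
#
# import torch.nn.utils.prune as prune
# prune.l1_unstructured(model.fc, name="weight", amount=0.3)  # prune 30% of the smallest weights
# quantized = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)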
30 changes: 0 additions & 30 deletions Conversion Tools/PyTorch_TensorFlow.py

This file was deleted.

43 changes: 43 additions & 0 deletions Conversion Tools/PyTorch_TensorRT.py
@@ -0,0 +1,43 @@
import sys
import torch
from torch2trt import torch2trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np

def get_max_memory():
total, free = cuda.mem_get_info()
max_mem = free * 0.95

print(f"Total GPU memory: {total / (1024**2)} MB")
print(f"Free GPU memory: {free / (1024**2)} MB")
print(f"Max memory to use: {max_mem / (1024**2)} MB")
return max_mem

def convert_pt_to_trt(model_path='./model.pt', output_path='./model_trt.trt', FP16_mode=True, batch_size=1, input_shape=(1, 3, 224, 224)):
print("Loading the PyTorch model")
    model = torch.load(model_path)
    model.eval()
    model.cuda()  # torch2trt expects the model and the example input on the GPU

    input_data = torch.randn(input_shape).cuda()
    print("Building TensorRT engine. This may take a few minutes.")
    model_trt = torch2trt(model, [input_data], fp16_mode=FP16_mode, max_batch_size=batch_size, max_workspace_size=int(get_max_memory()))

with open(output_path, 'wb') as f:
f.write(model_trt.engine.serialize())

print("Engine built successfully")
print(f"Converted TensorRT engine saved at {output_path}")
return model_trt


if __name__ == "__main__":
print("Usage: python3 PyTorch_TensorRT.py <model_path> <output_path> FP16_mode batch_size input_shape")
print("Example: python3 PyTorch_TensorRT.py ./model.pt ./model_trt.trt True 1 (1, 3, 224, 224)")

    if len(sys.argv) < 2:
        convert_pt_to_trt()
    else:
        # pass only the arguments that were supplied so the remaining
        # parameters keep their defaults instead of being filled with None
        convert_pt_to_trt(*sys.argv[1:6])
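# A hedged sketch (not in this commit) of loading the serialized .trt file back for
# inference with the TensorRT runtime; the path and variable names are illustrative:
#
# import tensorrt as trt
# logger = trt.Logger(trt.Logger.WARNING)
# with open("./model_trt.trt", "rb") as f, trt.Runtime(logger) as runtime:
#     engine = runtime.deserialize_cuda_engine(f.read())
#     context = engine.create_execution_context()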
Empty file added Conversion Tools/README.md
Empty file.