Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TensorRT YOLO implementation #17

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
BasedOnStyle: Google
AccessModifierOffset: -2
ConstructorInitializerIndentWidth: 2
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AlwaysBreakTemplateDeclarations: true
AlwaysBreakBeforeMultilineStrings: false
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: true
BinPackParameters: true
ColumnLimit: 120
ConstructorInitializerAllOnOneLineOrOnePerLine: true
DerivePointerBinding: false
PointerBindsToType: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 60
PenaltyBreakString: 100
PenaltyBreakFirstLessLess: 1000
PenaltyExcessCharacter: 1000
PenaltyReturnTypeOnItsOwnLine: 70
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: false
Standard: Auto
IndentWidth: 2
TabWidth: 2
UseTab: Never
IndentFunctionDeclarationAfterType: false
SpacesInParentheses: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpaceAfterControlStatementKeyword: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
SortIncludes: false
SpaceAfterCStyleCast: false

# Configure each individual brace in BraceWrapping
BreakBeforeBraces: Custom

# Control of individual brace wrapping cases
BraceWrapping: {
AfterClass: 'true'
AfterControlStatement: 'true'
AfterEnum : 'true'
AfterFunction : 'true'
AfterNamespace : 'true'
AfterStruct : 'true'
AfterUnion : 'true'
BeforeCatch : 'true'
BeforeElse : 'true'
IndentBraces : 'false'
}
49 changes: 49 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
Checks: '-*,
performance-*,
llvm-namespace-comment,
modernize-redundant-void-arg,
modernize-use-nullptr,
modernize-use-default,
modernize-use-override,
modernize-loop-convert,
readability-named-parameter,
readability-redundant-smartptr-get,
readability-redundant-string-cstr,
readability-simplify-boolean-expr,
readability-container-size-empty,
readability-identifier-naming,
'
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
CheckOptions:
- key: llvm-namespace-comment.ShortNamespaceLines
value: '10'
- key: llvm-namespace-comment.SpacesBeforeComments
value: '2'
- key: readability-braces-around-statements.ShortStatementLines
value: '2'
# type names
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.UnionCase
value: CamelCase
# method names
- key: readability-identifier-naming.MethodCase
value: camelBack
# variable names
- key: readability-identifier-naming.VariableCase
value: lower_case
- key: readability-identifier-naming.ClassMemberSuffix
value: '_'
# const static or global variables are UPPER_CASE
- key: readability-identifier-naming.EnumConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.StaticConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.ClassConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.GlobalVariableCase
value: UPPER_CASE
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
max-line-length = 160
exclude = tests/*
exclude = tests/* uw_detection/src/yolov3
max-complexity = 10
# We import modules here to make the API cleaner
per-file-ignores = **/recognize_speech/__init__.py:F401
5 changes: 4 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ jobs:
env:
AFTER_SCRIPT: 'cd $target_ws && rosenv && catkin build $(catkin list --depends-on roslint -u) --no-deps --no-status --verbose --make-args roslint'
CATKIN_LINT: true
CATKIN_LINT_ARGS: "--ignore missing_directory --ignore external_link_directory"
ROS_DISTRO: melodic
ROS_REPO: main
strategy:
Expand All @@ -15,7 +16,9 @@ jobs:
- {}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v2
with:
submodules: true
- uses: 'ros-industrial/industrial_ci@master'
env: ${{matrix.env}}
lint:
Expand Down
70 changes: 70 additions & 0 deletions uw_detection/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
cmake_minimum_required(VERSION 2.8.12)
project(uw_detection)

find_package(catkin REQUIRED COMPONENTS
cv_bridge
geometry_msgs
image_transport
message_generation
message_filters
message_runtime
roscpp
sensor_msgs
std_msgs
std_srvs
tf2
tf2_eigen
visualization_msgs)

catkin_python_setup()

catkin_package()


option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)

find_package(OpenCV REQUIRED)
include_directories(OpenCV_INCLUDE_DIRS
${catkin_INCLUDE_DIRS}
include)

find_package(CUDA)

if (CUDA_FOUND)
set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)


if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
message("embed_platform on")
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
message("embed_platform off")
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
endif()


set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

cuda_add_library(yololayer SHARED src/yolov3/yololayer.cu)

add_library(yolov3-spp src/yolov3/yolov3-spp.cpp)
target_link_libraries(yolov3-spp nvinfer cudart yololayer ${OpenCV_LIBS})

add_library(yolov3 src/yolov3/yolov3.cpp)
target_link_libraries(yolov3 nvinfer cudart yololayer ${OpenCV_LIBS})

add_executable(yolov3_spp_prepare_plan src/yolov3_spp_prepare_plan.cpp)
target_link_libraries(yolov3_spp_prepare_plan yolov3 nvinfer cudart yololayer ${OpenCV_LIBS} ${catkin_LIBRARIES})

add_executable(yolov3_prepare_plan src/yolov3_prepare_plan.cpp)
target_link_libraries(yolov3_prepare_plan yolov3 nvinfer cudart yololayer ${OpenCV_LIBS} ${catkin_LIBRARIES})

add_executable(yolo_ros_image_node src/yolo_ros_image_node.cpp)
target_link_libraries(yolo_ros_image_node yolov3 nvinfer cudart yololayer ${OpenCV_LIBS} ${catkin_LIBRARIES})

add_definitions(-O2 -pthread)
endif()

17 changes: 17 additions & 0 deletions uw_detection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# uw_detection

Wrappers for detection models. Tools for preparing data and training.

TensorRT YOLOv3 implementations taken from [tensorrtx](https://github.com/wang-xinyu/tensorrtx). Additional modifactions based on [this implementation](https://github.com/jkjung-avt/tensorrt_demos/blob/master/plugins/yolo_layer.cu#L39)

## Usage

### YOLO

All of the YOLO targets described here are only available when CMake can detect a working CUDA and TensorRT installation. Ensure these are set up correctly before beginning. They can't be run on the CPU

These implementations come with a helper for loading weights from the Ultralytics PyTorch trained models. Follow the steps in [their readme](https://github.com/wang-xinyu/tensorrtx/blob/master/yolov3/README.md) (the process involves using one of the `gen_wts.py` scripts to load the Torch graph and dump it into the necessary format). You may need to change the CUDA device set in the script.

TensorRT requires that you precompute an inference graph ("engine") before you can actually use the weights you've obtained. The `*_prepare_plan.cpp` targets will take care of this. They expect the weights file to be placed in the `share` folder at the root of the package. When they've run successfully (only required once), they'll output the serialized engine into the `share` folder. Note that if you're using a customized class list, you'll need to adjust the definition for the number of classes in "yololayer.h".

Now you can test the model on a ROS image topic using `yolo_ros_image_node.cpp`. It's configured to take images from Fetch's head camera topic and feed them through YOLOv3, outputting the results to the console and drawing them in a CV window. It'll work in simulation without changes assuming you've followed the previous steps for the Ultralytics provided yolov3 checkpoint.
94 changes: 94 additions & 0 deletions uw_detection/include/uw_detection/Utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#ifndef __TRT_UTILS_H_
#define __TRT_UTILS_H_

#include <iostream>
#include <vector>
#include <algorithm>
#include <cudnn.h>

#ifndef CUDA_CHECK

#define CUDA_CHECK(callstr) \
{ \
cudaError_t error_code = callstr; \
if (error_code != cudaSuccess) { \
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
assert(0); \
} \
}

#endif

namespace Tn
{
class Profiler : public nvinfer1::IProfiler
{
public:
void printLayerTimes(int itrationsTimes)
{
float totalTime = 0;
for (size_t i = 0; i < mProfile.size(); i++)
{
printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
totalTime += mProfile[i].second;
}
printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
}
private:
typedef std::pair<std::string, float> Record;
std::vector<Record> mProfile;

virtual void reportLayerTime(const char* layerName, float ms)
{
auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
if (record == mProfile.end())
mProfile.push_back(std::make_pair(layerName, ms));
else
record->second += ms;
}
};

//Logger for TensorRT info/warning/errors
class Logger : public nvinfer1::ILogger
{
public:

Logger(): Logger(Severity::kWARNING) {}

Logger(Severity severity): reportableSeverity(severity) {}

void log(Severity severity, const char* msg) override
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity) return;

switch (severity)
{
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
case Severity::kERROR: std::cerr << "ERROR: "; break;
case Severity::kWARNING: std::cerr << "WARNING: "; break;
case Severity::kINFO: std::cerr << "INFO: "; break;
default: std::cerr << "UNKNOWN: "; break;
}
std::cerr << msg << std::endl;
}

Severity reportableSeverity{Severity::kWARNING};
};

template<typename T>
void write(char*& buffer, const T& val)
{
*reinterpret_cast<T*>(buffer) = val;
buffer += sizeof(T);
}

template<typename T>
void read(const char*& buffer, T& val)
{
val = *reinterpret_cast<const T*>(buffer);
buffer += sizeof(T);
}
}

#endif
Loading