Skip to content

Commit

Permalink
feat(yolov8_obb): Implement obb task, add yolov8_obb sample.
Browse files Browse the repository at this point in the history
Change-Id: I1104b1ee00a69786f45c1f04ba4117d34f836dcf
  • Loading branch information
sophon-leevi authored and yizhou-xu committed Nov 7, 2024
1 parent 1bf4446 commit 0aa5339
Show file tree
Hide file tree
Showing 16 changed files with 722 additions and 4 deletions.
2 changes: 1 addition & 1 deletion element/algorithm/yolov8/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ sophon-stream yolov8插件具有一些可配置的参数,可以根据需求进
| model_path | 字符串 | "../data/models/BM1684X/yolov8s_int8_1b.bmodel" | yolov8模型路径 |
| threshold_conf | 浮点数或map | 0.5 | 目标检测物体置信度阈值,设置为浮点数时,所有类别共用同一个阈值;设置为map时,不同类别可以使用不同阈值,此时还需要正确设置class_names_file |
| threshold_nms | 浮点数 | 0.5 | 目标检测NMS IOU阈值 |
| task_type | 字符串 | "Detect" | yolov8算法类型,支持了 "Detect", "Cls", "Pose""Seg" |
| task_type | 字符串 | "Detect" | yolov8算法类型,支持了 "Detect", "Cls", "Pose", "Seg"和"Obb" |
| bgr2rgb | bool | true | 解码器解出来的图像默认是bgr格式,是否需要将图像转换成rgb格式 |
| mean | 浮点数组 || 图像前处理均值,长度为3;计算方式为: y=(x-mean)/std;若bgr2rgb=true,数组中数组顺序需为r、g、b,否则需为b、g、r |
| std | 浮点数组 || 图像前处理方差,长度为3;计算方式同上;若bgr2rgb=true数组中数组顺序需为r、g、b,否则需为b、g、r |
Expand Down
2 changes: 1 addition & 1 deletion element/algorithm/yolov8/README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ The Sophon-Stream YOLOv8 plugin has several configurable parameters that can be
| threshold_conf | float/map | 0.5 | Object detection confidence threshold. When set as a float number, all categories share the same threshold. When set as a map, different categories can have different thresholds. In second case, it's necessary to correctly set the class_names_file. |
| threshold_nms | float | 0.5 | NMS Threshold |
| bgr2rgb | bool | true | The images decoded by the decoder are in the default BGR format. whether a need to convert the images to the RGB format |
| task_type | string | "Detect" | yolov8 alg type, supports "Detect", "Cls", "Pose" and "Seg" |
| task_type | string | "Detect" | yolov8 algorithm type, supports "Detect", "Cls", "Pose", "Seg" and "Obb" |
| mean | float[] | \ | The image preprocessing requires mean values in an array of length 3. The formula used for calculation is `y=(x-mean)/std` . When bgr2rgb is set to true, the array should be in RGB order; otherwise, it should be in BGR order. |
| std | float[] | \ | The image preprocessing involves variance values in an array of length 3. The calculation method remains the same. When bgr2rgb is set to true, the array should be in RGB order; otherwise, it should be in BGR order. |
| stage | queue | ["pre"] | The three stages include preprocessing, inference, and postprocessing. |
Expand Down
2 changes: 1 addition & 1 deletion element/algorithm/yolov8/include/yolov8_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace yolov8 {

#define USE_ASPECT_RATIO 1

enum class TaskType { Detect = 0, Pose, Cls, Seg };
// YOLOv8 task variants handled by this element. Obb routes post-processing to
// the oriented-bounding-box path (Yolov8PostProcess::postProcessObb).
enum class TaskType { Detect = 0, Pose, Cls, Seg, Obb };

class Yolov8Context : public ::sophon_stream::element::Context {
public:
Expand Down
15 changes: 15 additions & 0 deletions element/algorithm/yolov8/include/yolov8_post_process.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ struct Paras {

using YoloV8BoxVec = std::vector<YoloV8Box>;

// One oriented-bounding-box candidate in centre/size/rotation form.
// Member order is kept as x, y, w, h, angle, score, class_id so aggregate
// initialisation by position keeps working.
struct obbBox {
  float x;      // box centre, x coordinate
  float y;      // box centre, y coordinate
  float w;      // extent along the box's own (rotated) x axis
  float h;      // extent along the box's own (rotated) y axis
  float angle;  // rotation in radians (fed to std::cos/std::sin, wrapped with M_PI)
  float score;  // class confidence that produced this candidate
  int class_id; // index of the class the score belongs to
};
using obbBoxVec = std::vector<obbBox>;

class Yolov8PostProcess : public ::sophon_stream::element::PostProcess {
public:
void init(std::shared_ptr<Yolov8Context> context);
Expand Down Expand Up @@ -71,6 +77,8 @@ class Yolov8PostProcess : public ::sophon_stream::element::PostProcess {
common::ObjectMetadatas& objectMetadatas);
void postProcessSeg(std::shared_ptr<Yolov8Context> context,
common::ObjectMetadatas& objectMetadatas);
void postProcessObb(std::shared_ptr<Yolov8Context> context,
common::ObjectMetadatas& objectMetadatas);
void clip_boxes(YoloV8BoxVec& yolobox_vec, int src_w, int src_h);

// yolov8 seg
Expand All @@ -81,6 +89,13 @@ class Yolov8PostProcess : public ::sophon_stream::element::PostProcess {
YoloV8BoxVec& yolov8box_input, int start,
const bm_tensor_t& segmentation_tensor, Paras& paras,
YoloV8BoxVec& yolov8box_output, float confThreshold);

//obb utils.
void nms_rotated(obbBoxVec& dets, float nmsConfidence = 0.5);
std::tuple<float, float, float> convariance_matrix(const obbBox& obb);
float probiou(const obbBox& obb1, const obbBox& obb2, float eps = 1e-7);
void regularize_rbox(obbBoxVec& obb);
common::ObbObjectMetadata xywhr2xyxyxyxy(const obbBox& obb);
};

} // namespace yolov8
Expand Down
14 changes: 13 additions & 1 deletion element/algorithm/yolov8/src/yolov8.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ const std::string Yolov8::elementName = "yolov8";
std::unordered_map<std::string, TaskType> taskMap{{"Detect", TaskType::Detect},
{"Pose", TaskType::Pose},
{"Cls", TaskType::Cls},
{"Seg", TaskType::Seg}};
{"Seg", TaskType::Seg},
{"Obb", TaskType::Obb}};

common::ErrorCode Yolov8::initContext(const std::string& json) {
common::ErrorCode errorCode = common::ErrorCode::SUCCESS;
Expand Down Expand Up @@ -210,6 +211,17 @@ common::ErrorCode Yolov8::initContext(const std::string& json) {
mContext->class_num =
mContext->bmNetwork->outputTensor(0)->get_shape()->dims[1] -
mContext->mask_len - 4;
else if (mContext->taskType == TaskType::Obb){
int ndim1 = mContext->bmNetwork->outputTensor(0)->get_shape()->dims[1];
int ndim2 = mContext->bmNetwork->outputTensor(0)->get_shape()->dims[2];
if (ndim1 < ndim2){
IVS_CRITICAL(
"We only support bmodel's output_shape like [N, box_num, nout], usually box_num > nout. "
"But your bmodel's shape is [{0:d}, {1:d}, {2:d}].", mContext->max_batch, ndim1, ndim2);
abort();
}
mContext->class_num = mContext->bmNetwork->outputTensor(0)->get_shape()->dims[2] - 5;
}
}

if (mContext->class_thresh_valid) {
Expand Down
237 changes: 237 additions & 0 deletions element/algorithm/yolov8/src/yolov8_post_process.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ void Yolov8PostProcess::postProcess(std::shared_ptr<Yolov8Context> context,
postProcessCls(context, objectMetadatas);
else if (context->taskType == TaskType::Seg)
postProcessSeg(context, objectMetadatas);
else if (context->taskType == TaskType::Obb)
postProcessObb(context, objectMetadatas);
}

void Yolov8PostProcess::clip_boxes(YoloV8BoxVec& yolobox_vec, int src_w,
Expand Down Expand Up @@ -1039,6 +1041,241 @@ void Yolov8PostProcess::get_mask(std::shared_ptr<Yolov8Context> context,
mask_out = mask(bound) > context->thresh_nms;
}

/**
 * Post-process YOLOv8 OBB (oriented bounding box) network outputs.
 *
 * For every frame in objectMetadatas (stopping at the first end-of-stream
 * frame) this:
 *   1. wraps the frame's output tensors and picks the one with the fewest
 *      dimensions as the decoded [N, box_num, nout] tensor;
 *   2. collects every (box, class) pair whose class score exceeds the
 *      configured threshold (multi-label), reading each row as
 *      [x, y, w, h, class scores..., angle];
 *   3. runs rotated NMS, keeps at most max_det highest-scoring boxes,
 *      regularises the rotation and maps the boxes from network input
 *      coordinates back to frame (or ROI) coordinates;
 *   4. converts each box to its four corners and appends it to
 *      obj->mObbObjectMetadatas.
 *
 * @param context          element context (network, thresholds, ROI, net size)
 * @param objectMetadatas  frames whose output tensors are decoded; results are
 *                         appended to each frame's mObbObjectMetadatas
 */
void Yolov8PostProcess::postProcessObb(
    std::shared_ptr<Yolov8Context> context,
    common::ObjectMetadatas& objectMetadatas) {
  for (const auto& obj : objectMetadatas) {
    if (obj->mFrame->mEndOfStream) break;

    // Candidate list is per frame. Keeping it outside the loop would let the
    // already-rescaled boxes of earlier frames take part in this frame's NMS
    // and be emitted again for this frame.
    obbBoxVec yolobox_vec;

    std::vector<std::shared_ptr<BMNNTensor>> outputTensors(context->output_num);
    for (int i = 0; i < context->output_num; i++) {
      outputTensors[i] = std::make_shared<BMNNTensor>(
          obj->mOutputBMtensors->handle,
          context->bmNetwork->m_netinfo->output_names[i],
          context->bmNetwork->m_netinfo->output_scales[i],
          obj->mOutputBMtensors->tensors[i].get(), context->bmNetwork->is_soc);
    }

    int frame_width = obj->mFrame->mSpData->width;
    int frame_height = obj->mFrame->mSpData->height;
    // Letterbox padding (tx1, ty1) and scale applied during pre-processing.
    int tx1 = 0, ty1 = 0;
    float ratio = 1.0;
#ifdef USE_ASPECT_RATIO
    bool isAlignWidth = false;
    ratio =
        context->roi_predefined
            ? get_aspect_scaled_ratio(context->roi.crop_w, context->roi.crop_h,
                                      context->net_w, context->net_h,
                                      &isAlignWidth)
            : get_aspect_scaled_ratio(frame_width, frame_height, context->net_w,
                                      context->net_h, &isAlignWidth);
    if (isAlignWidth) {
      ty1 = (int)((context->net_h -
                   (int)((context->roi_predefined ? context->roi.crop_h
                                                  : frame_height) *
                         ratio)) /
                  2);
    } else {
      tx1 = (int)((context->net_w -
                   (int)((context->roi_predefined ? context->roi.crop_w
                                                  : frame_width) *
                         ratio)) /
                  2);
    }
#endif

    // Select the output with the fewest dimensions. The minimum is tracked in
    // a local so the selection is repeated correctly for every frame;
    // comparing only against the persistent context->min_dim would leave
    // min_idx at 0 for every frame after the first one.
    int min_idx = 0;
    int min_dim = 0;
    int box_num = 0;
    for (int i = 0; i < context->output_num; ++i) {
      auto output_shape = context->bmNetwork->outputTensor(i)->get_shape();
      auto output_dims = output_shape->num_dims;
      assert(output_dims == 3 || output_dims == 5);
      if (output_dims == 5) {
        box_num += output_shape->dims[1] * output_shape->dims[2] *
                   output_shape->dims[3];
      }

      if (i == 0 || output_dims < min_dim) {
        min_idx = i;
        min_dim = output_dims;
      }
    }
    // Keep the context field updated exactly as before for other readers.
    if (context->min_dim > min_dim) {
      context->min_dim = min_dim;
    }

    // OBB models carry no mask coefficients; a row is 4 box values, the class
    // scores and one trailing angle.
    int mask_num = 0;
    auto out_tensor = outputTensors[min_idx];
    int m_class_num = out_tensor->get_shape()->dims[2] - mask_num - 5;
    int nout = m_class_num + mask_num + 5;
    int max_wh = 7680;  // (pixels) maximum box width and height
    bool agnostic = false;

    if (context->min_dim == 3 && context->output_num != 1) {
      std::cout << "--> WARNING: the current bmodel has redundant outputs"
                << std::endl;
      std::cout << "             you can remove the redundant outputs to "
                   "improve performance"
                << std::endl;
      std::cout << std::endl;
    }

    assert(box_num == 0 || box_num == out_tensor->get_shape()->dims[1]);
    box_num = out_tensor->get_shape()->dims[1];
    // No change is needed here when there is only a single image.
    float* output_data = (float*)out_tensor->get_cpu_data();

    // Candidates. Row layout of the tensor's last dim:
    // [x, y, w, h, cls_conf0, ..., cls_confK, rotate_angle]
    for (int i = 0; i < box_num; i++) {
      const float* row = output_data + i * nout;
      // multilabel: one candidate per class that passes its threshold
      for (int j = 0; j < m_class_num; j++) {
        float cur_conf = row[4 + j];
        float cur_class_thresh =
            context->class_thresh_valid
                ? context->thresh_conf[context->class_names[j]]
                : context->thresh_conf_min;
        if (cur_conf > cur_class_thresh) {
          obbBox box;
          box.score = cur_conf;
          box.class_id = j;
          // Shift boxes of different classes far apart so that NMS never
          // suppresses across classes; undone after NMS below.
          int c = agnostic ? 0 : box.class_id * max_wh;
          box.x = row[0] + c;
          box.y = row[1] + c;
          box.w = row[2];
          box.h = row[3];
          box.angle = row[nout - 1];
          yolobox_vec.push_back(box);
        }
      }
    }

    nms_rotated(yolobox_vec, context->thresh_nms);
    // nms_rotated leaves the boxes sorted by ascending score, so dropping the
    // front keeps the max_det best ones.
    if (yolobox_vec.size() > max_det) {
      yolobox_vec.erase(yolobox_vec.begin(),
                        yolobox_vec.begin() + (yolobox_vec.size() - max_det));
    }
    if (!agnostic) {
      for (auto& box : yolobox_vec) {
        int c = box.class_id * max_wh;
        box.x = box.x - c;
        box.y = box.y - c;
      }
    }
    regularize_rbox(yolobox_vec);

    // Undo the letterbox: remove padding, then scale back.
    float inv_ratio = 1.0 / ratio;
    for (auto& box : yolobox_vec) {
      box.x = std::round((box.x - tx1) * inv_ratio);
      box.y = std::round((box.y - ty1) * inv_ratio);
      box.w = std::round(box.w * inv_ratio);
      box.h = std::round(box.h * inv_ratio);
    }

    for (const auto& bbox : yolobox_vec) {
      std::shared_ptr<common::ObbObjectMetadata> obbData =
          std::make_shared<common::ObbObjectMetadata>(xywhr2xyxyxyxy(bbox));

      if (context->roi_predefined) {
        obbData->add_offset(context->roi.start_x, context->roi.start_y);
      }
      obj->mObbObjectMetadatas.push_back(obbData);
    }
  }
}


// Normalises every rotated box in place so that w is the longer side and the
// angle lies in [0, pi): when h > w the sides are exchanged and the angle is
// advanced by a quarter turn, then the angle is wrapped modulo pi.
void Yolov8PostProcess::regularize_rbox(obbBoxVec& obbVec) {
  for (auto it = obbVec.begin(); it != obbVec.end(); ++it) {
    obbBox& box = *it;
    float theta = box.angle;

    if (box.h > box.w) {
      const float longer = box.h;
      box.h = box.w;
      box.w = longer;
      theta = theta + M_PI / 2;
    }

    // std::fmod keeps the sign of the dividend, so lift negative remainders.
    theta = std::fmod(theta, M_PI);
    if (theta < 0) {
      theta += M_PI;
    }
    box.angle = theta;
  }
}

// Returns the three distinct entries of the 2x2 covariance matrix that
// probiou() uses to model a rotated box: (xx, yy, xy). The axis-aligned
// variances w^2/12 and h^2/12 are rotated by the box angle.
std::tuple<float, float, float> Yolov8PostProcess::convariance_matrix(const obbBox& obb) {
  const float var_w = obb.w * obb.w / 12.0;
  const float var_h = obb.h * obb.h / 12.0;

  const float theta = obb.angle;
  const float c = std::cos(theta);
  const float s = std::sin(theta);

  const float cov_xx = var_w * c * c + var_h * s * s;
  const float cov_yy = var_w * s * s + var_h * c * c;
  const float cov_xy = (var_w - var_h) * c * s;

  return std::tuple<float, float, float>(cov_xx, cov_yy, cov_xy);
}

float Yolov8PostProcess::probiou(const obbBox& obb1, const obbBox& obb2, float eps){
// Calculate the prob iou between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.
float a1, b1, c1, a2, b2, c2;
std::tie(a1, b1, c1) = convariance_matrix(obb1);
std::tie(a2, b2, c2) = convariance_matrix(obb2);
float x1 = obb1.x, y1 = obb1.y;
float x2 = obb2.x, y2 = obb2.y;
float t1 = ((a1 + a2) * std::pow(y1 - y2, 2) + (b1 + b2) * std::pow(x1 - x2, 2)) / ((a1 + a2) * (b1 + b2) - std::pow(c1 + c2, 2) + eps);
float t2 = ((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - std::pow(c1 + c2, 2) + eps);
float t3 = std::log(((a1 + a2) * (b1 + b2) - std::pow(c1 + c2, 2)) / (4 * std::sqrt(std::max(a1 * b1 - c1 * c1, 0.0f)) * std::sqrt(std::max(a2 * b2 - c2 * c2, 0.0f)) + eps) + eps);
float bd = 0.25 * t1 + 0.5 * t2 + 0.5 * t3;
bd = std::max(std::min(bd, 100.0f), eps);
float hd = std::sqrt(1.0 - std::exp(-bd) + eps);
return 1 - hd;
}


// Greedy non-maximum suppression for rotated boxes, in place.
//
// dets is sorted by ascending score; starting from the highest-scoring box,
// every lower-scoring box whose probiou() with a surviving box is at least
// nmsConfidence is dropped. Despite its name, nmsConfidence is compared
// against the overlap value, not against a score. On return dets holds the
// survivors, still ordered by ascending score.
//
// Suppressed boxes are only marked during the pairwise pass and removed in a
// single compaction afterwards, instead of calling vector::erase (which shifts
// the whole tail) once per suppressed box inside the nested loop.
void Yolov8PostProcess::nms_rotated(obbBoxVec& dets, float nmsConfidence) {
  std::sort(dets.begin(), dets.end(),
            [](const obbBox& a, const obbBox& b) { return a.score < b.score; });

  const int count = static_cast<int>(dets.size());
  std::vector<char> suppressed(dets.size(), 0);

  // Walk from the best box downwards; only surviving boxes may suppress.
  for (int keep = count - 1; keep > 0; --keep) {
    if (suppressed[keep]) continue;
    for (int i = 0; i < keep; ++i) {
      if (suppressed[i]) continue;
      if (probiou(dets[keep], dets[i]) >= nmsConfidence) {
        suppressed[i] = 1;
      }
    }
  }

  // Stable compaction of the survivors to the front of the vector.
  int write = 0;
  for (int read = 0; read < count; ++read) {
    if (suppressed[read]) continue;
    if (write != read) {
      dets[write] = dets[read];
    }
    ++write;
  }
  dets.erase(dets.begin() + write, dets.end());
}

// Converts a centre/size/angle box into its four corner points, each rounded
// to the nearest integer value, and carries class_id and score over.
common::ObbObjectMetadata Yolov8PostProcess::xywhr2xyxyxyxy(const obbBox& obb) {
  const float c = std::cos(obb.angle);
  const float s = std::sin(obb.angle);

  // Half-extent vectors of the rotated box: (wx, wy) runs along the width,
  // (hx, hy) holds the magnitudes of the height direction's components.
  const float half_w = obb.w / 2;
  const float half_h = obb.h / 2;
  const float wx = half_w * c;
  const float wy = half_w * s;
  const float hx = half_h * s;
  const float hy = half_h * c;

  common::ObbObjectMetadata corners;
  corners.class_id = obb.class_id;
  corners.score = obb.score;

  // Consecutive corners share an edge.
  corners.x1 = std::round(obb.x + wx + hx);
  corners.y1 = std::round(obb.y + wy - hy);

  corners.x2 = std::round(obb.x + wx - hx);
  corners.y2 = std::round(obb.y + wy + hy);

  corners.x3 = std::round(obb.x - wx - hx);
  corners.y3 = std::round(obb.y - wy + hy);

  corners.x4 = std::round(obb.x - wx + hx);
  corners.y4 = std::round(obb.y - wy - hy);

  return corners;
}

} // namespace yolov8
} // namespace element
} // namespace sophon_stream
38 changes: 38 additions & 0 deletions framework/common/obb_object_metadata.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===----------------------------------------------------------------------===//
//
// Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved.
//
// SOPHON-STREAM is licensed under the 2-Clause BSD License except for the
// third-party components.
//
//===----------------------------------------------------------------------===//

#ifndef SOPHON_STREAM_COMMON_OBB_OBJECT_METADATA_H_
#define SOPHON_STREAM_COMMON_OBB_OBJECT_METADATA_H_

#include <memory>
#include <string>
#include <vector>


namespace sophon_stream {
namespace common {
// One oriented bounding box expressed as four corner points plus the
// detection score and class index. Member order is x1, y1, ..., x4, y4,
// score, class_id.
struct ObbObjectMetadata {
  float x1, y1;  // corner 1
  float x2, y2;  // corner 2
  float x3, y3;  // corner 3
  float x4, y4;  // corner 4
  float score;   // detection confidence
  int class_id;  // class index

  // Translates all four corners by (x, y), e.g. to move ROI-relative
  // coordinates into the full frame.
  void add_offset(int x, int y) {
    x1 += x;
    y1 += y;
    x2 += x;
    y2 += y;
    x3 += x;
    y3 += y;
    x4 += x;
    y4 += y;
  }
};

} // namespace common
} // namespace sophon_stream

#endif // SOPHON_STREAM_COMMON_OBB_OBJECT_METADATA_H_
Loading

0 comments on commit 0aa5339

Please sign in to comment.