Skip to content

Commit

Permalink
implemented yolov5 #15
Browse files Browse the repository at this point in the history
  • Loading branch information
cansik committed Dec 8, 2022
1 parent 543a224 commit 7eaa027
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 19 deletions.
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ Here you find a list of implemented networks:
- YOLOv3
- YOLOv4
- YOLOv4-tiny
- [YOLOv5](https://github.com/ultralytics/yolov5/) (n, s, m, l, x)
- [YOLO Fastest & XL](https://github.com/dog-qiuqiu/Yolo-Fastest)
- SSDMobileNetV2
- Handtracking based on SSDMobileNetV2
Expand Down Expand Up @@ -174,8 +175,13 @@ YOLO a very fast and accurate single shot network. The pre-trained model is trai
- YOLOv3-tiny (very fast, but trading accuracy for speed)
- YOLOv3-spp (original model using [spatial pyramid pooling](https://stackoverflow.com/a/55014630/1138326))
- YOLOv3 (608)
- YOLOv4 (608) (most accurate network)
- YOLOv4 (608)
- YOLOv4-tiny (416)
- YOLOv5n (640)
- YOLOv5s (640)
- YOLOv5m (640)
- YOLOv5l (640)
- YOLOv5x (640)

```java
// setup the network
Expand All @@ -184,6 +190,11 @@ YOLONetwork net = vision.createYOLOv4Tiny();
YOLONetwork net = vision.createYOLOv3();
YOLONetwork net = vision.createYOLOv3SPP();
YOLONetwork net = vision.createYOLOv3Tiny();
YOLONetwork net = vision.createYOLOv5n();
YOLONetwork net = vision.createYOLOv5s();
YOLONetwork net = vision.createYOLOv5m();
YOLONetwork net = vision.createYOLOv5l();
YOLONetwork net = vision.createYOLOv5x();

// set confidence threshold
net.setConfidenceThreshold(0.2f);
Expand All @@ -193,6 +204,24 @@ net.setConfidenceThreshold(0.2f);
* [WebCam Example YOLO](examples/YOLOWebcamExample)
* [RealSense Example YOLO](examples/RealSenseYoloDetector)

#### YOLOv5
Since version `0.9.0`, YOLOv5 is implemented as well. It uses the pre-trained models converted to the ONNX format. At the moment, YOLOv5 does not work well with the implemented non-maximum suppression (NMS). To adjust the NMS settings, use the following functions.

```java
// set confidence threshold
net.setConfidenceThreshold(0.2f);
// set confidence threshold
net.set(0.2f);
// set the IoU threshold (overlapping of the bounding boxes)
net.setNmsThreshold(0.4f);
// set how many objects should be taken into account for nms
// 0 means all objects
net.setTopK(100);
```

#### SSDMobileNetV2 [[Paper](https://arxiv.org/abs/1512.02325)]
This network is a single shot detector based on the MobileNetV2 architecture. It is pre-trained on the 90-class COCO dataset and is really fast.

Expand Down
2 changes: 1 addition & 1 deletion examples/YOLOWebcamExample/YOLOWebcamExample.pde
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ ResultList<ObjectDetectionResult> detections;
int textSize = 12;

public void setup() {
size(640, 480, FX2D);
size(640, 480);

colorMode(HSB, 360, 100, 100);

Expand Down
58 changes: 58 additions & 0 deletions examples/YOLOv5/YOLOv5.pde
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import ch.bildspur.vision.*;
import ch.bildspur.vision.result.*;

DeepVision deepVision = new DeepVision(this);
YOLONetwork yolo;
ResultList<ObjectDetectionResult> detections;

PImage image;
int textSize = 12;

/**
 * Prepares the sketch: opens a 640x480 window, loads the test picture,
 * creates and sets up the YOLOv5l network and runs a single inference whose
 * result is stored in {@code detections} for {@code draw()}.
 */
public void setup() {
  size(640, 480);
  colorMode(HSB, 360, 100, 100);

  // picture the detector is run on
  image = loadImage("pexels-lina-kivaka-5623971.jpg");

  println("creating model...");
  yolo = deepVision.createYOLOv5l();

  println("loading yolo model...");
  yolo.setup();

  println("inferencing...");
  final float minConfidence = 0.95f;
  final int topK = 0;
  yolo.setConfidenceThreshold(minConfidence);
  yolo.setTopK(topK);

  // inference happens once here; draw() only renders the stored result
  detections = yolo.run(image);
}

/**
 * Renders the input picture and, for every stored detection, a bounding box
 * plus a filled label tag with the class name. The colour hue is derived from
 * the class id so every class gets its own colour. The window title shows the
 * current frame rate.
 */
public void draw() {
  background(55);

  image(image, 0, 0);

  // drawing state shared by all detections; set once instead of per box
  strokeWeight(3f);
  textSize(textSize);
  textAlign(LEFT, TOP);

  // loop-invariant values
  double hueStep = 360.0 / yolo.getLabels().size();
  int labelHeight = textSize + 3;

  for (ObjectDetectionResult detection : detections) {
    int hue = (int)(hueStep * detection.getClassId());

    // bounding box outline
    noFill();
    stroke(hue, 80, 100);
    rect(detection.getX(), detection.getY(), detection.getWidth(), detection.getHeight());

    // label background, placed directly above the box
    fill(hue, 80, 100);
    rect(detection.getX(), detection.getY() - labelHeight, textWidth(detection.getClassName()) + 4, labelHeight);

    // class name inside the label background
    fill(0);
    text(detection.getClassName(), detection.getX() + 2, detection.getY() - labelHeight);
  }

  surface.setTitle("YOLO Test - FPS: " + Math.round(frameRate));
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 36 additions & 1 deletion src/main/java/ch/bildspur/vision/DeepVision.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public DeepVision(PApplet sketch, boolean enableCUDABackend) {
public static void enableDesiredBackend(Net net) {
if (USE_DEFAULT_BACKEND) return;

if(haveOpenCL()) {
if (haveOpenCL()) {
System.out.println("DNN OpenCL backend enabled");
net.setPreferableBackend(opencv_dnn.DNN_BACKEND_OPENCV);
net.setPreferableTarget(opencv_dnn.DNN_TARGET_OPENCL);
Expand Down Expand Up @@ -195,6 +195,41 @@ public YOLONetwork createYOLOv4Tiny(int inputSize) {
return createYOLONetwork(Repository.YOLOv4TinyModel, Repository.YOLOv4TinyWeight, Repository.COCONames, inputSize);
}

/**
 * Builds a YOLOv5 network from a single weights dependency (no separate
 * config file) and loads its label names.
 *
 * @param weights   dependency providing the model weights
 * @param names     dependency providing the label names
 * @param inputSize used as both width and height of the network input
 * @return the created network with labels loaded and top-k set to 100
 */
protected YOLONetwork createYOLOv5(Dependency weights, Dependency names, int inputSize) {
    prepareDependencies(weights, names);

    // no config path is passed (null); the output-scaled flag is enabled
    final boolean outputScaled = true;
    YOLONetwork yolo = new YOLONetwork(null, weights.getPath(), inputSize, inputSize, outputScaled);

    yolo.loadLabels(names.getPath());
    yolo.setTopK(100);
    return yolo;
}

/**
 * Creates the YOLOv5n network with the COCO label names and a 640 pixel input size.
 *
 * @return the created YOLOv5n network
 */
public YOLONetwork createYOLOv5n() {
    final int inputSize = 640;
    return createYOLOv5(Repository.YOLOv5nOnnx, Repository.COCONames, inputSize);
}

/**
 * Creates the YOLOv5s network with the COCO label names and a 640 pixel input size.
 *
 * @return the created YOLOv5s network
 */
public YOLONetwork createYOLOv5s() {
    final int inputSize = 640;
    return createYOLOv5(Repository.YOLOv5sOnnx, Repository.COCONames, inputSize);
}

/**
 * Creates the YOLOv5m network with the COCO label names and a 640 pixel input size.
 *
 * @return the created YOLOv5m network
 */
public YOLONetwork createYOLOv5m() {
    final int inputSize = 640;
    return createYOLOv5(Repository.YOLOv5mOnnx, Repository.COCONames, inputSize);
}

/**
 * Creates the YOLOv5l network with the COCO label names and a 640 pixel input size.
 *
 * @return the created YOLOv5l network
 */
public YOLONetwork createYOLOv5l() {
    final int inputSize = 640;
    return createYOLOv5(Repository.YOLOv5lOnnx, Repository.COCONames, inputSize);
}

/**
 * Creates the YOLOv5x network with the COCO label names and a 640 pixel input size.
 *
 * @return the created YOLOv5x network
 */
public YOLONetwork createYOLOv5x() {
    final int inputSize = 640;
    return createYOLOv5(Repository.YOLOv5xOnnx, Repository.COCONames, inputSize);
}

/**
 * Creates the YOLO Fastest network with the COCO label names and a 320 pixel input size.
 *
 * @return the created YOLO Fastest network
 */
public YOLONetwork createYOLOFastest() {
    final int inputSize = 320;
    return createYOLONetwork(Repository.YOLOFastestModel, Repository.YOLOFastestWeight, Repository.COCONames, inputSize);
}
Expand Down
45 changes: 37 additions & 8 deletions src/main/java/ch/bildspur/vision/YOLONetwork.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ public class YOLONetwork extends ObjectDetectionNetwork {
// threshold handed to NMSBoxes as the NMS threshold during post-processing
private float nmsThreshold = 0.4f;
// NOTE(review): presumably bypasses the NMS step when true; its use is not visible in this excerpt
private boolean skipNMS = false;

// handed to NMSBoxes as the top-k argument; defaults to 0
private int topK = 0;

// when true, box coordinates are divided by the network width / height
// before being scaled to the frame size in post-processing
private boolean isOutputScaled = false;

// network instance, assigned in setup()
private Net net;
// names of the unconnected output layers, read from the net in setup()
private StringVector outNames;

Expand All @@ -38,10 +42,19 @@ public YOLONetwork(Path configPath, Path weightsPath, int width, int height) {
this.setConfidenceThreshold(0.5f);
}

/**
 * Creates a YOLO network like the four-argument constructor and additionally
 * records whether the network output is scaled.
 *
 * @param configPath     forwarded to the four-argument constructor
 * @param weightsPath    forwarded to the four-argument constructor
 * @param width          forwarded to the four-argument constructor
 * @param height         forwarded to the four-argument constructor
 * @param isOutputScaled stored in the {@code isOutputScaled} field
 */
public YOLONetwork(Path configPath, Path weightsPath, int width, int height, boolean isOutputScaled) {
this(configPath, weightsPath, width, height);
this.isOutputScaled = isOutputScaled;
}

public boolean setup() {
net = readNetFromDarknet(
configPath.toAbsolutePath().toString(),
weightsPath.toAbsolutePath().toString());
if (weightsPath.toString().endsWith(".onnx")) {
net = readNetFromONNX(weightsPath.toAbsolutePath().toString());
} else {
net = readNetFromDarknet(
configPath.toAbsolutePath().toString(),
weightsPath.toAbsolutePath().toString());
}

// setup output layers
outNames = net.getUnconnectedOutLayersNames();
Expand Down Expand Up @@ -98,6 +111,11 @@ private ResultList<ObjectDetectionResult> postprocess(Mat frame, MatVector outs)
// ones with high confidence scores. Assign the box's class label as the class
// with the highest score for the box.
Mat result = outs.get(i);
if (result.dims() > 2) {
// squeeze output mat
result = new Mat(result.size(1), result.size(2), CV_32F, result.ptr(0, i));
}

FloatIndexer data = result.createIndexer();

for (int j = 0; j < result.rows(); j++) {
Expand All @@ -112,11 +130,14 @@ private ResultList<ObjectDetectionResult> postprocess(Mat frame, MatVector outs)
}
}

float iw = isOutputScaled ? width : 1;
float ih = isOutputScaled ? height : 1;

if (maxScore > getConfidenceThreshold()) {
int centerX = (int) (data.get(j, 0) * frame.cols());
int centerY = (int) (data.get(j, 1) * frame.rows());
int width = (int) (data.get(j, 2) * frame.cols());
int height = (int) (data.get(j, 3) * frame.rows());
int centerX = (int) (data.get(j, 0) / iw * frame.cols());
int centerY = (int) (data.get(j, 1) / ih * frame.rows());
int width = (int) (data.get(j, 2) / iw * frame.cols());
int height = (int) (data.get(j, 3) / ih * frame.rows());
int left = centerX - width / 2;
int top = centerY - height / 2;

Expand Down Expand Up @@ -152,7 +173,7 @@ private ResultList<ObjectDetectionResult> postprocess(Mat frame, MatVector outs)
FloatPointer confidencesPointer = new FloatPointer(confidences.size());
confidencesPointer.put(confidences.get());

NMSBoxes(boxes, confidencesPointer, getConfidenceThreshold(), nmsThreshold, indices, 1.f, 0);
NMSBoxes(boxes, confidencesPointer, getConfidenceThreshold(), nmsThreshold, indices, 1.f, topK);

ResultList<ObjectDetectionResult> detections = new ResultList<>();
for (int i = 0; i < indices.limit(); ++i) {
Expand Down Expand Up @@ -211,4 +232,12 @@ public int getHeight() {
/**
 * Returns the underlying network object.
 *
 * @return the {@code net} field, which is assigned in {@code setup()}
 */
public Net getNet() {
return net;
}

/**
 * Returns the top-k value that is handed to NMSBoxes.
 *
 * @return the current top-k value
 */
public int getTopK() {
return topK;
}

/**
 * Sets the top-k value that is handed to NMSBoxes.
 *
 * @param topK the new top-k value
 */
public void setTopK(int topK) {
this.topK = topK;
}
}
6 changes: 6 additions & 0 deletions src/main/java/ch/bildspur/vision/dependency/Repository.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ public class Repository {
// networks

// object detection
// YOLOv5 model files in ONNX format, one per variant (n, s, m, l, x)
public static final Dependency YOLOv5nOnnx = new Dependency("yolov5n.onnx");
public static final Dependency YOLOv5sOnnx = new Dependency("yolov5s.onnx");
public static final Dependency YOLOv5mOnnx = new Dependency("yolov5m.onnx");
public static final Dependency YOLOv5lOnnx = new Dependency("yolov5l.onnx");
public static final Dependency YOLOv5xOnnx = new Dependency("yolov5x.onnx");

public static final Dependency YOLOv4Model = new Dependency("yolov4.cfg");
public static final Dependency YOLOv4Weight = new Dependency("yolov4.weights");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import ch.bildspur.vision.result.ObjectDetectionResult;
import ch.bildspur.vision.result.ResultList;
import org.bytedeco.javacpp.DoublePointer;
import org.bytedeco.opencv.global.opencv_dnn;
import org.bytedeco.opencv.opencv_core.Mat;
import org.bytedeco.opencv.opencv_core.Point;
import org.bytedeco.opencv.opencv_core.Scalar;
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/ch/bildspur/vision/test/Sketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public static void main(String... args) {
}

public void settings() {
size(640, 480, FX2D);
size(640, 480);
}

public void setup() {
Expand Down
10 changes: 4 additions & 6 deletions src/test/java/ch/bildspur/vision/test/YOLODetectionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import ch.bildspur.vision.YOLONetwork;
import ch.bildspur.vision.result.ObjectDetectionResult;
import ch.bildspur.vision.test.tools.StopWatch;
import org.opencv.dnn.Dnn;
import processing.core.PApplet;
import processing.core.PImage;

Expand Down Expand Up @@ -40,15 +39,14 @@ public void setup() {
testImage = officeImage;

println("creating network...");
yolo = vision.createYOLOv4Tiny();
vision.setUseDefaultBackend(true);
yolo = vision.createYOLOv5s();

println("loading model...");
yolo.setup();

yolo.getNet().setPreferableBackend(Dnn.DNN_BACKEND_OPENCV);
yolo.getNet().setPreferableTarget(Dnn.DNN_TARGET_OPENCL);

yolo.setConfidenceThreshold(0.2f);
yolo.setConfidenceThreshold(0.25f);
yolo.setNmsThreshold(0.1f);

println("inferencing...");
watch.start();
Expand Down

0 comments on commit 7eaa027

Please sign in to comment.