Commit
add Image::from_opencv_mat() and opencv as feature
Michal Conos committed Sep 14, 2024
1 parent 33febba commit bdede6e
Showing 7 changed files with 237 additions and 23 deletions.
14 changes: 13 additions & 1 deletion Cargo.toml
@@ -5,15 +5,28 @@ edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = []
# use opencv to resize the image during the pre-process step
opencv = ["dep:opencv"]

[dependencies]
tch = "0.17.0"
opencv = { version = "0.92.3", optional = true }

[[example]]
name = "yolo-predict"
path = "examples/predict/main.rs"

[[example]]
name = "video"
path = "examples/video/main.rs"
required-features = ["opencv"]


[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
opencv = "0.92.3"


[[bench]]
@@ -31,4 +44,3 @@ harness = false
[[bench]]
name = "e2e"
harness = false

59 changes: 52 additions & 7 deletions README.md
@@ -15,10 +15,14 @@ The following library is tested against stable version `libtorch-2.4.0`.

### Install opencv

If you want to use the `opencv` feature, install OpenCV first:

```bash
apt install libopencv-dev clang libclang-dev
```



### Obtain YOLOv8

```bash
@@ -32,19 +36,60 @@ ls -l yolov8n.torchscript
More on yolo export: https://docs.ultralytics.com/modes/export/#__tabbed_1_2


### Get `libtorch`

Get `libtorch` from https://pytorch.org/; the current bindings use stable version `2.4.0`. Decide beforehand whether you want the `cpu` or the `cuda` build.
```bash
unzip ~/Downloads/libtorch-cxx11-abi-shared-with-deps-2.4.0+cpu.zip
export LIBTORCH=$(pwd)/libtorch/
export LIBTORCH_INCLUDE=$(pwd)/libtorch/
export LIBTORCH_LIB=$(pwd)/libtorch/
```
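
(The `env` file sourced in the following steps is assumed to simply re-export these `LIBTORCH*` variables.)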

### Build

```bash
source ./env # environment
cargo build --release
```

### Build Examples

The video example needs OpenCV installed and the `opencv` feature enabled.

```bash
source ./env # environment
cargo build --release --examples --features=opencv
```

### Run Examples

```bash
export LD_LIBRARY_PATH="$(pwd)/libtorch/lib/:$LD_LIBRARY_PATH"
source ./env # environment
target/release/examples/yolo-predict
```

```bash
source ./env # environment
target/release/examples/video <some.video.file>
```

## Description

This library tries to do things as closely as possible to the `ultralytics` Python package.

### Image pre-processing

Two options are available: using `torch` tensors, or using `opencv`. In the latter case the `opencv` feature must be enabled (it is off by default).

We pad the image exactly as `ultralytics` does, i.e. the aspect ratio is preserved and the gray (114, 114, 114) color
is used as padding to keep `(640, 640)` as the input dimension.


![bus-padded](images/bus_padded.jpg)
![katri-padded](images/katri_padded.jpg)
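
For reference, the resize-then-pad step boils down to the sketch below. This is only a condensed illustration based on `utils::square64` and the padding code in `src/image.rs` from this commit; the names `letterbox_dims` and `pad_to_square` are illustrative and not part of the crate's API.

```rust
use tch::Tensor;

// Scaled width/height that keep the aspect ratio inside a `size` x `size`
// square (mirrors `utils::square64`).
fn letterbox_dims(size: i64, w: i64, h: i64) -> (i64, i64) {
    let aspect = w as f32 / h as f32;
    if w > h {
        (size, (size as f32 / aspect) as i64)
    } else {
        ((size as f32 * aspect) as i64, size)
    }
}

// Paste a CHW uint8 tensor of shape [3, sh, sw] into the center of a gray
// (114, 114, 114) square canvas, the same way `Image::from_opencv_mat` does.
fn pad_to_square(scaled: &Tensor, size: i64, sw: i64, sh: i64) -> Tensor {
    let gray: Vec<u8> = vec![114; (size * size * 3) as usize];
    let bg = Tensor::from_slice(&gray).reshape([3, size, size]);
    bg.narrow(2, (size - sw) / 2, sw)
        .narrow(1, (size - sh) / 2, sh)
        .copy_(scaled);
    bg
}
```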

### Image post-processing

We try to use `torch` and _tensors_ for as long as possible. However, in certain cases it may be faster to move the prediction to the `cpu`. In that case, initialize YOLO as:

```rust
let yolo = yolo_v8::YoloV8ObjectDetection::with_model(yolo_v8::YOLOModel::Nano).post_process_on_cpu();
```
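
Whether this pays off depends on the model and your hardware; the per-frame timings printed by the video example are an easy way to compare the two settings.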
91 changes: 91 additions & 0 deletions examples/video/main.rs
@@ -0,0 +1,91 @@
use std::{env::args, time::Instant};

use opencv::{
core::{Mat, MatTrait, MatTraitConst, Rect, Scalar, Vector},
highgui::{destroy_all_windows, imshow, wait_key},
imgcodecs::imwrite,
imgproc::{rectangle, LINE_8},
videoio::{VideoCaptureTrait, CAP_ANY},
};
use tch::{Kind, Tensor};
use yolo_v8::image::Image;

fn main() -> Result<(), opencv::Error> {
let filename = args().nth(1).unwrap_or("test3.mp4".to_owned());
println!("filename={filename}");
let mut cap = opencv::videoio::VideoCapture::from_file(&filename, CAP_ANY)?;
let yolo = yolo_v8::YoloV8ObjectDetection::with_model(yolo_v8::YOLOModel::Nano); //.post_process_on_cpu();
let device = tch::Device::cuda_if_available();
println!("device: {:?}", device);

loop {
let mut timings = Vec::new();
let mut frame = Mat::default();
let start = Instant::now();
let have_image = cap.read(&mut frame)?;
if !have_image {
break;
}

let mut image = Image::from_opencv_mat(&frame, (640, 640))?;
timings.push(("read_frame", start.elapsed()));
let start = Instant::now();
let predictions = yolo.predict(&image, 0.25, 0.7); //.postprocess();
timings.push(("detection", start.elapsed()));
// image.draw_rectangle(&predictions);
// image.save("result.jpg");

let start = Instant::now();
let predictions = predictions.postprocess();
timings.push(("postprocess", start.elapsed()));
let start = Instant::now();
for bbox in predictions.0 {
let w = bbox.xmax - bbox.xmin;
let h = bbox.ymax - bbox.ymin;
let class = format!("{} {}%", bbox.name, (bbox.conf * 100.0) as i32);
let _ = opencv::imgproc::put_text(
&mut frame,
&class,
(bbox.xmin as i32, bbox.ymin as i32).into(),
0,
1.0,
Scalar::new(255.0, 255.0, 255.0, 255.0),
1,
LINE_8,
false,
);
rectangle(
&mut frame,
Rect::new(bbox.xmin as i32, bbox.ymin as i32, w as i32, h as i32),
Scalar::new(255.0, 128.0, 0.0, 255.0),
2,
1,
0,
)?;
}

imshow("Image", &frame)?;
let key = wait_key(1)?;
if key > 0 && key != 255 {
break;
}
timings.push(("draw_boxes", start.elapsed()));
println!("timings:{:?}", timings);
}
cap.release()?;
destroy_all_windows()?;
Ok(())
}

// Unused local copy of `utils::square64`, kept here for reference.
#[allow(dead_code)]
fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
let aspect = w as f32 / h as f32;
if w > h {
let tw = size;
let th = (tw as f32 / aspect) as i64;
(tw, th)
} else {
let th = size;
let tw = (size as f32 * aspect) as i64;
(tw, th)
}
}
Binary file added images/bus_padded.jpg
Binary file added images/katri_padded.jpg
94 changes: 80 additions & 14 deletions src/image.rs
@@ -8,7 +8,7 @@ pub type ImageCHW = (i64, i64, i64);
pub struct Image {
width: i64,
height: i64,
pub(crate) image: Tensor,
image: Option<Tensor>,
pub(crate) scaled_image: Tensor,
pub(crate) image_dim: ImageCHW,
pub(crate) scaled_image_dim: ImageCHW,
@@ -25,13 +25,75 @@ impl Image {
Self {
width,
height,
image,
image: Some(image),
scaled_image,
image_dim,
scaled_image_dim,
}
}

#[cfg(feature = "opencv")]
pub fn from_opencv_mat(
src_frame: &opencv::core::Mat,
dimension: (i64, i64),
) -> Result<Self, opencv::Error> {
let width = dimension.0;
let height = dimension.1;
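// the model input is square, so only `dimension.0` drives the resize and padding below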
let square_size = width;
let size = opencv::core::MatTraitConst::size(src_frame)?;
let uh = size.height as i64;
let uw = size.width as i64;
let image_dim = (3 as i64, uh, uw);
let (sw, sh) = utils::square64(width, size.width.into(), size.height.into());
let mut frame = opencv::core::Mat::default();
// opencv resize is much faster than tch::resize
opencv::imgproc::resize(
src_frame,
&mut frame,
(sw as i32, sh as i32).into(),
0.0,
0.0,
0,
)?;

let size = opencv::core::MatTraitConst::size(&frame)?;
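// SAFETY: `from_blob` wraps the Mat's buffer without copying; the data is
// copied into the padded tensor below while `frame` is still alive.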
let scaled_image = unsafe {
Tensor::from_blob(
opencv::core::MatTraitConst::data(&frame),
&[
size.height as i64,
size.width as i64,
opencv::core::MatTraitConst::channels(&frame) as i64,
],
&[],
tch::Kind::Uint8,
tch::Device::Cpu,
)
};

let scaled_image_dim = (3 as i64, width, width);

let scaled_image = scaled_image
.permute([2, 0, 1]) // HWC -> CHW: [[b0, g0, r0], [b1, g1, r1], ...] becomes [[b0, b1, ...], [g0, g1, ...], [r0, r1, ...]]
.flip(0); // swap [[B], [G], [R]] to [[R], [G], [B]]

let gray: Vec<u8> = vec![114; (square_size * square_size * 3) as usize];
let bg = Tensor::from_slice(&gray).reshape([3, square_size, square_size]);
let dh = (square_size - sh) / 2;
let dw = (square_size - sw) / 2;

bg.narrow(2, dw, sw).narrow(1, dh, sh).copy_(&scaled_image);

Ok(Self {
width,
height,
image: None,
scaled_image: bg,
image_dim,
scaled_image_dim,
})
}

pub fn from_slice(
slice: &[u8],
orig_width: i64,
@@ -55,21 +117,25 @@ }
}

pub fn draw_rectangle(&mut self, bboxes: &Vec<BBox>) {
let image = &mut self.image;

for bbox in bboxes.iter() {
let xmin = bbox.xmin as i64;
let ymin = bbox.ymin as i64;
let xmax = bbox.xmax as i64;
let ymax = bbox.ymax as i64;
Self::draw_line(image, xmin, xmax, ymin, ymax.min(ymin + 2));
Self::draw_line(image, xmin, xmax, ymin.max(ymax - 2), ymax);
Self::draw_line(image, xmin, xmax.min(xmin + 2), ymin, ymax);
Self::draw_line(image, xmin.max(xmax - 2), xmax, ymin, ymax);
if let Some(image) = self.image.as_mut() {

for bbox in bboxes.iter() {
let xmin = bbox.xmin as i64;
let ymin = bbox.ymin as i64;
let xmax = bbox.xmax as i64;
let ymax = bbox.ymax as i64;
Self::draw_line(image, xmin, xmax, ymin, ymax.min(ymin + 2));
Self::draw_line(image, xmin, xmax, ymin.max(ymax - 2), ymax);
Self::draw_line(image, xmin, xmax.min(xmin + 2), ymin, ymax);
Self::draw_line(image, xmin.max(xmax - 2), xmax, ymin, ymax);
}
}
}

pub fn save(&self, path: &str) {
tch::vision::image::save(&self.image, path).expect("can't save image");
if let Some(ref image) = self.image {
tch::vision::image::save(image, path).expect("can't save image");
}
}
}
2 changes: 1 addition & 1 deletion src/utils.rs
@@ -376,7 +376,7 @@ pub fn preprocess(image: &Tensor, square_size: i64) -> Tensor {
bg
}

fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
pub(crate) fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
let aspect = w as f32 / h as f32;
if w > h {
let tw = size;
