Commit
add Image::from_opencv_mat() and opencv as feature
Michal Conos committed Sep 14, 2024
1 parent 33febba commit bdede6e
Showing 7 changed files with 237 additions and 23 deletions.
14 changes: 13 additions & 1 deletion Cargo.toml
@@ -5,15 +5,28 @@ edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = []
# use opencv to resize the image during the pre-process step
opencv = ["dep:opencv"]

[dependencies]
tch = "0.17.0"
opencv = { version = "0.92.3", optional = true }

[[example]]
name = "yolo-predict"
path = "examples/predict/main.rs"

[[example]]
name = "video"
path = "examples/video/main.rs"
required-features = ["opencv"]


[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
opencv = "0.92.3"


[[bench]]
@@ -31,4 +44,3 @@ harness = false
[[bench]]
name = "e2e"
harness = false

59 changes: 52 additions & 7 deletions README.md
@@ -15,10 +15,14 @@ The following library is tested against stable version `libtorch-2.4.0`.

### Install opencv

If you want to use the `opencv` feature, install OpenCV first:

```bash
apt install libopencv-dev clang libclang-dev
```



### Obtain YOLOv8

```bash
@@ -32,19 +36,60 @@ ls -l yolov8n.torchscript
More on yolo export: https://docs.ultralytics.com/modes/export/#__tabbed_1_2


### Get `libtorch`

Get `libtorch` from https://pytorch.org/; the current bindings use stable version `2.4.0`. Decide beforehand whether you want the `cpu` or the `cuda` build.
```bash
unzip ~/Downloads/libtorch-cxx11-abi-shared-with-deps-2.4.0+cpu.zip
export LIBTORCH=$(pwd)/libtorch/
export LIBTORCH_INCLUDE=$(pwd)/libtorch/
export LIBTORCH_LIB=$(pwd)/libtorch/
```
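
(The `env` file sourced in the following steps is assumed to simply re-export these `LIBTORCH*` variables.)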

### Build

```bash
source ./env # environment
cargo build --release
```

### Build Examples

The video example needs OpenCV installed and the `opencv` feature enabled.

```bash
source ./env # environment
cargo build --release --examples --features=opencv
```

### Run Examples

```bash
export LD_LIBRARY_PATH="$(pwd)/libtorch/lib/:$LD_LIBRARY_PATH"
source ./env # environment
target/release/examples/yolo-predict
```

```bash
source ./env # environment
target/release/examples/video <some.video.file>
```

## Description

This library tries to do things as closely as possible to the `ultralytics` Python package.

### Image pre-processing

Two options are available: using `torch` tensors, or using `opencv`. In the latter case the `opencv` feature must be enabled (it is off by default).

We pad the image exactly as `ultralytics` does, i.e. the aspect ratio is preserved and the gray (114, 114, 114) color
is used as padding to keep `(640, 640)` as the input dimension.


![bus-padded](images/bus_padded.jpg)
![katri-padded](images/katri_padded.jpg)
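
For reference, the resize-then-pad step boils down to the sketch below. This is only a condensed illustration based on `utils::square64` and the padding code in `src/image.rs` from this commit; the names `letterbox_dims` and `pad_to_square` are illustrative and not part of the crate's API.

```rust
use tch::Tensor;

// Scaled width/height that keep the aspect ratio inside a `size` x `size`
// square (mirrors `utils::square64`).
fn letterbox_dims(size: i64, w: i64, h: i64) -> (i64, i64) {
    let aspect = w as f32 / h as f32;
    if w > h {
        (size, (size as f32 / aspect) as i64)
    } else {
        ((size as f32 * aspect) as i64, size)
    }
}

// Paste a CHW uint8 tensor of shape [3, sh, sw] into the center of a gray
// (114, 114, 114) square canvas, the same way `Image::from_opencv_mat` does.
fn pad_to_square(scaled: &Tensor, size: i64, sw: i64, sh: i64) -> Tensor {
    let gray: Vec<u8> = vec![114; (size * size * 3) as usize];
    let bg = Tensor::from_slice(&gray).reshape([3, size, size]);
    bg.narrow(2, (size - sw) / 2, sw)
        .narrow(1, (size - sh) / 2, sh)
        .copy_(scaled);
    bg
}
```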

### Image post-processing

We try to use `torch` and _tensors_ for as long as possible. However, in certain cases it may be faster to move the prediction to the `cpu`. In that case, initialize YOLO as:

```rust
let yolo = yolo_v8::YoloV8ObjectDetection::with_model(yolo_v8::YOLOModel::Nano).post_process_on_cpu();
```
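
Whether this pays off depends on the model and your hardware; the per-frame timings printed by the video example are an easy way to compare the two settings.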
91 changes: 91 additions & 0 deletions examples/video/main.rs
@@ -0,0 +1,91 @@
use std::{env::args, time::Instant};

use opencv::{
core::{Mat, MatTrait, MatTraitConst, Rect, Scalar, Vector},
highgui::{destroy_all_windows, imshow, wait_key},
imgcodecs::imwrite,
imgproc::{rectangle, LINE_8},
videoio::{VideoCaptureTrait, CAP_ANY},
};
use tch::{Kind, Tensor};
use yolo_v8::image::Image;

fn main() -> Result<(), opencv::Error> {
let filename = args().nth(1).unwrap_or("test3.mp4".to_owned());
println!("filename={filename}");
let mut cap = opencv::videoio::VideoCapture::from_file(&filename, CAP_ANY)?;
let yolo = yolo_v8::YoloV8ObjectDetection::with_model(yolo_v8::YOLOModel::Nano); //.post_process_on_cpu();
let device = tch::Device::cuda_if_available();
println!("device: {:?}", device);

loop {
let mut timings = Vec::new();
let mut frame = Mat::default();
let start = Instant::now();
let have_image = cap.read(&mut frame)?;
if !have_image {
break;
}

let mut image = Image::from_opencv_mat(&frame, (640, 640))?;
timings.push(("read_frame", start.elapsed()));
let start = Instant::now();
let predictions = yolo.predict(&image, 0.25, 0.7); //.postprocess();
timings.push(("detection", start.elapsed()));
// image.draw_rectangle(&predictions);
// image.save("result.jpg");

let start = Instant::now();
let predictions = predictions.postprocess();
timings.push(("postprocess", start.elapsed()));
let start = Instant::now();
for bbox in predictions.0 {
let w = bbox.xmax - bbox.xmin;
let h = bbox.ymax - bbox.ymin;
let class = format!("{} {}%", bbox.name, (bbox.conf * 100.0) as i32);
let _ = opencv::imgproc::put_text(
&mut frame,
&class,
(bbox.xmin as i32, bbox.ymin as i32).into(),
0,
1.0,
Scalar::new(255.0, 255.0, 255.0, 255.0),
1,
LINE_8,
false,
);
rectangle(
&mut frame,
Rect::new(bbox.xmin as i32, bbox.ymin as i32, w as i32, h as i32),
Scalar::new(255.0, 128.0, 0.0, 255.0),
2,
1,
0,
)?;
}

imshow("Image", &frame)?;
let key = wait_key(1)?;
if key > 0 && key != 255 {
break;
}
timings.push(("draw_boxes", start.elapsed()));
println!("timings:{:?}", timings);
}
cap.release()?;
destroy_all_windows()?;
Ok(())
}

// Unused local copy of `utils::square64`, kept here for reference.
#[allow(dead_code)]
fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
let aspect = w as f32 / h as f32;
if w > h {
let tw = size;
let th = (tw as f32 / aspect) as i64;
(tw, th)
} else {
let th = size;
let tw = (size as f32 * aspect) as i64;
(tw, th)
}
}
Binary file added images/bus_padded.jpg
Binary file added images/katri_padded.jpg
94 changes: 80 additions & 14 deletions src/image.rs
@@ -8,7 +8,7 @@ pub type ImageCHW = (i64, i64, i64);
pub struct Image {
width: i64,
height: i64,
pub(crate) image: Tensor,
image: Option<Tensor>,
pub(crate) scaled_image: Tensor,
pub(crate) image_dim: ImageCHW,
pub(crate) scaled_image_dim: ImageCHW,
@@ -25,13 +25,75 @@ impl Image {
Self {
width,
height,
image,
image: Some(image),
scaled_image,
image_dim,
scaled_image_dim,
}
}

#[cfg(feature = "opencv")]
pub fn from_opencv_mat(
src_frame: &opencv::core::Mat,
dimension: (i64, i64),
) -> Result<Self, opencv::Error> {
let width = dimension.0;
let height = dimension.1;
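// the model input is square, so only `dimension.0` drives the resize and padding below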
let square_size = width;
let size = opencv::core::MatTraitConst::size(src_frame)?;
let uh = size.height as i64;
let uw = size.width as i64;
let image_dim = (3 as i64, uh, uw);
let (sw, sh) = utils::square64(width, size.width.into(), size.height.into());
let mut frame = opencv::core::Mat::default();
// opencv resize is much faster than tch::resize
opencv::imgproc::resize(
src_frame,
&mut frame,
(sw as i32, sh as i32).into(),
0.0,
0.0,
0,
)?;

let size = opencv::core::MatTraitConst::size(&frame)?;
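// SAFETY: `from_blob` wraps the Mat's buffer without copying; the data is
// copied into the padded tensor below while `frame` is still alive.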
let scaled_image = unsafe {
Tensor::from_blob(
opencv::core::MatTraitConst::data(&frame),
&[
size.height as i64,
size.width as i64,
opencv::core::MatTraitConst::channels(&frame) as i64,
],
&[],
tch::Kind::Uint8,
tch::Device::Cpu,
)
};

let scaled_image_dim = (3 as i64, width, width);

let scaled_image = scaled_image
.permute([2, 0, 1]) // HWC -> CHW: [[b0, g0, r0], [b1, g1, r1], ...] becomes [[b0, b1, ...], [g0, g1, ...], [r0, r1, ...]]
.flip(0); // swap [[B], [G], [R]] to [[R], [G], [B]]

let gray: Vec<u8> = vec![114; (square_size * square_size * 3) as usize];
let bg = Tensor::from_slice(&gray).reshape([3, square_size, square_size]);
let dh = (square_size - sh) / 2;
let dw = (square_size - sw) / 2;

bg.narrow(2, dw, sw).narrow(1, dh, sh).copy_(&scaled_image);

Ok(Self {
width,
height,
image: None,
scaled_image: bg,
image_dim,
scaled_image_dim,
})
}

pub fn from_slice(
slice: &[u8],
orig_width: i64,
@@ -55,21 +117,25 @@ }
}

pub fn draw_rectangle(&mut self, bboxes: &Vec<BBox>) {
let image = &mut self.image;

for bbox in bboxes.iter() {
let xmin = bbox.xmin as i64;
let ymin = bbox.ymin as i64;
let xmax = bbox.xmax as i64;
let ymax = bbox.ymax as i64;
Self::draw_line(image, xmin, xmax, ymin, ymax.min(ymin + 2));
Self::draw_line(image, xmin, xmax, ymin.max(ymax - 2), ymax);
Self::draw_line(image, xmin, xmax.min(xmin + 2), ymin, ymax);
Self::draw_line(image, xmin.max(xmax - 2), xmax, ymin, ymax);
if let Some(image) = self.image.as_mut() {

for bbox in bboxes.iter() {
let xmin = bbox.xmin as i64;
let ymin = bbox.ymin as i64;
let xmax = bbox.xmax as i64;
let ymax = bbox.ymax as i64;
Self::draw_line(image, xmin, xmax, ymin, ymax.min(ymin + 2));
Self::draw_line(image, xmin, xmax, ymin.max(ymax - 2), ymax);
Self::draw_line(image, xmin, xmax.min(xmin + 2), ymin, ymax);
Self::draw_line(image, xmin.max(xmax - 2), xmax, ymin, ymax);
}
}
}

pub fn save(&self, path: &str) {
tch::vision::image::save(&self.image, path).expect("can't save image");
if let Some(ref image) = self.image {
tch::vision::image::save(image, path).expect("can't save image");
}
}
}
2 changes: 1 addition & 1 deletion src/utils.rs
@@ -376,7 +376,7 @@ pub fn preprocess(image: &Tensor, square_size: i64) -> Tensor {
bg
}

fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
pub(crate) fn square64(size: i64, w: i64, h: i64) -> (i64, i64) {
let aspect = w as f32 / h as f32;
if w > h {
let tw = size;
