Skip to content

Commit

Permalink
feat: windows native ocr
Browse files (browse the repository at this point in the history)
  • Loading branch information
louis030195 committed Aug 3, 2024
1 parent c5787a3 commit 1e834ac
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 16 deletions.
2 changes: 1 addition & 1 deletion screenpipe-vision/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,5 @@ harness = false


[target.'cfg(target_os = "windows")'.dependencies]
- windows = { version = "0.48", features = ["Graphics_Imaging", "Media_Ocr", "Storage", "Storage_Streams"] }
+ windows = { version = "0.58", features = ["Graphics_Imaging", "Media_Ocr", "Storage", "Storage_Streams"] }

27 changes: 12 additions & 15 deletions screenpipe-vision/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use xcap::Monitor;

#[cfg(target_os = "windows")]
use windows::{
core::*,
Graphics::Imaging::BitmapDecoder,
Storage::Streams::Buffer,
Storage::{FileAccessMode, StorageFile},
};

#[derive(Clone, Debug)]
pub enum OcrEngine {
Deepgram,
Expand Down Expand Up @@ -313,27 +305,32 @@ pub async fn perform_ocr_cloud(image: &DynamicImage) -> (String, DataOutput, Str

#[cfg(target_os = "windows")]
pub async fn perform_ocr_windows(image: &DynamicImage) -> (String, DataOutput, String) {
use windows::{
Graphics::Imaging::{BitmapDecoder, SoftwareBitmap},
Media::Ocr::OcrEngine as WindowsOcrEngine,
Storage::Streams::{Buffer, InMemoryRandomAccessStream},
};

let mut buffer = Vec::new();
image
.write_to(&mut Cursor::new(&mut buffer), image::ImageFormat::Png)
.unwrap();

let windows_buffer = Buffer::Create(buffer.len() as u32).unwrap();
windows_buffer
.CreateReference()
.AsBuffer()
.unwrap()
.WriteBytes(&buffer)
.unwrap();

let stream = windows_buffer
.CreateReference()
.unwrap()
.AsStream()
.unwrap();
let stream = InMemoryRandomAccessStream::new().unwrap();
stream.WriteAsync(&windows_buffer).unwrap().get().unwrap();
stream.Seek(0).unwrap();

let decoder = BitmapDecoder::CreateAsync(&stream).unwrap().get().unwrap();
let bitmap = decoder.GetSoftwareBitmapAsync().unwrap().get().unwrap();

let engine = windows::Media::Ocr::OcrEngine::TryCreateFromUserProfileLanguages().unwrap();
let engine = WindowsOcrEngine::TryCreateFromUserProfileLanguages().unwrap();
let result = engine.RecognizeAsync(&bitmap).unwrap().get().unwrap();

let text = result.Text().unwrap().to_string();
Expand Down

0 comments on commit 1e834ac

Please sign in to comment.