Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apple native ocr #103

Merged
merged 5 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/release-cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,11 @@ jobs:

- name: Create deployment package
run: |
ls -R target/
tar -czf screenpipe-${{ env.VERSION }}-${{ matrix.target }}.tar.gz -C target/${{ matrix.target }}/release screenpipe
mkdir -p screenpipe-${{ env.VERSION }}-${{ matrix.target }}/bin
mkdir -p screenpipe-${{ env.VERSION }}-${{ matrix.target }}/lib
cp target/${{ matrix.target }}/release/screenpipe screenpipe-${{ env.VERSION }}-${{ matrix.target }}/bin/
cp target/${{ matrix.target }}/release/libscreenpipe.dylib screenpipe-${{ env.VERSION }}-${{ matrix.target }}/lib/
tar -czf screenpipe-${{ env.VERSION }}-${{ matrix.target }}.tar.gz -C screenpipe-${{ env.VERSION }}-${{ matrix.target }} .

- name: Calculate SHA256
run: |
Expand Down
3 changes: 2 additions & 1 deletion Formula/screenpipe.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class Screenpipe < Formula
depends_on "tesseract"

def install
bin.install "screenpipe"
bin.install "screenpipe"
lib.install "libscreenpipe.dylib"
end

test do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"ffmpeg/lib/libavutil.59.dylib",
"ffmpeg/lib/libffmpeg.7.dylib",
"ffmpeg/lib/libswresample.5.dylib",
"ffmpeg/lib/libswscale.8.dylib"
"ffmpeg/lib/libswscale.8.dylib",
"../../../../screenpipe-vision/lib/libscreenpipe.dylib"
],
"entitlements": "entitlements.plist",
"signingIdentity": "-",
Expand Down
23 changes: 13 additions & 10 deletions screenpipe-server/src/bin/screenpipe-server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ use crossbeam::queue::SegQueue;
use dirs::home_dir;
use log::{debug, error, info, LevelFilter};
use screenpipe_audio::{
default_input_device, default_output_device, list_audio_devices, parse_audio_device,
DeviceControl,
default_input_device, list_audio_devices, parse_audio_device, DeviceControl,
};
use screenpipe_vision::OcrEngine;
use std::io::Write;
Expand All @@ -34,6 +33,7 @@ enum CliOcrEngine {
Unstructured,
Tesseract,
WindowsNative,
AppleNative,
}

impl From<CliOcrEngine> for CoreOcrEngine {
Expand All @@ -42,6 +42,7 @@ impl From<CliOcrEngine> for CoreOcrEngine {
CliOcrEngine::Unstructured => CoreOcrEngine::Unstructured,
CliOcrEngine::Tesseract => CoreOcrEngine::Tesseract,
CliOcrEngine::WindowsNative => CoreOcrEngine::WindowsNative,
CliOcrEngine::AppleNative => CoreOcrEngine::AppleNative,
}
}
}
Expand Down Expand Up @@ -233,14 +234,16 @@ async fn main() -> anyhow::Result<()> {
}
// audio output only supported on linux atm
// see https://github.com/louis030195/screen-pipe/pull/106
#[cfg(target_os = "linux")]
if let Ok(output_device) = default_output_device() {
audio_devices.push(Arc::new(output_device.clone()));
let device_control = DeviceControl {
is_running: true,
is_paused: false,
};
devices_status.insert(output_device, device_control);
if cfg!(target_os = "linux") {
use screenpipe_audio::default_output_device;
if let Ok(output_device) = default_output_device() {
audio_devices.push(Arc::new(output_device.clone()));
let device_control = DeviceControl {
is_running: true,
is_paused: false,
};
devices_status.insert(output_device, device_control);
}
}
} else {
// Use specified devices
Expand Down
142 changes: 71 additions & 71 deletions screenpipe-server/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,38 +144,38 @@ impl DatabaseManager {
tx.commit().await?;

// Now, let's chunk the transcription and insert into chunk tables
const CHUNKING_ENGINE: &str = "candle-jina-bert";
match text_chunking_local(transcription).await {
Ok(chunks) => {
info!("Successfully chunked audio transcription into {} chunks", chunks.len());
for chunk in chunks.iter() {
if let Err(e) = self.insert_chunked_text(
audio_chunk_id,
chunk,
Utc::now(),
transcription_engine,
CHUNKING_ENGINE,
ContentSource::Audio,
).await {
error!("Failed to insert chunk into chunked text index: {}", e);
}
}
}
Err(e) => {
error!("Failed to chunk audio transcription: {}", e);
// Fallback to inserting the whole transcription as a single chunk
if let Err(e) = self.insert_chunked_text(
audio_chunk_id,
transcription,
Utc::now(),
transcription_engine,
"No_Chunking",
ContentSource::Audio,
).await {
error!("Failed to insert whole audio transcription into chunked text index: {}", e);
}
}
}
// const CHUNKING_ENGINE: &str = "candle-jina-bert";
// match text_chunking_local(transcription).await {
// Ok(chunks) => {
// info!("Successfully chunked audio transcription into {} chunks", chunks.len());
// for chunk in chunks.iter() {
// if let Err(e) = self.insert_chunked_text(
// audio_chunk_id,
// chunk,
// Utc::now(),
// transcription_engine,
// CHUNKING_ENGINE,
// ContentSource::Audio,
// ).await {
// error!("Failed to insert chunk into chunked text index: {}", e);
// }
// }
// }
// Err(e) => {
// error!("Failed to chunk audio transcription: {}", e);
// // Fallback to inserting the whole transcription as a single chunk
// if let Err(e) = self.insert_chunked_text(
// audio_chunk_id,
// transcription,
// Utc::now(),
// transcription_engine,
// "No_Chunking",
// ContentSource::Audio,
// ).await {
// error!("Failed to insert whole audio transcription into chunked text index: {}", e);
// }
// }
// }

Ok(())
}
Expand Down Expand Up @@ -275,45 +275,45 @@ impl DatabaseManager {
.await
{
Ok(Ok(())) => {
debug!("Successfully inserted OCR text, proceeding to chunking");
// Chunk the text before inserting into chunked text index
const CHUNKING_ENGINE: &str = "candle-jina-bert";
match text_chunking_local(text).await {
Ok(chunks) => {
info!("Successfully chunked text into {} chunks", chunks.len());
for chunk in chunks.iter() {
if let Err(e) = self.insert_chunked_text(
frame_id,
chunk,
Utc::now(),
&format!("{:?}", *ocr_engine),
CHUNKING_ENGINE,
ContentSource::Screen,
).await {
error!("Failed to insert chunk into chunked text index: {}", e);
}
}
}
Err(e) => {
error!("Failed to chunk text: {}", e);
// Fallback to inserting the whole text if chunking fails
debug!("Inserting whole text as a single chunk");
if let Err(e) = self.insert_chunked_text(
frame_id,
text,
Utc::now(),
&format!("{:?}", *ocr_engine),
"No_Chunking",
ContentSource::Screen,
).await {
error!("Failed to insert whole text into chunked text index: {}", e);
}
}
}
info!(
"Successfully completed OCR text insertion for frame_id: {} on attempt {}",
frame_id, attempt
);
// debug!("Successfully inserted OCR text, proceeding to chunking");
// // Chunk the text before inserting into chunked text index
// const CHUNKING_ENGINE: &str = "candle-jina-bert";
// match text_chunking_local(text).await {
// Ok(chunks) => {
// info!("Successfully chunked text into {} chunks", chunks.len());
// for chunk in chunks.iter() {
// if let Err(e) = self.insert_chunked_text(
// frame_id,
// chunk,
// Utc::now(),
// &format!("{:?}", *ocr_engine),
// CHUNKING_ENGINE,
// ContentSource::Screen,
// ).await {
// error!("Failed to insert chunk into chunked text index: {}", e);
// }
// }
// }
// Err(e) => {
// error!("Failed to chunk text: {}", e);
// // Fallback to inserting the whole text if chunking fails
// debug!("Inserting whole text as a single chunk");
// if let Err(e) = self.insert_chunked_text(
// frame_id,
// text,
// Utc::now(),
// &format!("{:?}", *ocr_engine),
// "No_Chunking",
// ContentSource::Screen,
// ).await {
// error!("Failed to insert whole text into chunked text index: {}", e);
// }
// }
// }
// info!(
// "Successfully completed OCR text insertion for frame_id: {} on attempt {}",
// frame_id, attempt
// );
return Ok(());
}
Ok(Err(e)) => {
Expand Down
3 changes: 2 additions & 1 deletion screenpipe-server/src/video.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use chrono::Utc;
use image::ImageFormat::{self};
use log::{debug, error, info, warn};
use screenpipe_core::find_ffmpeg_path;
use screenpipe_vision::{continuous_capture, get_monitor, CaptureResult, OcrEngine};
use screenpipe_vision::core::get_monitor;
use screenpipe_vision::{continuous_capture, CaptureResult, OcrEngine};
use std::collections::VecDeque;
use std::path::PathBuf;
use std::process::Stdio;
Expand Down
9 changes: 9 additions & 0 deletions screenpipe-vision/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ assert_cmd = "2.0.14"
predicates = "3.1.0"
assert_fs = "1.1.1"


[build-dependencies]
cc = "1.0"

[package.metadata.osx]
framework = ["Vision", "AppKit"]

[[bin]]
name = "screenpipe-vision"
path = "src/bin/screenpipe-vision.rs"
Expand All @@ -78,3 +85,5 @@ harness = false
[target.'cfg(target_os = "windows")'.dependencies]
windows = { version = "0.58", features = ["Graphics_Imaging", "Media_Ocr", "Storage", "Storage_Streams"] }

[target.'cfg(target_os = "macos")'.dependencies]
libc = "0.2"
19 changes: 19 additions & 0 deletions screenpipe-vision/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use std::env;

/// Build-script entry point: emits the linker directives needed to load the
/// `screenpipe` dynamic library when the crate is built for macOS.
///
/// The `DESTINATION` environment variable selects the runtime search path
/// (rpath) baked into the binary:
///
/// * `brew`  -> `@executable_path/../lib`
/// * `tauri` -> `@executable_path/../Frameworks`
/// * anything else (or unset) -> `@executable_path/../../screenpipe-vision/lib`
///
/// For every other target OS the script emits nothing.
fn main() {
    // A build script is compiled for and executed on the *host*, so a
    // `#[cfg(target_os = ...)]` attribute here would describe the machine
    // running the build rather than the platform being built for. Cargo
    // exposes the real compilation target through `CARGO_CFG_TARGET_OS`.
    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
    if target_os != "macos" {
        return;
    }

    // An unset or non-UTF-8 `DESTINATION` falls back to the empty string and
    // therefore to the default rpath below.
    let destination = env::var("DESTINATION").unwrap_or_default();
    let rpath = match destination.as_str() {
        "brew" => "@executable_path/../lib",
        "tauri" => "@executable_path/../Frameworks",
        _ => "@executable_path/../../screenpipe-vision/lib",
    };

    println!("cargo:rustc-link-arg=-Wl,-rpath,{}", rpath);
    println!("cargo:rustc-link-lib=dylib=screenpipe");
}
31 changes: 31 additions & 0 deletions screenpipe-vision/src/apple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use image::DynamicImage;
use std::ffi::CStr;
use std::os::raw::{c_char, c_uchar};

// Foreign declarations resolved at link time against a native library named
// `screenpipe` (see the `#[link]` attribute below).
// NOTE(review): presumably this is the Apple Vision based OCR helper shipped as
// `libscreenpipe.dylib` — confirm against the native sources.
#[link(name = "screenpipe")]
extern "C" {
    /// Native OCR routine.
    ///
    /// * `image_data` - pointer to the first byte of the pixel buffer.
    /// * `length` - size of that buffer in bytes.
    /// * `width`, `height` - image dimensions.
    ///
    /// Returns a C string pointer; the Rust caller in this file reads it as a
    /// NUL-terminated string and releases it with `libc::free`.
    /// NOTE(review): whether the native side can return NULL, and which
    /// allocator it uses, is not visible from here — verify.
    fn perform_ocr(
        image_data: *const c_uchar,
        length: usize,
        width: i32,
        height: i32,
    ) -> *mut c_char;
}

/// Runs OCR on `image` through the native `perform_ocr` routine and returns
/// the recognised text.
///
/// The image is converted to an 8-bit RGBA buffer and handed to the native
/// side together with its byte length and dimensions. The returned C string
/// is copied into an owned `String` (invalid UTF-8 is replaced lossily) and
/// the native buffer is then released.
///
/// Returns an empty string when the native routine yields a null pointer.
pub fn perform_ocr_apple(image: &DynamicImage) -> String {
    let rgba = image.to_rgba8();
    let (width, height) = rgba.dimensions();
    let raw_data = rgba.as_raw();

    // SAFETY: `raw_data` is a live buffer owned by `rgba` for the whole call,
    // and `raw_data.len()` is exactly its length in bytes.
    // The `as i32` casts match the C signature; dimensions above `i32::MAX`
    // would wrap, which is not expected for in-memory screen captures.
    let result_ptr = unsafe {
        perform_ocr(
            raw_data.as_ptr(),
            raw_data.len(),
            width as i32,
            height as i32,
        )
    };

    // `CStr::from_ptr` requires a non-null pointer; treat a null result as
    // "no text recognised" instead of invoking undefined behaviour.
    if result_ptr.is_null() {
        return String::new();
    }

    // SAFETY: `result_ptr` is non-null (checked above).
    // NOTE(review): assumes the native side returns a NUL-terminated string.
    let text = unsafe { CStr::from_ptr(result_ptr) }
        .to_string_lossy()
        .into_owned();

    // SAFETY: the text has been copied into `text`, so the native buffer is
    // no longer referenced.
    // NOTE(review): assumes the buffer was allocated with the C allocator
    // (malloc/strdup) so that `free` is the matching deallocator — confirm.
    unsafe { libc::free(result_ptr as *mut libc::c_void) };

    text
}
2 changes: 1 addition & 1 deletion screenpipe-vision/src/bin/screenpipe-vision.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use clap::Parser;
use screenpipe_vision::{continuous_capture, get_monitor, OcrEngine};
use screenpipe_vision::{continuous_capture, core::get_monitor, OcrEngine};
use std::{sync::Arc, time::Duration};
use tokio::sync::mpsc::channel;

Expand Down
Loading
Loading