diff --git a/Cargo.toml b/Cargo.toml index b1d05cb5..4044aca1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ tracing-subscriber = "0.3.18" tokio = { version = "1.15", features = ["full", "tracing"] } hf-hub = "0.3.0" crossbeam = "0.8.4" +image = "0.25" # dev criterion = { version = "0.5.1", features = ["async_tokio"] } diff --git a/screenpipe-integrations/Cargo.toml b/screenpipe-integrations/Cargo.toml index c1c715ef..5d80f46e 100644 --- a/screenpipe-integrations/Cargo.toml +++ b/screenpipe-integrations/Cargo.toml @@ -8,8 +8,11 @@ license = { workspace = true } edition = { workspace = true } [dependencies] -curl = "0.4.46" +image = { workspace = true } +reqwest = { version = "0.11", features = ["multipart", "json"] } +rusty-tesseract = { git = "https://github.com/louis030195/rusty-tesseract.git", branch = "main" } serde_json = "1.0" +tokio = { version = "1.0", features = ["full"] } uuid = { version = "1.3", features = ["v4"] } chrono = "0.4" log = "0.4" \ No newline at end of file diff --git a/screenpipe-integrations/src/friend_wearable.rs b/screenpipe-integrations/src/friend_wearable.rs index 73e5ccb7..2ffdb5ef 100644 --- a/screenpipe-integrations/src/friend_wearable.rs +++ b/screenpipe-integrations/src/friend_wearable.rs @@ -1,6 +1,5 @@ -use curl::easy::{Easy, List}; +use reqwest::Client; use serde_json::{json, Value}; -use std::io::Read; use uuid::Uuid; use chrono::Utc; use log::debug; @@ -31,7 +30,7 @@ fn decode_from_uuid(uuid: Uuid) -> (String, String) { (source.to_string(), parts.get(1).unwrap_or(&"").to_string()) } -pub fn send_data_to_friend_wearable( +pub async fn send_data_to_friend_wearable( memory_source: String, memory_id: String, memory_text: String, @@ -39,11 +38,9 @@ pub fn send_data_to_friend_wearable( ) -> Result> { let request_id = encode_to_uuid(&memory_source, &memory_id); - // Use the provided UID instead of a hardcoded value let friend_user_id = uid.to_string(); let endpoint = "https://webhook-test.com/c46d38536e2851a100e3c230386ae238"; - // Generate timestamp let memory_timestamp = Utc::now().to_rfc3339(); let payload = json!({ @@ -56,35 +53,21 @@ pub fn send_data_to_friend_wearable( }); debug!("Sending request to friend endpoint: {}", payload); - let data = payload.to_string().into_bytes(); - let mut handle = Easy::new(); - let mut response = Vec::new(); - let mut headers = List::new(); - headers.append("Content-Type: application/json")?; - handle.http_headers(headers)?; + let client = Client::new(); + let response = client.post(endpoint) + .json(&payload) + .send() + .await?; - handle.url(endpoint)?; - handle.post(true)?; - handle.post_field_size(data.len() as u64)?; + let status = response.status(); + let response_body = response.text().await?; - { - let mut transfer = handle.transfer(); - transfer.read_function(|buf| { - Ok(data.as_slice().read(buf).unwrap_or(0)) - })?; - transfer.write_function(|new_data| { - response.extend_from_slice(new_data); - Ok(new_data.len()) - })?; - transfer.perform()?; - } - - let response_body = String::from_utf8(response)?; - let response_json: Value = serde_json::from_str(&response_body)?; - - match handle.response_code()? { - 200 => Ok(response_json), + match status.as_u16() { + 200 => { + let response_json: Value = serde_json::from_str(&response_body)?; + Ok(response_json) + }, 400 => Err("Bad Request: Invalid data sent".into()), 401 => Err("Unauthorized: Authentication failed".into()), 500 => Err("Server Error: Please try again later".into()), diff --git a/screenpipe-integrations/src/lib.rs b/screenpipe-integrations/src/lib.rs index 3441469c..2535f712 100644 --- a/screenpipe-integrations/src/lib.rs +++ b/screenpipe-integrations/src/lib.rs @@ -1 +1,2 @@ -pub mod friend_wearable; \ No newline at end of file +pub mod friend_wearable; +pub mod unstructured_ocr; \ No newline at end of file diff --git a/screenpipe-integrations/src/unstructured_ocr.rs b/screenpipe-integrations/src/unstructured_ocr.rs new file mode 100644 index 00000000..bb427855 --- /dev/null +++ b/screenpipe-integrations/src/unstructured_ocr.rs @@ -0,0 +1,65 @@ +use image::{DynamicImage, ImageEncoder, codecs::png::PngEncoder}; +use reqwest::multipart::{Form, Part}; +use rusty_tesseract::DataOutput; +use serde_json; +use std::collections::HashMap; +use std::io::Cursor; +use std::sync::Arc; + +pub async fn perform_ocr_cloud(image: &Arc) -> (String, DataOutput, String) { + let api_key = "ZUxfTRkf6lRgHZDXPHlFaSoOKAEbwV".to_string(); + let api_url = "https://api.unstructuredapp.io/general/v0/general".to_string(); + + let mut buffer = Vec::new(); + let mut cursor = Cursor::new(&mut buffer); + PngEncoder::new(&mut cursor) + .write_image( + image.as_bytes(), + image.width(), + image.height(), + image.color().into(), + ) + .unwrap(); + + let part = Part::bytes(buffer) + .file_name("image.png".to_string()) + .mime_str("image/png") + .unwrap(); + + let form = Form::new() + .part("files", part) + .text("strategy", "auto") + .text("coordinates", "true"); + + let client = reqwest::Client::new(); + let response = client + .post(&api_url) + .header("accept", "application/json") + .header("unstructured-api-key", &api_key) + .multipart(form) + .send() + .await + .unwrap(); + + let response_text = if response.status().is_success() { + response.text().await.unwrap() + } else { + panic!("Error: {}", response.status()); + }; + + let json_output = response_text.clone(); + let data_output = DataOutput { + data: Vec::new(), + output: String::new(), + }; + + let parsed_response: Vec> = + serde_json::from_str(&response_text).unwrap(); + let text = parsed_response + .iter() + .filter_map(|item| item.get("text").and_then(|v| v.as_str())) + .collect::>() + .join(" "); + + (text, data_output, json_output) +} \ No newline at end of file diff --git a/screenpipe-server/Cargo.toml b/screenpipe-server/Cargo.toml index 60ecce85..d22086c9 100644 --- a/screenpipe-server/Cargo.toml +++ b/screenpipe-server/Cargo.toml @@ -18,7 +18,7 @@ screenpipe-audio = { path = "../screenpipe-audio" } screenpipe-core = { path = "../screenpipe-core" } # Image processing -image = "0.25.0" +image = { workspace = true } # OCR # rusty-tesseract = "1.1.10" diff --git a/screenpipe-server/src/core.rs b/screenpipe-server/src/core.rs index c8613ff5..71535c93 100644 --- a/screenpipe-server/src/core.rs +++ b/screenpipe-server/src/core.rs @@ -171,7 +171,7 @@ async fn record_video( frame_id.to_string(), frame.text.clone(), uid, // Pass the UID to the function - ) { + ).await { error!("Failed to send screen data to friend wearable: {}", e); } else { debug!("Sent screen data to friend wearable for frame {}", frame_id); @@ -361,7 +361,7 @@ async fn process_audio_result( audio_chunk_id.to_string(), transcription.clone(), uid, // Pass the UID to the function - ) { + ).await { error!("Failed to send data to friend wearable: {}", e); } else { debug!( @@ -377,4 +377,4 @@ async fn process_audio_result( result.input.device, e ), } -} +} \ No newline at end of file diff --git a/screenpipe-vision/Cargo.toml b/screenpipe-vision/Cargo.toml index c45f21ed..a9274e8f 100644 --- a/screenpipe-vision/Cargo.toml +++ b/screenpipe-vision/Cargo.toml @@ -25,7 +25,7 @@ num_cpus = "1.0" tokio = { workspace = true } # Image processing -image = "0.25.0" +image = { workspace = true } # OCR # rusty-tesseract = "1.1.10" @@ -56,6 +56,9 @@ strsim = "0.10.0" clap = { version = "4.0", features = ["derive"] } # tokio = { version = "1", features = ["full"] } +# Integrations +screenpipe-integrations = { path = "../screenpipe-integrations" } + [dev-dependencies] tempfile = "3.3.0" criterion = { workspace = true } diff --git a/screenpipe-vision/src/core.rs b/screenpipe-vision/src/core.rs index 1f89987a..1b88d075 100644 --- a/screenpipe-vision/src/core.rs +++ b/screenpipe-vision/src/core.rs @@ -15,11 +15,11 @@ use xcap::{Monitor, Window}; use crate::utils::perform_ocr_windows; use crate::utils::OcrEngine; use crate::utils::{ - capture_screenshot, compare_with_previous_image, perform_ocr_cloud, perform_ocr_tesseract, + capture_screenshot, compare_with_previous_image, perform_ocr_tesseract, save_text_files, }; use rusty_tesseract::{Data, DataOutput}; // Add this import - +use screenpipe_integrations::unstructured_ocr::perform_ocr_cloud; pub struct DataOutputWrapper { pub data_output: rusty_tesseract::tesseract::output_data::DataOutput, @@ -307,4 +307,4 @@ pub async fn process_ocr_task( frame_number, _duration ); Ok(()) -} +} \ No newline at end of file diff --git a/screenpipe-vision/src/utils.rs b/screenpipe-vision/src/utils.rs index 6041eb0b..3fc00616 100644 --- a/screenpipe-vision/src/utils.rs +++ b/screenpipe-vision/src/utils.rs @@ -1,16 +1,12 @@ use crate::core::MaxAverageFrame; // Assuming core.rs is in the same crate under the `core` module -use image::codecs::png::PngEncoder; use image::DynamicImage; -use image::ImageEncoder; use image_compare::{Algorithm, Metric, Similarity}; // Added import for Similarity use log::{debug, error}; -use reqwest::multipart::{Form, Part}; use rusty_tesseract::{Args, DataOutput, Image}; // Added import for Args, Image, DataOutput use serde_json; use std::collections::HashMap; use std::fs::{self, File}; use std::hash::{DefaultHasher, Hash, Hasher}; -use std::io::Cursor; // Import Cursor for wrapping Vec use std::io::Write; use std::path::PathBuf; use std::sync::Arc; @@ -245,64 +241,6 @@ pub async fn save_text_files( } } -pub async fn perform_ocr_cloud(image: &DynamicImage) -> (String, DataOutput, String) { - let api_key = "CDMIN4mAq1TDtufPY2HpIIRdJkHqi6".to_string(); - let api_url = "https://api.unstructuredapp.io/general/v0/general".to_string(); - - let mut buffer = Vec::new(); - let mut cursor = Cursor::new(&mut buffer); // Wrap Vec in Cursor - PngEncoder::new(&mut cursor) - .write_image( - image.as_bytes(), - image.width(), - image.height(), - image.color().into(), // Convert ColorType to ExtendedColorType - ) - .unwrap(); - - let part = Part::bytes(buffer) - .file_name("image.png".to_string()) - .mime_str("image/png") - .unwrap(); - - let form = Form::new() - .part("files", part) - .text("strategy", "auto") - .text("coordinates", "true"); - - let client = reqwest::Client::new(); - let response = client - .post(&api_url) - .header("accept", "application/json") - .header("unstructured-api-key", &api_key) - .multipart(form) - .send() - .await - .unwrap(); - - let response_text = if response.status().is_success() { - response.text().await.unwrap() - } else { - panic!("Error: {}", response.status()); - }; - - let json_output = response_text.clone(); - let data_output = DataOutput { - data: Vec::new(), - output: String::new(), - }; // Initialize blank DataOutput - - let parsed_response: Vec> = - serde_json::from_str(&response_text).unwrap(); - let text = parsed_response - .iter() - .filter_map(|item| item.get("text").and_then(|v| v.as_str())) - .collect::>() - .join(" "); - - (text, data_output, json_output) -} - #[cfg(target_os = "windows")] pub async fn perform_ocr_windows(image: &DynamicImage) -> (String, DataOutput, String) { use windows::{ @@ -351,4 +289,4 @@ pub async fn perform_ocr_windows(image: &DynamicImage) -> (String, DataOutput, S .to_string(); (text, data_output, json_output) -} +} \ No newline at end of file