-
Notifications
You must be signed in to change notification settings - Fork 504
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc1356f
commit a585f9b
Showing
9 changed files
with
180 additions
and
194 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
use image::DynamicImage; | ||
use std::ffi::{CStr, CString}; | ||
use std::os::raw::{c_char, c_uchar}; | ||
|
||
#[link(name = "ocr")] | ||
extern "C" { | ||
fn perform_ocr( | ||
image_data: *const c_uchar, | ||
length: usize, | ||
width: i32, | ||
height: i32, | ||
) -> *mut c_char; | ||
} | ||
|
||
pub fn perform_ocr_apple(image: &DynamicImage) -> String { | ||
let rgba = image.to_rgba8(); | ||
let (width, height) = rgba.dimensions(); | ||
let raw_data = rgba.as_raw(); | ||
|
||
unsafe { | ||
let result_ptr = perform_ocr( | ||
raw_data.as_ptr(), | ||
raw_data.len(), | ||
width as i32, | ||
height as i32, | ||
); | ||
let result = CStr::from_ptr(result_ptr).to_string_lossy().into_owned(); | ||
libc::free(result_ptr as *mut libc::c_void); | ||
result | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
pub mod apple; | ||
pub mod core; | ||
pub mod utils; | ||
pub use core::{continuous_capture, get_monitor, process_ocr_task, CaptureResult}; | ||
#[cfg(target_os = "macos")] | ||
pub use apple::perform_ocr_apple; | ||
pub use core::{continuous_capture, process_ocr_task, CaptureResult, ControlMessage}; | ||
pub use utils::{perform_ocr_tesseract, OcrEngine}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import CoreGraphics | ||
import Foundation | ||
import Vision | ||
|
||
/// C entry point (`perform_ocr`) that runs Vision text recognition on a raw
/// pixel buffer.
///
/// - Parameters:
///   - imageData: Pointer to the pixel bytes; the image is built with 8 bits
///     per component, 32 bits per pixel and `width * 4` bytes per row.
///   - length: Number of readable bytes at `imageData`.
///   - width: Image width in pixels.
///   - height: Image height in pixels.
/// - Returns: A `strdup`-allocated C string that the caller must `free`. It
///   holds one recognized line per `\n`-terminated row, `"No text found"` when
///   nothing was recognized, or a message starting with `"Error:"` on failure.
@_cdecl("perform_ocr")
public func performOCR(imageData: UnsafePointer<UInt8>, length: Int, width: Int, height: Int)
  -> UnsafeMutablePointer<CChar>?
{
  print("Attempting to create image from raw data")
  print("Image dimensions: \(width)x\(height)")

  // Reject dimensions that are non-positive, overflow, or describe more bytes
  // than the caller supplied, so the image never references pixels outside
  // the buffer and the arithmetic below cannot trap.
  let (bytesPerRow, rowOverflow) = width.multipliedReportingOverflow(by: 4)
  let (expectedLength, sizeOverflow) = bytesPerRow.multipliedReportingOverflow(by: height)
  guard width > 0, height > 0, !rowOverflow, !sizeOverflow, length >= expectedLength else {
    print("Invalid image dimensions or buffer length.")
    return strdup("Error: Invalid image dimensions or buffer length")
  }

  // `Data(bytes:count:)` copies the bytes, so the image does not depend on the
  // caller's buffer after this point.
  guard let dataProvider = CGDataProvider(data: Data(bytes: imageData, count: length) as CFData)
  else {
    print("Failed to create CGDataProvider.")
    return strdup("Error: Failed to create CGDataProvider")
  }

  // NOTE(review): the bitmap info declares premultiplied alpha; confirm the
  // caller's pixel data matches (irrelevant when every pixel is opaque).
  guard
    let cgImage = CGImage(
      width: width,
      height: height,
      bitsPerComponent: 8,
      bitsPerPixel: 32,
      bytesPerRow: bytesPerRow,
      space: CGColorSpaceCreateDeviceRGB(),
      bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue),
      provider: dataProvider,
      decode: nil,
      shouldInterpolate: false,
      intent: .defaultIntent
    )
  else {
    print("Failed to create CGImage.")
    return strdup("Error: Failed to create CGImage")
  }

  print("CGImage created successfully.")

  // The completion handler writes into `ocrResult` and signals the semaphore
  // once it has finished, whatever the outcome.
  let semaphore = DispatchSemaphore(value: 0)
  var ocrResult = ""

  let request = VNRecognizeTextRequest { request, error in
    defer { semaphore.signal() }

    if let error = error {
      print("Error in text recognition request: \(error)")
      ocrResult = "Error: \(error.localizedDescription)"
      return
    }

    guard let observations = request.results as? [VNRecognizedTextObservation] else {
      print("Failed to process image or no text found.")
      ocrResult = "Error: Failed to process image or no text found"
      return
    }

    print("Number of text observations: \(observations.count)")

    // Keep only the best candidate of each observation, one per line.
    for (index, observation) in observations.enumerated() {
      guard let topCandidate = observation.topCandidates(1).first else {
        print("No top candidate for observation \(index)")
        continue
      }
      ocrResult += "\(topCandidate.string)\n"
    }
  }

  request.recognitionLevel = .accurate

  let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
  do {
    print("Performing OCR...")
    try handler.perform([request])
  } catch {
    print("Failed to perform OCR: \(error)")
    return strdup("Error: Failed to perform OCR - \(error.localizedDescription)")
  }

  // NOTE(review): `perform(_:)` is documented as returning after the requests
  // complete, so this wait is presumably already satisfied — confirm.
  semaphore.wait()

  return strdup(ocrResult.isEmpty ? "No text found" : ocrResult)
}
|
||
// swiftc -emit-library -o screenpipe-vision/lib/libocr.dylib screenpipe-vision/src/ocr.swift | ||
// or | ||
// swiftc -emit-library -o /usr/local/lib/libocr.dylib screenpipe-vision/src/ocr.swift |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.