diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 160475ee..821cdf6a 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -72,63 +72,57 @@ jobs: stt_benchmark: name: Run STT benchmark - runs-on: macos-latest + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: dtolnay/rust-toolchain@stable - name: Install dependencies run: | - brew install cmake openblas lapack - - - name: Run STT benchmarks (MKL) + sudo apt-get update + sudo apt-get install -y ffmpeg tesseract-ocr libtesseract-dev libavformat-dev libavfilter-dev libavdevice-dev ffmpeg libasound2-dev libgtk-3-dev libsoup-3.0-dev libjavascriptcoregtk-4.1-dev libwebkit2gtk-4.1-dev + - name: Run STT benchmarks run: | - cargo bench --bench stt_benchmark --features mkl -- --output-format bencher | tee -a stt_output_mkl.txt - + cargo bench --bench stt_benchmark -- --output-format bencher | tee -a stt_output.txt - name: Upload STT benchmark artifact uses: actions/upload-artifact@v3 with: - name: stt-benchmark-data-macos - path: stt_output_mkl.txt + name: stt-benchmark-data + path: stt_output.txt analyze_benchmarks: - needs: [stt_benchmark] + needs: + [ + apple_ocr_benchmark, + tesseract_ocr_benchmark, + windows_ocr_benchmark, + stt_benchmark, + ] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Download benchmark data + uses: actions/download-artifact@v3 + with: + name: ocr-benchmark-data + path: ./cache/ocr + - name: Download STT benchmark data uses: actions/download-artifact@v3 with: - name: stt-benchmark-data-macos + name: stt-benchmark-data path: ./cache/stt - name: List contents of cache directory run: ls -R ./cache - - name: Analyze STT benchmarks + - name: Analyze OCR benchmarks uses: benchmark-action/github-action-benchmark@v1 with: - name: STT Benchmarks + name: OCR Benchmarks tool: "cargo" - output-file-path: ./cache/stt/stt_output_mkl.txt + output-file-path: ./cache/ocr/ocr_output.txt github-token: ${{ secrets.GH_PAGES_TOKEN }} auto-push: true - alert-threshold: "200%" - comment-on-alert: true - fail-on-alert: true - alert-comment-cc-users: "@louis030195" - - # todo broken - # - name: Analyze STT benchmarks - # uses: benchmark-action/github-action-benchmark@v1 - # with: - # name: STT Benchmarks - # tool: "cargo" - # output-file-path: ./cache/stt/stt_output.txt - # github-token: ${{ secrets.GH_PAGES_TOKEN }} - # auto-push: true - # alert-threshold: "200%" - # comment-on-alert: true - # fail-on-alert: true - # alert-comment-cc-users: "@louis030195" + alert-threshold: "200%" \ No newline at end of file diff --git a/.github/workflows/release-app.yml b/.github/workflows/release-app.yml index 147f48a4..b727ed28 100644 --- a/.github/workflows/release-app.yml +++ b/.github/workflows/release-app.yml @@ -33,18 +33,19 @@ jobs: fail-fast: false matrix: include: - - platform: "macos-latest" # for Arm based macs (M1 and above). + - platform: "macos-latest" args: "--target aarch64-apple-darwin --features metal" target: aarch64-apple-darwin - - platform: "macos-latest" # for Intel based macs. + - platform: "macos-latest" args: "--target x86_64-apple-darwin --features metal" target: x86_64-apple-darwin - - platform: "ubuntu-22.04" # Ubuntu x86_64 - args: "" # TODO CUDA, mkl - - platform: "windows-latest" # Windows x86_64 - args: "--target x86_64-pc-windows-msvc" # TODO CUDA, mkl? --features "openblas" - pre-build-args: "" # --openblas - # windows arm: https://github.com/ahqsoftwares/tauri-ahq-store/blob/2fbc2103c222662b3c6ee0cd71fcde664824f0ef/.github/workflows/publish.yml#L136 + - platform: "ubuntu-22.04" + args: "--features mkl" + target: x86_64-unknown-linux-gnu + - platform: "windows-latest" + args: "--target x86_64-pc-windows-msvc --features mkl" + target: x86_64-pc-windows-msvc + pre-build-args: "" runs-on: ${{ matrix.platform }} steps: @@ -150,6 +151,8 @@ jobs: export PKG_CONFIG_PATH="/usr/local/opt/ffmpeg/lib/pkgconfig:$PKG_CONFIG_PATH" export PKG_CONFIG_ALLOW_CROSS=1 export RUSTFLAGS="-C link-arg=-Wl,-rpath,@executable_path/../Frameworks -C link-arg=-Wl,-rpath,@loader_path/../Frameworks -C link-arg=-Wl,-install_name,@rpath/libscreenpipe.dylib" + elif [[ "${{ matrix.platform }}" == "ubuntu-22.04" || "${{ matrix.platform }}" == "windows-latest" ]]; then + export RUSTFLAGS="-C target-cpu=native" fi cargo build --release ${{ matrix.args }} ls -R target diff --git a/screenpipe-audio/Cargo.toml b/screenpipe-audio/Cargo.toml index 1c759ebe..017d9daa 100644 --- a/screenpipe-audio/Cargo.toml +++ b/screenpipe-audio/Cargo.toml @@ -31,9 +31,9 @@ chrono = { version = "0.4.31", features = ["serde"] } # Local Embeddings + STT # TODO: feature metal, cuda, etc. see https://github.com/huggingface/candle/blob/main/candle-core/Cargo.toml -candle = { workspace = true, features = ["mkl"] } -candle-nn = { workspace = true, features = ["mkl"] } -candle-transformers = { workspace = true, features = ["mkl"] } +candle = { workspace = true } +candle-nn = { workspace = true } +candle-transformers = { workspace = true } vad-rs = "0.1.3" tokenizers = { workspace = true } anyhow = "1.0.86" @@ -80,11 +80,12 @@ criterion = { workspace = true } memory-stats = "1.0" [features] -default = ["mkl"] +default = ["metal"] metal = ["candle/metal", "candle-nn/metal", "candle-transformers/metal"] -cuda = ["candle/cuda", "candle-nn/cuda", "candle-transformers/cuda"] mkl = ["candle/mkl", "candle-nn/mkl", "candle-transformers/mkl"] + + [[bin]] name = "screenpipe-audio" path = "src/bin/screenpipe-audio.rs" diff --git a/screenpipe-audio/benches/stt_benchmark.rs b/screenpipe-audio/benches/stt_benchmark.rs index 66b57f59..6df4fea4 100644 --- a/screenpipe-audio/benches/stt_benchmark.rs +++ b/screenpipe-audio/benches/stt_benchmark.rs @@ -1,12 +1,12 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use memory_stats::memory_stats; +use screenpipe_audio::vad_engine::SileroVad; use screenpipe_audio::{ - stt, AudioInput, AudioTranscriptionEngine, WhisperModel, vad_engine::SileroVad + create_whisper_channel, stt, AudioTranscriptionEngine, VadEngineEnum, WhisperModel, }; +use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; -use std::path::PathBuf; -use std::fs::File; -use std::io::Read; fn criterion_benchmark(c: &mut Criterion) { let audio_transcription_engine = Arc::new(AudioTranscriptionEngine::WhisperTiny); @@ -14,33 +14,57 @@ fn criterion_benchmark(c: &mut Criterion) { let test_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("test_data") .join("selah.mp4"); - let mut audio_data = Vec::new(); - File::open(&test_file_path).unwrap().read_to_end(&mut audio_data).unwrap(); let mut group = c.benchmark_group("whisper_benchmarks"); group.sample_size(10); group.measurement_time(Duration::from_secs(60)); - group.bench_function("stt_mkl", |b| { + group.bench_function("create_whisper_channel", |b| { + b.iter(|| { + let _ = create_whisper_channel( + black_box(audio_transcription_engine.clone()), + black_box(VadEngineEnum::Silero), + None, + ); + }) + }); + + group.bench_function("stt", |b| { b.iter(|| { let mut vad_engine = Box::new(SileroVad::new().unwrap()); - let audio_input = AudioInput { - data: audio_data.clone().into_iter().map(|x| x as f32).collect(), - sample_rate: 16000, - channels: 1, - device: "test".to_string(), - }; let _ = stt( - black_box(&audio_input), + black_box(test_file_path.to_string_lossy().as_ref()), black_box(&whisper_model), black_box(audio_transcription_engine.clone()), - black_box(&mut *vad_engine), - black_box(None), - black_box(&PathBuf::from("test_output")), + &mut *vad_engine, + None, ); }) }); + group.bench_function("memory_usage_stt", |b| { + b.iter_custom(|iters| { + let mut total_duration = Duration::new(0, 0); + for _ in 0..iters { + let start = std::time::Instant::now(); + let before = memory_stats().unwrap().physical_mem; + let mut vad_engine = Box::new(SileroVad::new().unwrap()); + let _ = stt( + test_file_path.to_string_lossy().as_ref(), + &whisper_model, + audio_transcription_engine.clone(), + &mut *vad_engine, + None, + ); + let after = memory_stats().unwrap().physical_mem; + let duration = start.elapsed(); + total_duration += duration; + println!("Memory used: {} bytes", after - before); + } + total_duration + }) + }); + group.finish(); } diff --git a/screenpipe-audio/src/stt.rs b/screenpipe-audio/src/stt.rs index 587fe739..814430e9 100644 --- a/screenpipe-audio/src/stt.rs +++ b/screenpipe-audio/src/stt.rs @@ -88,14 +88,11 @@ impl WhisperModel { } fn get_optimal_device() -> Result { - #[cfg(feature = "mkl")] - { - info!("Using MKL-accelerated CPU"); - Ok(Device::Cpu) - } - #[cfg(not(feature = "mkl"))] - { - info!("Using standard CPU"); + if let Ok(device) = Device::new_metal(0) { + info!("Using Metal GPU"); + Ok(device) + } else { + info!("Metal not available, falling back to CPU"); Ok(Device::Cpu) } }