Skip to content

Commit

Permalink
feat(build): add MKL support for Windows and Linux builds
Browse files Browse the repository at this point in the history
- Update Cargo.toml to include MKL feature
- Modify release-app.yml to use MKL feature for Windows and Linux
- Keep Metal feature for macOS builds
- Set RUSTFLAGS for optimized builds on Windows and Linux

Signed-off-by: David Anyatonwu <[email protected]>
  • Loading branch information
onyedikachi-david committed Sep 16, 2024
1 parent dc37a35 commit f873fba
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 69 deletions.
56 changes: 25 additions & 31 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,63 +72,57 @@ jobs:

stt_benchmark:
name: Run STT benchmark
runs-on: macos-latest
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@stable

- name: Install dependencies
run: |
brew install cmake openblas lapack
- name: Run STT benchmarks (MKL)
sudo apt-get update
sudo apt-get install -y ffmpeg tesseract-ocr libtesseract-dev libavformat-dev libavfilter-dev libavdevice-dev ffmpeg libasound2-dev libgtk-3-dev libsoup-3.0-dev libjavascriptcoregtk-4.1-dev libwebkit2gtk-4.1-dev
- name: Run STT benchmarks
run: |
cargo bench --bench stt_benchmark --features mkl -- --output-format bencher | tee -a stt_output_mkl.txt
cargo bench --bench stt_benchmark -- --output-format bencher | tee -a stt_output.txt
- name: Upload STT benchmark artifact
uses: actions/upload-artifact@v3
with:
name: stt-benchmark-data-macos
path: stt_output_mkl.txt
name: stt-benchmark-data
path: stt_output.txt

analyze_benchmarks:
needs: [stt_benchmark]
needs:
[
apple_ocr_benchmark,
tesseract_ocr_benchmark,
windows_ocr_benchmark,
stt_benchmark,
]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

- name: Download benchmark data
uses: actions/download-artifact@v3
with:
name: ocr-benchmark-data
path: ./cache/ocr

- name: Download STT benchmark data
uses: actions/download-artifact@v3
with:
name: stt-benchmark-data-macos
name: stt-benchmark-data
path: ./cache/stt

- name: List contents of cache directory
run: ls -R ./cache

- name: Analyze STT benchmarks
- name: Analyze OCR benchmarks
uses: benchmark-action/github-action-benchmark@v1
with:
name: STT Benchmarks
name: OCR Benchmarks
tool: "cargo"
output-file-path: ./cache/stt/stt_output_mkl.txt
output-file-path: ./cache/ocr/ocr_output.txt
github-token: ${{ secrets.GH_PAGES_TOKEN }}
auto-push: true
alert-threshold: "200%"
comment-on-alert: true
fail-on-alert: true
alert-comment-cc-users: "@louis030195"

# TODO: the "Analyze STT benchmarks" step below is broken and disabled; re-enable once fixed
# - name: Analyze STT benchmarks
# uses: benchmark-action/github-action-benchmark@v1
# with:
# name: STT Benchmarks
# tool: "cargo"
# output-file-path: ./cache/stt/stt_output.txt
# github-token: ${{ secrets.GH_PAGES_TOKEN }}
# auto-push: true
# alert-threshold: "200%"
# comment-on-alert: true
# fail-on-alert: true
# alert-comment-cc-users: "@louis030195"
alert-threshold: "200%"
19 changes: 11 additions & 8 deletions .github/workflows/release-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@ jobs:
fail-fast: false
matrix:
include:
- platform: "macos-latest" # for Arm based macs (M1 and above).
- platform: "macos-latest"
args: "--target aarch64-apple-darwin --features metal"
target: aarch64-apple-darwin
- platform: "macos-latest" # for Intel based macs.
- platform: "macos-latest"
args: "--target x86_64-apple-darwin --features metal"
target: x86_64-apple-darwin
- platform: "ubuntu-22.04" # Ubuntu x86_64
args: "" # TODO CUDA, mkl
- platform: "windows-latest" # Windows x86_64
args: "--target x86_64-pc-windows-msvc" # TODO CUDA, mkl? --features "openblas"
pre-build-args: "" # --openblas
# windows arm: https://github.com/ahqsoftwares/tauri-ahq-store/blob/2fbc2103c222662b3c6ee0cd71fcde664824f0ef/.github/workflows/publish.yml#L136
- platform: "ubuntu-22.04"
args: "--features mkl"
target: x86_64-unknown-linux-gnu
- platform: "windows-latest"
args: "--target x86_64-pc-windows-msvc --features mkl"
target: x86_64-pc-windows-msvc
pre-build-args: ""

runs-on: ${{ matrix.platform }}
steps:
Expand Down Expand Up @@ -150,6 +151,8 @@ jobs:
export PKG_CONFIG_PATH="/usr/local/opt/ffmpeg/lib/pkgconfig:$PKG_CONFIG_PATH"
export PKG_CONFIG_ALLOW_CROSS=1
export RUSTFLAGS="-C link-arg=-Wl,-rpath,@executable_path/../Frameworks -C link-arg=-Wl,-rpath,@loader_path/../Frameworks -C link-arg=-Wl,-install_name,@rpath/libscreenpipe.dylib"
elif [[ "${{ matrix.platform }}" == "ubuntu-22.04" || "${{ matrix.platform }}" == "windows-latest" ]]; then
export RUSTFLAGS="-C target-cpu=native"
fi
cargo build --release ${{ matrix.args }}
ls -R target
Expand Down
11 changes: 6 additions & 5 deletions screenpipe-audio/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ chrono = { version = "0.4.31", features = ["serde"] }

# Local Embeddings + STT
# TODO: feature metal, cuda, etc. see https://github.com/huggingface/candle/blob/main/candle-core/Cargo.toml
candle = { workspace = true, features = ["mkl"] }
candle-nn = { workspace = true, features = ["mkl"] }
candle-transformers = { workspace = true, features = ["mkl"] }
candle = { workspace = true }
candle-nn = { workspace = true }
candle-transformers = { workspace = true }
vad-rs = "0.1.3"
tokenizers = { workspace = true }
anyhow = "1.0.86"
Expand Down Expand Up @@ -80,11 +80,12 @@ criterion = { workspace = true }
memory-stats = "1.0"

[features]
default = ["mkl"]
default = ["metal"]
metal = ["candle/metal", "candle-nn/metal", "candle-transformers/metal"]
cuda = ["candle/cuda", "candle-nn/cuda", "candle-transformers/cuda"]
mkl = ["candle/mkl", "candle-nn/mkl", "candle-transformers/mkl"]



[[bin]]
name = "screenpipe-audio"
path = "src/bin/screenpipe-audio.rs"
Expand Down
58 changes: 41 additions & 17 deletions screenpipe-audio/benches/stt_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,70 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use memory_stats::memory_stats;
use screenpipe_audio::vad_engine::SileroVad;
use screenpipe_audio::{
stt, AudioInput, AudioTranscriptionEngine, WhisperModel, vad_engine::SileroVad
create_whisper_channel, stt, AudioTranscriptionEngine, VadEngineEnum, WhisperModel,
};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use std::path::PathBuf;
use std::fs::File;
use std::io::Read;

fn criterion_benchmark(c: &mut Criterion) {
let audio_transcription_engine = Arc::new(AudioTranscriptionEngine::WhisperTiny);
let whisper_model = WhisperModel::new(audio_transcription_engine.clone()).unwrap();
let test_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("test_data")
.join("selah.mp4");
let mut audio_data = Vec::new();
File::open(&test_file_path).unwrap().read_to_end(&mut audio_data).unwrap();

let mut group = c.benchmark_group("whisper_benchmarks");
group.sample_size(10);
group.measurement_time(Duration::from_secs(60));

group.bench_function("stt_mkl", |b| {
group.bench_function("create_whisper_channel", |b| {
b.iter(|| {
let _ = create_whisper_channel(
black_box(audio_transcription_engine.clone()),
black_box(VadEngineEnum::Silero),
None,
);
})
});

group.bench_function("stt", |b| {
b.iter(|| {
let mut vad_engine = Box::new(SileroVad::new().unwrap());
let audio_input = AudioInput {
data: audio_data.clone().into_iter().map(|x| x as f32).collect(),
sample_rate: 16000,
channels: 1,
device: "test".to_string(),
};
let _ = stt(
black_box(&audio_input),
black_box(test_file_path.to_string_lossy().as_ref()),
black_box(&whisper_model),
black_box(audio_transcription_engine.clone()),
black_box(&mut *vad_engine),
black_box(None),
black_box(&PathBuf::from("test_output")),
&mut *vad_engine,
None,
);
})
});

group.bench_function("memory_usage_stt", |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::new(0, 0);
for _ in 0..iters {
let start = std::time::Instant::now();
let before = memory_stats().unwrap().physical_mem;
let mut vad_engine = Box::new(SileroVad::new().unwrap());
let _ = stt(
test_file_path.to_string_lossy().as_ref(),
&whisper_model,
audio_transcription_engine.clone(),
&mut *vad_engine,
None,
);
let after = memory_stats().unwrap().physical_mem;
let duration = start.elapsed();
total_duration += duration;
println!("Memory used: {} bytes", after - before);
}
total_duration
})
});

group.finish();
}

Expand Down
13 changes: 5 additions & 8 deletions screenpipe-audio/src/stt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,11 @@ impl WhisperModel {
}

fn get_optimal_device() -> Result<Device> {
#[cfg(feature = "mkl")]
{
info!("Using MKL-accelerated CPU");
Ok(Device::Cpu)
}
#[cfg(not(feature = "mkl"))]
{
info!("Using standard CPU");
if let Ok(device) = Device::new_metal(0) {
info!("Using Metal GPU");
Ok(device)
} else {
info!("Metal not available, falling back to CPU");
Ok(Device::Cpu)
}
}
Expand Down

0 comments on commit f873fba

Please sign in to comment.