From d59ea839453cb81d5f512cce35a7c707fa90a331 Mon Sep 17 00:00:00 2001 From: Louis Beaumont Date: Sat, 21 Sep 2024 19:23:53 -0700 Subject: [PATCH] chore: memory optimisation --- Cargo.toml | 2 +- screenpipe-app-tauri/src-tauri/Cargo.toml | 2 +- screenpipe-audio/src/core.rs | 2 +- screenpipe-audio/src/stt.rs | 17 +++++++++-------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 69fcb1ceb..ed64b37e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ resolver = "2" [workspace.package] -version = "0.1.88" +version = "0.1.89" authors = ["louis030195 "] description = "" repository = "https://github.com/mediar-ai/screenpipe" diff --git a/screenpipe-app-tauri/src-tauri/Cargo.toml b/screenpipe-app-tauri/src-tauri/Cargo.toml index b2d0e1757..4af23f9d0 100644 --- a/screenpipe-app-tauri/src-tauri/Cargo.toml +++ b/screenpipe-app-tauri/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "screenpipe-app" -version = "0.2.61" +version = "0.2.62" description = "" authors = ["you"] license = "" diff --git a/screenpipe-audio/src/core.rs b/screenpipe-audio/src/core.rs index 18470c67d..dd4d9d4c0 100644 --- a/screenpipe-audio/src/core.rs +++ b/screenpipe-audio/src/core.rs @@ -297,7 +297,7 @@ pub async fn record_and_transcribe( let data = audio_data.lock().await; debug!("Sending audio of length {} to audio model", data.len()); if let Err(e) = whisper_sender.send(AudioInput { - data: data.clone(), + data: Arc::new(data.to_vec()), device: audio_device.clone(), sample_rate, channels, diff --git a/screenpipe-audio/src/stt.rs b/screenpipe-audio/src/stt.rs index b125ac381..15374dbd7 100644 --- a/screenpipe-audio/src/stt.rs +++ b/screenpipe-audio/src/stt.rs @@ -49,7 +49,7 @@ async fn transcribe_with_deepgram( let spec = WavSpec { channels: 1, sample_rate: match sample_rate { - 88200 => 16000, // Deepgram expects 16kHz for 88.2kHz + 88200 => 16000, // Deepgram expects 16kHz for 88.2kHz _ => sample_rate / 3, // Fallback for other sample rates }, bits_per_sample: 32, @@ -172,16 +172,17 @@ pub async fn stt( let mut mel_filters = vec![0f32; mel_bytes.len() / 4]; ::read_f32_into(mel_bytes, &mut mel_filters); - let mut audio_data = audio_input.data.clone(); - if audio_input.sample_rate != m::SAMPLE_RATE as u32 { + let audio_data = if audio_input.sample_rate != m::SAMPLE_RATE as u32 { info!( "device: {}, resampling from {} Hz to {} Hz", audio_input.device, audio_input.sample_rate, m::SAMPLE_RATE ); - audio_data = resample(audio_data, audio_input.sample_rate, m::SAMPLE_RATE as u32)?; - } + resample(audio_input.data.as_ref(), audio_input.sample_rate, m::SAMPLE_RATE as u32)? + } else { + audio_input.data.as_ref().to_vec() + }; let frame_size = 1600; // 100ms frame size for 16kHz audio let mut speech_frames = Vec::new(); @@ -378,7 +379,7 @@ pub async fn stt( Ok((transcription?, file_path_clone)) } -fn resample(input: Vec, from_sample_rate: u32, to_sample_rate: u32) -> Result> { +fn resample(input: &[f32], from_sample_rate: u32, to_sample_rate: u32) -> Result> { debug!("Resampling audio"); let params = SincInterpolationParameters { sinc_len: 256, @@ -396,7 +397,7 @@ fn resample(input: Vec, from_sample_rate: u32, to_sample_rate: u32) -> Resu 1, )?; - let waves_in = vec![input]; + let waves_in = vec![input.to_vec()]; debug!("Performing resampling"); let waves_out = resampler.process(&waves_in, None)?; debug!("Resampling complete"); @@ -405,7 +406,7 @@ fn resample(input: Vec, from_sample_rate: u32, to_sample_rate: u32) -> Resu #[derive(Debug, Clone)] pub struct AudioInput { - pub data: Vec, + pub data: Arc>, pub sample_rate: u32, pub channels: u16, pub device: Arc,