fix: new db data fields, snake_case warnings, ci cli, ci app, switch from mp3 to mp4 for audio encoding (same compute/storage effect, less dependency hell), fix tests, fix all
mediar-ai · Jul 23, 2024 · dcd5773
1 parent df2a11d
commit dcd5773
Show file tree

Hide file tree

Showing 22 changed files with 156 additions and 258 deletions.
diff --git a/.github/workflows/release-cli.yml b/.github/workflows/release-cli.yml
@@ -37,14 +37,12 @@ jobs:
       - name: Set up pkg-config for cross-compilation
         run: |
           if [ "${{ matrix.target }}" = "aarch64-apple-darwin" ]; then
-            export PKG_CONFIG_SYSROOT_DIR=/opt/homebrew
-            export PKG_CONFIG_PATH=/opt/homebrew/lib/pkgconfig
+            echo "PKG_CONFIG_PATH=/opt/homebrew/lib/pkgconfig" >> $GITHUB_ENV
+            echo "PKG_CONFIG_LIBDIR=/opt/homebrew/lib/pkgconfig" >> $GITHUB_ENV
           else
-            export PKG_CONFIG_SYSROOT_DIR=/usr/local
-            export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
+            echo "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" >> $GITHUB_ENV
+            echo "PKG_CONFIG_LIBDIR=/usr/local/lib/pkgconfig" >> $GITHUB_ENV
           fi
-          echo "PKG_CONFIG_SYSROOT_DIR=$PKG_CONFIG_SYSROOT_DIR" >> $GITHUB_ENV
-          echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" >> $GITHUB_ENV
 
       - name: Build with Metal feature
         run: |
@@ -148,34 +146,30 @@ jobs:
           profile: minimal
           override: true
 
-      - name: Install FFmpeg
-        run: |
-          choco install ffmpeg
-
-      - name: Install pkg-config
+      - name: Download FFmpeg
         run: |
-          choco install pkgconfiglite
+          $ProgressPreference = 'SilentlyContinue'
+          Invoke-WebRequest -Uri "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl-shared.zip" -OutFile "ffmpeg.zip"
+          Expand-Archive -Path "ffmpeg.zip" -DestinationPath "ffmpeg"
+          Move-Item -Path "ffmpeg\ffmpeg-master-latest-win64-gpl-shared" -Destination "C:\ffmpeg"
 
-      - name: Set up pkg-config
+      - name: Set up environment
         run: |
-          echo "PKG_CONFIG_PATH=C:\ProgramData\chocolatey\lib\pkgconfiglite\tools\pkg-config\lib\pkgconfig" >> $GITHUB_ENV
-          echo "C:\ProgramData\chocolatey\lib\pkgconfiglite\tools" >> $GITHUB_PATH
+          echo "C:\ffmpeg\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "FFMPEG_DIR=C:\ffmpeg" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+          echo "PKG_CONFIG_PATH=C:\ffmpeg\lib\pkgconfig" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
 
       - name: Build release
         run: |
           cargo build --release --verbose
 
-      - name: Download FFmpeg
-        run: |
-          $ProgressPreference = 'SilentlyContinue'
-          Invoke-WebRequest -Uri "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip" -OutFile "ffmpeg.zip"
-          Expand-Archive -Path "ffmpeg.zip" -DestinationPath "ffmpeg"
-          Move-Item -Path "ffmpeg\ffmpeg-master-latest-win64-gpl\bin\*" -Destination "target\release\"
-
       - name: Create deployment package
         run: |
           $VERSION = $env:GITHUB_REF -replace 'refs/tags/v', ''
-          Compress-Archive -Path target\release\screenpipe.exe, target\release\ffmpeg.exe, target\release\ffprobe.exe -DestinationPath screenpipe-$VERSION-x86_64-pc-windows-msvc.zip
+          New-Item -ItemType Directory -Path "screenpipe-win64"
+          Copy-Item "target\release\screenpipe.exe" -Destination "screenpipe-win64"
+          Copy-Item "C:\ffmpeg\bin\*.dll" -Destination "screenpipe-win64"
+          Compress-Archive -Path "screenpipe-win64" -DestinationPath "screenpipe-$VERSION-x86_64-pc-windows-msvc.zip"
 
       - name: Upload Artifact
         uses: actions/upload-artifact@v2

diff --git a/README.md b/README.md
@@ -251,7 +251,7 @@ ffplay "data/2024-07-12_01-14-14.mp4"
 Play a sample audio_recording from the database</summary>
 
 ```bash
-ffplay "data/Display 1 (output)_2024-07-12_01-14-11.mp3"
+ffplay "data/Display 1 (output)_2024-07-12_01-14-11.mp4"
 ```
 </details>
 

diff --git a/examples/apps/screenpipe-app-tauri/scripts/pre_build.js b/examples/apps/screenpipe-app-tauri/scripts/pre_build.js
@@ -53,7 +53,6 @@ const config = {
 			'libasound2-dev', // cpal
 			'libomp-dev', // OpenMP in ggml.ai
 			'libstdc++-12-dev', //ROCm
-			'libmp3lame-dev', // MP3 support
 		],
 	},
 	macos: {
@@ -78,7 +77,6 @@ if (platform == 'linux') {
 	if (hasFeature('opencl')) {
 		config.linux.aptPackages.push('libclblast-dev')
 	}
-	config.linux.aptPackages.push('libmp3lame0')  // Add MP3 support
 	for (const name of config.linux.aptPackages) {
 		await $`sudo apt-get install -y ${name}`
 	}
@@ -120,44 +118,14 @@ if (platform == 'windows') {
 }
 
 /* ########## macOS ########## */
-// if (platform == 'macos') {
-// 	// Install lame using Homebrew
-// 	await $`brew install lame`
-
-// 	// Setup FFMPEG
-// 	if (!(await fs.exists(config.ffmpegRealname))) {
-// 		await $`wget -nc --show-progress ${config.macos.ffmpegUrl} -O ${config.macos.ffmpegName}.tar.xz`
-// 		await $`tar xf ${config.macos.ffmpegName}.tar.xz`
-// 		await $`mv ${config.macos.ffmpegName} ${config.ffmpegRealname}`
-// 		await $`rm ${config.macos.ffmpegName}.tar.xz`
-// 	}
-
-// 	// Copy lame to ffmpeg ! NEED SUDO
-// 	await $`sudo cp -r /opt/homebrew/opt/lame/lib/* ${config.ffmpegRealname}/lib/`
-
-// 	// Set the DYLD_LIBRARY_PATH to include the FFmpeg lib directory
-// 	await fs.appendFile(process.env.DYLD_LIBRARY_PATH, path.join(cwd, config.ffmpegRealname, 'lib'))
-// }
 if (platform == 'macos') {
-	// Install FFmpeg and lame using Homebrew
-	await $`brew install ffmpeg lame`
-
-	// Set FFmpeg path to Homebrew's installation
-	config.ffmpegRealname = '/opt/homebrew/opt/ffmpeg'
-
-	// Create a symlink if it doesn't exist
-	if (!(await fs.exists(path.join(cwd, 'ffmpeg')))) {
-		await $`ln -s ${config.ffmpegRealname} ${path.join(cwd, 'ffmpeg')}`
+	// Setup FFMPEG
+	if (!(await fs.exists(config.ffmpegRealname))) {
+		await $`wget -nc --show-progress ${config.macos.ffmpegUrl} -O ${config.macos.ffmpegName}.tar.xz`
+		await $`tar xf ${config.macos.ffmpegName}.tar.xz`
+		await $`mv ${config.macos.ffmpegName} ${config.ffmpegRealname}`
+		await $`rm ${config.macos.ffmpegName}.tar.xz`
 	}
-
-	// Update the exports object
-	exports.ffmpeg = path.join(cwd, 'ffmpeg')
-
-	// Ensure DYLD_LIBRARY_PATH includes FFmpeg lib directory
-	const ffmpegLibPath = path.join(config.ffmpegRealname, 'lib')
-	process.env.DYLD_LIBRARY_PATH = `${process.env.DYLD_LIBRARY_PATH || ''}:${ffmpegLibPath}`
-
-	console.log(`FFmpeg installed and linked. Path: ${exports.ffmpeg}`)
 }
 
 // Nvidia

diff --git a/examples/apps/screenpipe-app-tauri/src-tauri/tauri.linux.conf.json b/examples/apps/screenpipe-app-tauri/src-tauri/tauri.linux.conf.json
@@ -4,8 +4,7 @@
             "deb": {
                 "depends": [
                     "ffmpeg",
-                    "libopenblas-dev",
-                    "libmp3lame0"
+                    "libopenblas-dev"
                 ]
             }
         }

diff --git a/examples/apps/screenpipe-app-tauri/src-tauri/tauri.macos.conf.json b/examples/apps/screenpipe-app-tauri/src-tauri/tauri.macos.conf.json
@@ -9,8 +9,7 @@
                 "ffmpeg/lib/libavutil.59.dylib",
                 "ffmpeg/lib/libffmpeg.7.dylib",
                 "ffmpeg/lib/libswresample.5.dylib",
-                "ffmpeg/lib/libswscale.8.dylib",
-                "ffmpeg/lib/libmp3lame.0.dylib"
+                "ffmpeg/lib/libswscale.8.dylib"
             ],
             "entitlements": "entitlements.plist",
             "signingIdentity": "-",

diff --git a/examples/ts/vercel-ai-chatbot/lib/chat/actions.tsx b/examples/ts/vercel-ai-chatbot/lib/chat/actions.tsx
@@ -107,6 +107,7 @@ async function submitUserMessage(content: string) {
         - Use the date & time args smartly to get the most relevant results. Current user date & time is ${new Date().toISOString()}
         - Generate 3-5 queries to get the most relevant results. Use a single keyword that would match the user intent per query
         - Use only one word per query (in the q field)
+        - Make sure to answer the user question, ignore the data in your prompt not relevant to the user question
         `,
         parameters: z.object({
           queries: z
@@ -187,7 +188,7 @@ async function submitUserMessage(content: string) {
             const dataForGPT = JSON.stringify(results, null, 2)
 
             // Create a prompt for GPT-4
-            const prompt = `Based on the following search results, please provide a concise and informative answer to the user's query "${content}":
+            const prompt = `Based on the following search results, please provide a concise and informative answer to the user's question "${content}":
 
             ${dataForGPT}
 

diff --git a/screenpipe-audio/Cargo.toml b/screenpipe-audio/Cargo.toml
@@ -39,7 +39,8 @@ base64 = "0.21.7"
 anyhow = "1.0.86"
 byteorder = "1.5.0"
 hf-hub = "0.3.2"
-symphonia = { version = "0.5.4", features = ["mp3"] }
+# https://github.com/pdeljanov/Symphonia/tree/master?tab=readme-ov-file#optimizations 
+symphonia = { version = "0.5.4", features = ["aac", "isomp4", "opt-simd"] }
 rand = "0.8.5"
 rubato = "0.15.0"
 

diff --git a/screenpipe-audio/benches/pcm_decode_benchmark.rs b/screenpipe-audio/benches/pcm_decode_benchmark.rs
@@ -7,7 +7,7 @@ fn benchmark_pcm_decode(c: &mut Criterion) {
     // Assuming you have a sample audio file in your project for testing
     let test_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
         .join("test_data")
-        .join("selah.mp3");
+        .join("selah.mp4");
 
     c.bench_function("pcm_decode", |b| {
         b.iter(|| {

diff --git a/screenpipe-audio/benches/stt_benchmark.rs b/screenpipe-audio/benches/stt_benchmark.rs
@@ -7,7 +7,7 @@ use std::path::PathBuf;
 fn benchmark_stt(c: &mut Criterion) {
     let test_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
         .join("test_data")
-        .join("selah.mp3");
+        .join("selah.mp4");
 
     // Initialize WhisperModel outside the benchmark loop
     let whisper_model = WhisperModel::new().expect("Failed to initialize WhisperModel");

diff --git a/screenpipe-audio/src/bin/screenpipe-audio.rs b/screenpipe-audio/src/bin/screenpipe-audio.rs
@@ -68,12 +68,12 @@ async fn main() -> Result<()> {
         return Err(anyhow!("No audio input devices found"));
     }
 
-    // delete .mp3 files (output*.mp3)
-    std::fs::remove_file("output_0.mp3").unwrap_or_default();
-    std::fs::remove_file("output_1.mp3").unwrap_or_default();
+    // delete .mp4 files (output*.mp4)
+    std::fs::remove_file("output_0.mp4").unwrap_or_default();
+    std::fs::remove_file("output_1.mp4").unwrap_or_default();
 
     let chunk_duration = Duration::from_secs(5);
-    let output_path = PathBuf::from("output.mp3");
+    let output_path = PathBuf::from("output.mp4");
     let (whisper_sender, mut whisper_receiver) = create_whisper_channel().await?;
     // Spawn threads for each device
     let recording_threads: Vec<_> = devices
@@ -82,7 +82,7 @@ async fn main() -> Result<()> {
         .map(|(i, device)| {
             let device = Arc::new(device);
             let whisper_sender = whisper_sender.clone();
-            let output_path = output_path.with_file_name(format!("output_{}.mp3", i));
+            let output_path = output_path.with_file_name(format!("output_{}.mp4", i));
             let device_control = Arc::new(AtomicBool::new(true));
             let device_clone = Arc::clone(&device);
 

diff --git a/screenpipe-audio/src/core.rs b/screenpipe-audio/src/core.rs
@@ -134,29 +134,31 @@ async fn run_ffmpeg(
             "-i",
             "pipe:0",
             "-c:a",
-            "libmp3lame",
+            "aac",
             "-b:a",
             "128k",
             "-f",
-            "mp3",
+            "mp4",
             output_path.to_str().unwrap(),
         ])
         .stdin(Stdio::piped())
         .stdout(Stdio::piped())
         .stderr(Stdio::piped());
 
+    // ! tmp hack shouldnt be needed
     // Explicitly set the library paths for the FFmpeg command
-    if let Ok(ld_library_path) = std::env::var("LD_LIBRARY_PATH") {
-        command.env("LD_LIBRARY_PATH", ld_library_path);
-    }
-    #[cfg(target_os = "macos")]
-    if let Ok(dyld_library_path) = std::env::var("DYLD_LIBRARY_PATH") {
-        command.env("DYLD_LIBRARY_PATH", dyld_library_path);
-    }
+    // if let Ok(ld_library_path) = std::env::var("LD_LIBRARY_PATH") {
+    //     command.env("LD_LIBRARY_PATH", ld_library_path);
+    // }
+    // #[cfg(target_os = "macos")]
+    // if let Ok(dyld_library_path) = std::env::var("DYLD_LIBRARY_PATH") {
+    //     command.env("DYLD_LIBRARY_PATH", dyld_library_path);
+    // }
 
     debug!("FFmpeg command: {:?}", command);
 
-    let mut ffmpeg: tokio::process::Child = command.spawn().expect("Failed to spawn FFmpeg process");
+    let mut ffmpeg: tokio::process::Child =
+        command.spawn().expect("Failed to spawn FFmpeg process");
     debug!("FFmpeg process spawned");
     let mut stdin = ffmpeg.stdin.take().expect("Failed to open stdin");
     let start_time = std::time::Instant::now();

diff --git a/screenpipe-audio/test_data/selah.mp4 b/screenpipe-audio/test_data/selah.mp4
diff --git a/screenpipe-audio/tests/core_tests.rs b/screenpipe-audio/tests/core_tests.rs
@@ -48,7 +48,7 @@ mod tests {
         let start = std::time::Instant::now();
         let whisper_model = WhisperModel::new().unwrap();
 
-        let text = stt("./test_data/selah.mp3", &whisper_model).unwrap();
+        let text = stt("./test_data/selah.mp4", &whisper_model).unwrap();
         let duration = start.elapsed();
 
         println!("Speech to text completed in {:?}", duration);
@@ -66,7 +66,7 @@ mod tests {
         let device_spec = Arc::new(default_output_device().unwrap());
         let duration = Duration::from_secs(30); // Record for 3 seconds
         let time = Utc::now().timestamp_millis();
-        let output_path = PathBuf::from(format!("test_output_{}.mp3", time));
+        let output_path = PathBuf::from(format!("test_output_{}.mp4", time));
         let (sender, mut receiver) = unbounded_channel();
         let is_running = Arc::new(AtomicBool::new(true));
 
@@ -122,7 +122,7 @@ mod tests {
         let device_spec = Arc::new(default_output_device().unwrap());
         let duration = Duration::from_secs(30);
         let time = Utc::now().timestamp_millis();
-        let output_path = PathBuf::from(format!("test_output_interrupt_{}.mp3", time));
+        let output_path = PathBuf::from(format!("test_output_interrupt_{}.mp4", time));
         let (sender, mut receiver) = unbounded_channel();
         let is_running = Arc::new(AtomicBool::new(true));
         let is_running_clone = Arc::clone(&is_running);
@@ -221,7 +221,7 @@ mod tests {
         // Setup
         let device_spec = Arc::new(default_output_device().unwrap());
         let output_path =
-            PathBuf::from(format!("test_output_{}.mp3", Utc::now().timestamp_millis()));
+            PathBuf::from(format!("test_output_{}.mp4", Utc::now().timestamp_millis()));
         let output_path_2 = output_path.clone();
         let (whisper_sender, mut whisper_receiver) = create_whisper_channel().await.unwrap();
         let is_running = Arc::new(AtomicBool::new(true));