Merge pull request #381 from tattle-made/development

merge dev to main
tattle-made · Sep 12, 2024 · 48bfc87 · 48bfc87
2 parents 2b9275a + cf64f4d
commit 48bfc87
Show file tree

Hide file tree

Showing 39 changed files with 5,219 additions and 987 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -1,13 +1,5 @@
 version: 2
 updates:
-  # Enable version updates for npm
-  - package-ecosystem: "npm"
-    # Look for `package.json` and `lock` files in the `root` directory
-    directory: "/docs"
-    # Check the npm registry for updates every day (weekdays)
-    schedule:
-      interval: "weekly"
-
   # Enable version updates for Docker
   - package-ecosystem: "docker"
     # Look for a `Dockerfile` in the `root` directory

diff --git a/.github/workflows/pr-security.yml b/.github/workflows/pr-security.yml
@@ -88,7 +88,7 @@ jobs:
           output: 'trivy-results.sarif'
           limit-severities-for-sarif: true
           severity: 'HIGH,CRITICAL'
-          scanners: 'vuln,config,secret'
+          scanners: 'vuln,misconfig,secret'
           skip-dirs: '.vscode,docs'
           exit-code: '1'
       - name: Upload Trivy scan results to GitHub Security tab

diff --git a/src/Dockerfile b/src/Dockerfile
@@ -30,8 +30,7 @@ ENV PATH="/usr/app/venv/bin:$PATH"
 COPY --chown=python:python base_requirements.txt /usr/app/base_requirements.txt
 RUN pip install --no-cache-dir --require-hashes --no-deps -r /usr/app/base_requirements.txt
 
-RUN apt-get update && apt-get -y upgrade && apt-get install -y --no-install-recommends vim curl
-RUN apt-get install -y --no-install-recommends ffmpeg
+RUN apt-get update --fix-missing && apt-get -y upgrade && apt-get install -y --no-install-recommends vim curl ffmpeg
 # RUN apt-get update && \
 #     apt-get -y upgrade && \
 #     apt-get install -y tesseract-ocr tesseract-ocr-hin

diff --git a/src/base_requirements.in b/src/base_requirements.in
@@ -1 +1 @@
-pip==24.0
+pip==24.2
diff --git a/src/base_requirements.txt b/src/base_requirements.txt
@@ -1,12 +1,12 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile --allow-unsafe --generate-hashes base_requirements.in
 #
 
 # The following packages are considered to be unsafe in a requirements file:
-pip==24.0 \
-    --hash=sha256:ba0d021a166865d2265246961bec0152ff124de910c5cc39f1156ce3fa7c69dc \
-    --hash=sha256:ea9bd1a847e8c5774a5777bb398c19e80bcd4e2aa16a4b301b718fe6f593aba2
+pip==24.2 \
+    --hash=sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2 \
+    --hash=sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8
     # via -r base_requirements.in
diff --git a/src/core/models/media_factory.py b/src/core/models/media_factory.py
@@ -83,7 +83,7 @@ def make_from_url(video_url):
             try:
                 print("Downloading video from URL")
                 wget.download(video_url, out=file_path)
-                print("Video downloaded")
+                print("\nVideo downloaded")
             except Exception as e:
                 print("Error downloading video:", e)
                 raise Exception("Error Downloading Video")
@@ -95,7 +95,7 @@ def make_from_url(video_url):
             try:
                 print("Downloading video from S3")
                 AWSS3Utils.download_file_from_s3(bucket_name, file_key, file_path)
-                print("Video downloaded")
+                print("\nVideo downloaded")
             except Exception as e:
                 print("Error downloading video from S3:", e)
                 raise Exception("Error Downloading Video")
@@ -126,7 +126,7 @@ def make_from_url(audio_url):
             try:
                 print("Downloading audio from URL")
                 wget.download(audio_url, out=file_path)
-                print("Audio downloaded")
+                print("\nAudio downloaded")
             except Exception as e:
                 print("Error downloading audio:", e)
                 raise Exception("Error Downloading audio")
@@ -138,13 +138,13 @@ def make_from_url(audio_url):
             try:
                 print("Downloading audio from S3")
                 AWSS3Utils.download_file_from_s3(bucket_name, file_key, file_path)
-                print("Audio downloaded")
+                print("\nAudio downloaded")
             except Exception as e:
                 print("Error downloading audio from S3:", e)
                 raise Exception("Error Downloading audio")
 
         return {"path": file_path}
-    
+
     @staticmethod
     def make_from_url_to_wav(audio_url):
         temp_dir = tempfile.gettempdir()
@@ -156,7 +156,7 @@ def make_from_url_to_wav(audio_url):
             print("Downloading audio from URL")
             wget.download(audio_url, out=audio_file)
             print("\naudio downloaded")
-            
+
             _, file_extension = os.path.splitext(file_name)
             if file_extension != '.wav':
                 audio = AudioSegment.from_file(audio_file, format=file_extension[1:])
@@ -172,7 +172,7 @@ def make_from_url_to_wav(audio_url):
     @staticmethod
     def make_from_file_on_disk(audio_path):
         return {"path": audio_path}
-    
+
 
 
 media_factory = {

diff --git a/src/core/operators/audio_vec_embedding_clap.py b/src/core/operators/audio_vec_embedding_clap.py
@@ -0,0 +1,68 @@
+"""
+Operator to get audio representation using LAION-CLAP - https://huggingface.co/laion/larger_clap_general
+"""
+
+def initialize(param):
+    """
+    Set up the operator: import dependencies lazily and load LAION-CLAP.
+
+    Args:
+        param (dict): Initialization parameters; not read by this function.
+    """
+    # Imports are bound as module globals so that `run` can use them later.
+    global model, processor, librosa, contextmanager, os, torch, device
+
+    import os
+    from contextlib import contextmanager
+
+    import librosa
+    import torch
+    from transformers import ClapModel, ClapProcessor
+
+    checkpoint = "laion/larger_clap_general"
+    model = ClapModel.from_pretrained(checkpoint)
+    processor = ClapProcessor.from_pretrained(checkpoint)
+
+    # Prefer the GPU when CUDA is available, otherwise stay on the CPU.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    print("audio CLAP Model successfully initialized and loaded onto", device)
+
+
+def run(audio_file):
+    """
+    Runs the operator and computes inference on the audio file.
+
+    The file at `audio_file["path"]` is deleted once processing ends.
+
+    Args:
+        audio_file (dict): `AudioFactory` file object with a "path" key.
+
+    Returns:
+        audio_emb (list): A 512-length vector embedding representing the audio.
+    """
+    sample_rate = 48000  # shared by librosa and the CLAP processor
+
+    @contextmanager
+    def audio_load(fname):
+        """
+        Loads audio and removes the file after use, even if loading fails.
+
+        Yields:
+            numpy.ndarray: Audio data loaded at `sample_rate`.
+        """
+        try:
+            # Load inside `try` so a decode error still triggers the cleanup.
+            samples, _ = librosa.load(fname, sr=sample_rate)
+            yield samples
+        finally:
+            if os.path.exists(fname):  # avoid masking the original error
+                os.remove(fname)
+
+    with audio_load(audio_file["path"]) as audio_var:
+        inputs = processor(audios=audio_var, sampling_rate=sample_rate, return_tensors="pt")
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+        with torch.no_grad():
+            audio_emb = model.get_audio_features(**inputs)
+        audio_emb = audio_emb.squeeze(0).tolist()
+        return audio_emb
diff --git a/src/core/operators/audio_vec_embedding_clap_requirements.in b/src/core/operators/audio_vec_embedding_clap_requirements.in
@@ -0,0 +1,3 @@
+librosa==0.10.2.post1
+transformers==4.44.0
+torch==2.4.0