Skip to content

Commit

Permalink
Merge pull request #381 from tattle-made/development
Browse files Browse the repository at this point in the history
merge dev to main
  • Loading branch information
aatmanvaidya authored Sep 12, 2024
2 parents 2b9275a + cf64f4d commit 48bfc87
Show file tree
Hide file tree
Showing 39 changed files with 5,219 additions and 987 deletions.
8 changes: 0 additions & 8 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
version: 2
updates:
# Enable version updates for npm
- package-ecosystem: "npm"
# Look for `package.json` and `lock` files in the `root` directory
directory: "/docs"
# Check the npm registry for updates every day (weekdays)
schedule:
interval: "weekly"

# Enable version updates for Docker
- package-ecosystem: "docker"
# Look for a `Dockerfile` in the `root` directory
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr-security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
output: 'trivy-results.sarif'
limit-severities-for-sarif: true
severity: 'HIGH,CRITICAL'
scanners: 'vuln,config,secret'
scanners: 'vuln,misconfig,secret'
skip-dirs: '.vscode,docs'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security tab
Expand Down
3 changes: 1 addition & 2 deletions src/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ ENV PATH="/usr/app/venv/bin:$PATH"
COPY --chown=python:python base_requirements.txt /usr/app/base_requirements.txt
RUN pip install --no-cache-dir --require-hashes --no-deps -r /usr/app/base_requirements.txt

RUN apt-get update && apt-get -y upgrade && apt-get install -y --no-install-recommends vim curl
RUN apt-get install -y --no-install-recommends ffmpeg
RUN apt-get update --fix-missing && apt-get -y upgrade && apt-get install -y --no-install-recommends vim curl ffmpeg
# RUN apt-get update && \
# apt-get -y upgrade && \
# apt-get install -y tesseract-ocr tesseract-ocr-hin
Expand Down
2 changes: 1 addition & 1 deletion src/base_requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pip==24.0
pip==24.2
8 changes: 4 additions & 4 deletions src/base_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --allow-unsafe --generate-hashes base_requirements.in
#

# The following packages are considered to be unsafe in a requirements file:
pip==24.0 \
--hash=sha256:ba0d021a166865d2265246961bec0152ff124de910c5cc39f1156ce3fa7c69dc \
--hash=sha256:ea9bd1a847e8c5774a5777bb398c19e80bcd4e2aa16a4b301b718fe6f593aba2
pip==24.2 \
--hash=sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2 \
--hash=sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8
# via -r base_requirements.in
14 changes: 7 additions & 7 deletions src/core/models/media_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def make_from_url(video_url):
try:
print("Downloading video from URL")
wget.download(video_url, out=file_path)
print("Video downloaded")
print("\nVideo downloaded")
except Exception as e:
print("Error downloading video:", e)
raise Exception("Error Downloading Video")
Expand All @@ -95,7 +95,7 @@ def make_from_url(video_url):
try:
print("Downloading video from S3")
AWSS3Utils.download_file_from_s3(bucket_name, file_key, file_path)
print("Video downloaded")
print("\nVideo downloaded")
except Exception as e:
print("Error downloading video from S3:", e)
raise Exception("Error Downloading Video")
Expand Down Expand Up @@ -126,7 +126,7 @@ def make_from_url(audio_url):
try:
print("Downloading audio from URL")
wget.download(audio_url, out=file_path)
print("Audio downloaded")
print("\nAudio downloaded")
except Exception as e:
print("Error downloading audio:", e)
raise Exception("Error Downloading audio")
Expand All @@ -138,13 +138,13 @@ def make_from_url(audio_url):
try:
print("Downloading audio from S3")
AWSS3Utils.download_file_from_s3(bucket_name, file_key, file_path)
print("Audio downloaded")
print("\nAudio downloaded")
except Exception as e:
print("Error downloading audio from S3:", e)
raise Exception("Error Downloading audio")

return {"path": file_path}

@staticmethod
def make_from_url_to_wav(audio_url):
temp_dir = tempfile.gettempdir()
Expand All @@ -156,7 +156,7 @@ def make_from_url_to_wav(audio_url):
print("Downloading audio from URL")
wget.download(audio_url, out=audio_file)
print("\naudio downloaded")

_, file_extension = os.path.splitext(file_name)
if file_extension != '.wav':
audio = AudioSegment.from_file(audio_file, format=file_extension[1:])
Expand All @@ -172,7 +172,7 @@ def make_from_url_to_wav(audio_url):
@staticmethod
def make_from_file_on_disk(audio_path):
return {"path": audio_path}



media_factory = {
Expand Down
68 changes: 68 additions & 0 deletions src/core/operators/audio_vec_embedding_clap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Operator to get audio representation using LAION-CLAP - https://huggingface.co/laion/larger_clap_general
"""

def initialize(param):
    """
    Set up the LAION-CLAP audio embedding operator.

    Dependencies are imported lazily here and published, together with the
    loaded model, processor and selected device, as module globals so that
    `run` can use them afterwards.

    Args:
        param (dict): Operator configuration. Not read by this function.
    """
    global model, processor, librosa, contextmanager, os, torch, device

    from contextlib import contextmanager
    import os
    import librosa
    from transformers import ClapModel, ClapProcessor
    import torch

    # Model weights and processor come from the same checkpoint.
    checkpoint = "laion/larger_clap_general"
    model = ClapModel.from_pretrained(checkpoint)
    processor = ClapProcessor.from_pretrained(checkpoint)

    # Use the GPU when one is available, otherwise stay on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)

    print("audio CLAP Model successfully initialized and loaded onto", device)


def run(audio_file):
    """
    Compute the LAION-CLAP embedding of an audio file, then delete the file.

    Relies on the module globals populated by `initialize` (`model`,
    `processor`, `device`, `librosa`, `torch`, `os`, `contextmanager`), so
    `initialize` must have been called first.

    Args:
        audio_file (dict): `AudioFactory` file object; its "path" entry is the
            location of the audio file on disk.
    Returns:
        audio_emb (list): A 512-length vector embedding representing the audio.
    """
    # The audio is resampled to this rate on load, and the same value is
    # passed to the processor so the feature extractor can validate it.
    sampling_rate = 48000
    audio = audio_file["path"]

    @contextmanager
    def audio_load(fname):
        """
        Loads audio and removes the file after use.
        Args:
            fname (str): Path to the audio file.
        Yields:
            numpy.ndarray: Loaded audio data.
        """
        try:
            # Loading happens inside the try so the file is cleaned up even
            # when decoding fails, not only after a successful load.
            samples, _ = librosa.load(fname, sr=sampling_rate)
            yield samples
        finally:
            # Guarded so a missing file does not mask the original error.
            if os.path.exists(fname):
                os.remove(fname)

    with audio_load(audio) as audio_var:
        inputs = processor(
            audios=audio_var, sampling_rate=sampling_rate, return_tensors="pt"
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}
        # Inference only: no gradients are needed.
        with torch.no_grad():
            audio_emb = model.get_audio_features(**inputs)
        # Drop the batch dimension and convert to a plain Python list.
        audio_emb = audio_emb.squeeze(0).tolist()
    return audio_emb
3 changes: 3 additions & 0 deletions src/core/operators/audio_vec_embedding_clap_requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
librosa==0.10.2.post1
transformers==4.44.0
torch==2.4.0
Loading

0 comments on commit 48bfc87

Please sign in to comment.