From 530b9834b39312893a294b047be9035bd0953cb7 Mon Sep 17 00:00:00 2001 From: priyash7 Date: Mon, 21 Oct 2024 13:17:30 +0530 Subject: [PATCH] removed hardcoded config --- src/core/feluda.py | 9 ++--- src/notebooks/search_similar_videos.ipynb | 42 +++++++++++++++++------ 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/core/feluda.py b/src/core/feluda.py index 9e6880d..4949f15 100644 --- a/src/core/feluda.py +++ b/src/core/feluda.py @@ -23,6 +23,7 @@ def __init__(self, configPath): from core.operators import Operator self.operators = Operator(self.config.operators) + if self.config.store: from core import store @@ -58,13 +59,13 @@ def generator_doc(): def store_video(self,video_url): filename = video_url.split('/')[-1] video = VideoFactory.make_from_url(video_url) - operator = self.operators.get()["vid_vec_rep_clip"] + operator = self.operators.get()[self.config.operators.parameters[0].type] embedding = operator.run(video) if self.store: doc = self.generate_document(filename,embedding) media_type = MediaType.VIDEO - result = self.store['es_vec'].store(media_type,doc) + result = self.store[self.config.store.entities[0].type].store(media_type,doc) return("result:",result) else: raise Exception("Store is not Configured") @@ -72,12 +73,12 @@ def store_video(self,video_url): def search_video(self,video_url): file_name = video_url.split('/')[-1] video = VideoFactory.make_from_url(video_url) - operator = self.operators.get()["vid_vec_rep_clip"] + operator = self.operators.get()[self.config.operators.parameters[0].type] embedding = operator.run(video) average_vector = next(embedding) if self.store: - result = self.store['es_vec'].find("video",average_vector.get("vid_vec")) + result = self.store[self.config.store.entities[0].type].find("video",average_vector.get("vid_vec")) return result else: raise Exception("Store is not Configured") diff --git a/src/notebooks/search_similar_videos.ipynb b/src/notebooks/search_similar_videos.ipynb index 69aafb3..1cf70af 100644 --- a/src/notebooks/search_similar_videos.ipynb +++ b/src/notebooks/search_similar_videos.ipynb @@ -30,14 +30,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[StoreEntity(label='Data Store', type='es_vec', parameters=StoreESParameters(host_name='es', image_index_name='image', text_index_name='text', video_index_name='video', audio_index_name='audio'))]\n", "Installing packages for vid_vec_rep_clip\n" ] }, @@ -46,7 +45,9 @@ "output_type": "stream", "text": [ "/usr/app/venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + " from .autonotebook import tqdm as notebook_tqdm\n", + "/usr/app/venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", + " warnings.warn(\n" ] } ], @@ -57,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +68,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# self.config.operators.parameters[0].type" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -75,7 +85,7 @@ "output_type": "stream", "text": [ "Downloading video from URL\n", - "100% [....................................................] 48488 / 48488\n", + "100% [..............................................................................] 48488 / 48488\n", "Video downloaded\n", "----> 6 (2, [])\n" ] @@ -86,7 +96,7 @@ "('result:', {'message': 'multiple media stored'})" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -99,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -107,7 +117,7 @@ "output_type": "stream", "text": [ "Downloading video from URL\n", - "100% [....................................................] 48488 / 48488\n", + "100% [..............................................................................] 48488 / 48488\n", "Video downloaded\n", "calculation: 1 / (1 + l2norm(params.query_vector, 'vid_vec'))\n" ] @@ -138,10 +148,22 @@ " 'dataset': 'en-speech.mp4',\n", " 'e_kosh_id': '',\n", " 'text': None,\n", + " 'metadata': None},\n", + " {'doc_id': 'UgILrpIBZCwUx_cLPRAI',\n", + " 'dist': 0.9999998,\n", + " 'dataset': 'en-speech.mp4',\n", + " 'e_kosh_id': '',\n", + " 'text': None,\n", + " 'metadata': None},\n", + " {'doc_id': 'UwILrpIBZCwUx_cLPRAI',\n", + " 'dist': 0.9999998,\n", + " 'dataset': 'en-speech.mp4',\n", + " 'e_kosh_id': '',\n", + " 'text': None,\n", " 'metadata': None}]" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" }