From 2a6d3270a491a37360202eaa28dc0f8dd65d39a7 Mon Sep 17 00:00:00 2001
From: xorsuyash <suyashsrivastavam87@gmail.com>
Date: Wed, 7 Feb 2024 17:42:19 +0530
Subject: [PATCH 1/2] youtube_embeddings added

---
 requirements.txt                              | 156 ++++++++++++++++++
 src/chunking/MPNet/local/model.py             |   2 +-
 src/youtube_embedding/Dockerfile              |  15 ++
 src/youtube_embedding/README.md               |   0
 src/youtube_embedding/__init__.py             |   0
 src/youtube_embedding/api.py                  |  68 ++++++++
 src/youtube_embedding/chunking/__init__.py    |   1 +
 src/youtube_embedding/chunking/transform.py   | 128 ++++++++++++++
 src/youtube_embedding/model.py                |  50 ++++++
 src/youtube_embedding/request.py              |  14 ++
 src/youtube_embedding/requirements.txt        |   8 +
 src/youtube_embedding/scraper/__init__.py     |   1 +
 src/youtube_embedding/scraper/scrape_audio.py |   4 +
 .../scraper/scrape_transcript.py              |  56 +++++++
 14 files changed, 502 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt
 create mode 100644 src/youtube_embedding/Dockerfile
 create mode 100644 src/youtube_embedding/README.md
 create mode 100644 src/youtube_embedding/__init__.py
 create mode 100644 src/youtube_embedding/api.py
 create mode 100644 src/youtube_embedding/chunking/__init__.py
 create mode 100644 src/youtube_embedding/chunking/transform.py
 create mode 100644 src/youtube_embedding/model.py
 create mode 100644 src/youtube_embedding/request.py
 create mode 100644 src/youtube_embedding/requirements.txt
 create mode 100644 src/youtube_embedding/scraper/__init__.py
 create mode 100644 src/youtube_embedding/scraper/scrape_audio.py
 create mode 100644 src/youtube_embedding/scraper/scrape_transcript.py

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..29c5767
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,156 @@
+aiofiles==23.2.1
+aiohttp==3.9.1
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.2.0
+asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
+async-timeout==4.0.3
+attrs==23.2.0
+bitarray==2.9.2
+blinker==1.7.0
+catalogue==2.0.10
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+cmake==3.28.1
+colbert-ai==0.2.18
+comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1704278392174/work
+dataclasses-json==0.6.4
+datasets==2.16.1
+debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534305529/work
+decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
+Deprecated==1.2.14
+dill==0.3.7
+dirtyjson==1.0.8
+distro==1.9.0
+exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1704921103267/work
+executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work
+faiss==1.7.4
+faiss-cpu==1.7.4
+faiss-gpu==1.7.2
+filelock==3.13.1
+Flask==3.0.2
+frozenlist==1.4.1
+fsspec==2023.10.0
+git-python==1.0.3
+gitdb==4.0.11
+GitPython==3.1.41
+greenlet==3.0.3
+h11==0.14.0
+h2==4.1.0
+hpack==4.0.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.20.3
+Hypercorn==0.16.0
+hyperframe==6.0.1
+idna==3.6
+importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1703269254275/work
+ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1707182759703/work
+ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1701831663892/work
+itsdangerous==2.1.2
+jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work
+Jinja2==3.1.3
+joblib==1.3.2
+jsonpatch==1.33
+jsonpointer==2.4
+jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1699283905679/work
+jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1704727023078/work
+langchain==0.1.5
+langchain-community==0.0.17
+langchain-core==0.1.19
+langsmith==0.0.86
+llama-index==0.9.44
+MarkupSafe==2.1.5
+marshmallow==3.20.2
+matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.15
+mypy-extensions==1.0.0
+nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work
+networkx==3.2.1
+ninja==1.11.1.1
+nltk==3.8.1
+numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1707225342954/work/dist/numpy-1.26.4-cp39-cp39-linux_x86_64.whl#sha256=c799942b5898f6e6c60264d1663a6469a475290e758c654aeeb78e2596463abd
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+onnx==1.15.0
+openai==1.11.1
+packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work
+pandas==2.2.0
+parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
+pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work
+pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
+pillow==10.2.0
+platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1706713388748/work
+priority==2.0.0
+prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1702399386289/work
+protobuf==4.25.2
+psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1705722404069/work
+ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
+pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
+pyarrow==15.0.0
+pyarrow-hotfix==0.6
+pydantic==2.6.1
+pydantic_core==2.16.2
+Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1700607939962/work
+python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
+python-dotenv==1.0.1
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1701783166972/work
+Quart==0.19.4
+RAGatouille==0.0.6b5
+regex==2023.12.25
+requests==2.31.0
+ruff==0.1.15
+safetensors==0.4.2
+scikit-learn==1.4.0
+scipy==1.12.0
+sentence-transformers==2.3.1
+sentencepiece==0.1.99
+six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
+smmap==5.0.1
+sniffio==1.3.0
+SQLAlchemy==2.0.25
+srsly==2.4.8
+stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
+sympy==1.12
+taskgroup==0.0.0a4
+tenacity==8.2.3
+threadpoolctl==3.2.0
+tiktoken==0.5.2
+tokenizers==0.15.1
+tomli==2.0.1
+torch==2.2.0
+tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1695373443748/work
+tqdm==4.66.1
+traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1704212992681/work
+transformers==4.37.2
+triton==2.2.0
+typing-inspect==0.9.0
+typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1702176139754/work
+tzdata==2023.4
+ujson==5.9.0
+urllib3==2.2.0
+voyager==2.0.2
+wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work
+Werkzeug==3.0.1
+wrapt==1.16.0
+wsproto==1.2.0
+xxhash==3.4.1
+yarl==1.9.4
+youtube-dl==2021.12.17
+youtube-transcript-api==0.6.2
+zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work
diff --git a/src/chunking/MPNet/local/model.py b/src/chunking/MPNet/local/model.py
index a5faa1a..b6eada0 100644
--- a/src/chunking/MPNet/local/model.py
+++ b/src/chunking/MPNet/local/model.py
@@ -158,4 +158,4 @@ async def inference(self, request: ModelRequest):
 
         # Properly escape the CSV string
         
-        return csv_string
+        return csv_string
\ No newline at end of file
diff --git a/src/youtube_embedding/Dockerfile b/src/youtube_embedding/Dockerfile
new file mode 100644
index 0000000..dfb36ba
--- /dev/null
+++ b/src/youtube_embedding/Dockerfile
@@ -0,0 +1,15 @@
+
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install requirements first so this layer is cached across code-only changes
+# (--no-cache-dir keeps pip's download cache out of the image).
+COPY requirements.txt requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+EXPOSE 8000
+# Start the Quart app with hypercorn on port 8000
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/youtube_embedding/README.md b/src/youtube_embedding/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/src/youtube_embedding/__init__.py b/src/youtube_embedding/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/youtube_embedding/api.py b/src/youtube_embedding/api.py
new file mode 100644
index 0000000..c3c560b
--- /dev/null
+++ b/src/youtube_embedding/api.py
@@ -0,0 +1,68 @@
+from quart import Quart, request, jsonify
+from scraper import transcript  
+import aiohttp
+import io 
+from model import Model
+from request import ModelRequest
+from chunking import TranscriptChunker
+import json
+
+app = Quart(__name__)
+
+
+model = None
+
+@app.before_serving
+async def startup():
+    """Create the shared aiohttp session and the Model instance before serving."""
+    global model
+    app.client = aiohttp.ClientSession()
+    model = Model(app)
+
+transcript_data_store = {}
+@app.route('/get_transcript', methods=['POST'])
+async def get_transcript():
+    """Fetch the transcript for the posted ``url``, cache it and return it as JSON."""
+    data = await request.get_json()
+
+    # get_json() may return None (non-JSON body); treat that as a missing URL.
+    if not isinstance(data, dict) or 'url' not in data:
+        return jsonify({'error': 'URL is required'}), 400
+
+    url = data['url']
+    transcript_path, transcript_content = transcript(url)
+
+    # Store and return the same record so /Query can reuse it later.
+    entry = {
+        'transcript_path': transcript_path,
+        'transcript_data': transcript_content
+    }
+    transcript_data_store[url] = entry
+    return jsonify(entry)
+
+@app.route('/Query',methods=['POST'])
+async def query():
+    """Search the transcript of ``url`` for ``query`` and return the hits as JSON.
+
+    The transcript is fetched and cached on first use; inference then runs for
+    every request (previously a cache miss left ``response`` unassigned).
+    """
+    data = await request.get_json()
+
+    # get_json() may return None (non-JSON body); treat it like missing fields.
+    if not isinstance(data, dict) or 'url' not in data or 'query' not in data:
+        return jsonify({'error': 'URL and query are required'}), 400
+
+    url = data['url']
+    if url not in transcript_data_store:
+        transcript_path, transcript_content = transcript(url)
+        transcript_data_store[url] = {
+            'transcript_path': transcript_path,
+            'transcript_data': transcript_content
+        }
+
+    req = ModelRequest(data, transcript_data_store)
+    response = await model.inference(req)
+
+    return jsonify({'search_results': response})
+
diff --git a/src/youtube_embedding/chunking/__init__.py b/src/youtube_embedding/chunking/__init__.py
new file mode 100644
index 0000000..020e1d6
--- /dev/null
+++ b/src/youtube_embedding/chunking/__init__.py
@@ -0,0 +1 @@
+from chunking.transform import *
\ No newline at end of file
diff --git a/src/youtube_embedding/chunking/transform.py b/src/youtube_embedding/chunking/transform.py
new file mode 100644
index 0000000..d470f24
--- /dev/null
+++ b/src/youtube_embedding/chunking/transform.py
@@ -0,0 +1,128 @@
+"""
+Transforms transcript data and divides it into chunks.
+Initially, chunking into 4-minute video frames is implemented;
+other, more optimized algorithms will be implemented in
+further iterations.
+"""
+
+import json 
+from typing import List,Dict,Any
+import pandas as pd 
+
+
+class TranscriptChunker:
+    """
+    Split a YouTube transcript into consecutive chunks of bounded duration.
+
+    Attributes:
+        chunk_size_seconds (int): The size of each chunk in seconds.
+        transcript_df (pd.DataFrame): Transcript rows loaded by ``fit``.
+        result_chunks (dict): ``{'chunk1': [row, ...], ...}`` built by ``fit``.
+
+    Methods:
+        fit(transcript_path: str) -> None:
+            Reads the transcript data from a JSON file and builds the chunks.
+
+        _transform() -> Dict[str,List[Dict[str,Any]]]:
+            Transforms the transcript data into chunks of specified size.
+
+        chunks() -> Dict[str,List[Dict[str,Any]]]:
+            Returns the resulting chunks.
+
+        metadata() -> Dict[str,Dict[str,Any]]:
+            Returns word count, start and end time (minutes) of every chunk.
+
+    Example:
+        chunker = TranscriptChunker(chunk_size_seconds=240)
+        chunker.fit('transcript.json')
+        chunks = chunker.chunks()
+        metadata = chunker.metadata()
+    """
+
+    def __init__(self,chunk_size_seconds:int=240)->None:
+        self.chunk_size_seconds:int=chunk_size_seconds
+        # The transcript is held as a pandas DataFrame for easier manipulation.
+        self.transcript_df:pd.DataFrame = None
+        self.result_chunks:Dict[str,List[Dict[str,Any]]] = None
+
+    def fit(self,transcript_path:str)->None:
+        """Load the transcript JSON at ``transcript_path`` and chunk it."""
+        with open(transcript_path,'r') as file:
+            transcript_data=json.load(file)
+        self.transcript_df=pd.DataFrame(transcript_data)
+        self.result_chunks=self._transform()
+
+    def _transform(self)->Dict[str,List[Dict[str,Any]]]:
+        """
+        Group transcript rows into chunks whose total duration fits the limit.
+
+        Rows are added to the current chunk while the summed ``duration`` stays
+        within ``chunk_size_seconds``; the row that would overflow starts the
+        next chunk. A single row longer than the limit gets a chunk of its own.
+
+        Raises:
+            ValueError: If no transcript has been loaded yet.
+        """
+        if self.transcript_df is None:
+            raise ValueError("Transcript data not provided.")
+
+        current_chunk=[]
+        current_chunk_duration = 0
+
+        # Dictionary to store all chunks
+        self.all_chunks={}
+        chunk_counter=1
+
+        for _,row in self.transcript_df.iterrows():
+            if current_chunk_duration+row['duration']<=self.chunk_size_seconds:
+                current_chunk.append(row.to_dict())
+                current_chunk_duration+=row['duration']
+                continue
+
+            # Flush only a non-empty chunk (an over-long first row has no predecessor).
+            if current_chunk:
+                self.all_chunks[f'chunk{chunk_counter}']=current_chunk
+                chunk_counter+=1
+            current_chunk=[row.to_dict()]
+            current_chunk_duration = row['duration']
+
+        if current_chunk:
+            self.all_chunks[f'chunk{chunk_counter}'] = current_chunk
+
+        return self.all_chunks
+
+    def chunks(self)->Dict[str,List[Dict[str,Any]]]:
+        """Return the chunks built by ``fit``; raise ValueError if not fitted."""
+        if self.result_chunks is None:
+            raise ValueError("Call .fit() method first to transform data into chunks")
+        return self.result_chunks
+
+    def metadata(self)->Dict[str,Dict[str,Any]]:
+        """
+        Return per-chunk ``chunk_length`` (words), ``start_time`` and ``end_time``.
+
+        Times are in minutes, rounded to two decimals. Raises ValueError if
+        ``fit`` has not been called.
+        """
+        if self.result_chunks is None:
+            raise ValueError("Call .fit() method first to transform data into chunks")
+
+        self.meta_dict={}
+        for name,rows in self.result_chunks.items():
+            if not rows:
+                continue
+            words=sum(len(item['text'].split()) for item in rows)
+            end_time=rows[-1]['start']+rows[-1]['duration']
+            self.meta_dict[name]={
+                'chunk_length':words,
+                'start_time':round(rows[0]['start']/60,2),
+                'end_time':round(end_time/60,2),
+            }
+        return self.meta_dict
+            
+
+if __name__=='__main__':
+    # Manual smoke run against a hard-coded local transcript file.
+    chunker=TranscriptChunker()
+    chunker.fit('/home/suyash/samagra/ai-tools/src/youtube_embedding/scraper/transcript.json')
+    print(chunker.metadata())
diff --git a/src/youtube_embedding/model.py b/src/youtube_embedding/model.py
new file mode 100644
index 0000000..fd73ea8
--- /dev/null
+++ b/src/youtube_embedding/model.py
@@ -0,0 +1,50 @@
+from request import ModelRequest
+from ragatouille import RAGPretrainedModel
+from chunking import TranscriptChunker
+
+
+class Model():
+    """Singleton wrapper around a ColBERT ``RAGPretrainedModel``."""
+
+    def __new__(cls,context):
+        """Return the shared instance, loading the pretrained model only once."""
+        cls.context=context
+        if not hasattr(cls,'instance'):
+            cls.instance= super(Model,cls).__new__(cls)
+            # Load inside the guard so repeated construction does not reload it.
+            cls.model=RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
+        return cls.instance
+
+    async def inference(self,request:ModelRequest):
+        """
+        Chunk the request's transcript, index the chunks and search them.
+
+        Args:
+            request: Carries ``query`` and ``transcript_path`` (a JSON transcript).
+        Returns:
+            The result of ``RAGPretrainedModel.search`` for ``request.query``.
+        """
+
+        #chunking
+        chunker=TranscriptChunker()
+        chunker.fit(request.transcript_path)
+        chunked_data=chunker.chunks()
+
+        # One document per chunk: the chunk's caption texts joined by spaces.
+        # (Previously the list itself was iterated with ``.keys()``, which
+        # raises AttributeError, so no document ever reached the index.)
+        RAG_DATA=[
+            " ".join(item['text'] for item in rows)
+            for rows in chunked_data.values()
+        ]
+
+        #embeddings and index creation
+        # NOTE(review): the index is rebuilt on every call under the fixed
+        # name "my-index"; consider caching one index per video.
+        index_path=self.model.index(index_name="my-index",collection=RAG_DATA)
+
+        #query
+        RAG=RAGPretrainedModel.from_index(index_path)
+        response=RAG.search(request.query)
+
+        return response
\ No newline at end of file
diff --git a/src/youtube_embedding/request.py b/src/youtube_embedding/request.py
new file mode 100644
index 0000000..43e1391
--- /dev/null
+++ b/src/youtube_embedding/request.py
@@ -0,0 +1,14 @@
+import json 
+
+
+class ModelRequest():
+    """Bundle a query with the cached transcript record of its video URL."""
+    def __init__(self,data,trasnscript_data_store):
+        self.query=data['query']
+        self.url=data['url']
+        self.transcript_path=trasnscript_data_store[self.url]['transcript_path']
+        self.transcript_data=trasnscript_data_store[self.url]['transcript_data']
+
+    def to_json(self):
+        """Return the request as a JSON string (json.dump would need a file object)."""
+        return json.dumps(self,default=lambda o:o.__dict__,sort_keys=True,indent=4)
\ No newline at end of file
diff --git a/src/youtube_embedding/requirements.txt b/src/youtube_embedding/requirements.txt
new file mode 100644
index 0000000..e8305fe
--- /dev/null
+++ b/src/youtube_embedding/requirements.txt
@@ -0,0 +1,8 @@
+faiss-cpu==1.7.4
+openai==1.11.1
+pandas==2.2.0
+Quart==0.19.4
+RAGatouille==0.0.6b5
+youtube-dl==2021.12.17
+youtube-transcript-api==0.6.2
+
diff --git a/src/youtube_embedding/scraper/__init__.py b/src/youtube_embedding/scraper/__init__.py
new file mode 100644
index 0000000..e9c2a4a
--- /dev/null
+++ b/src/youtube_embedding/scraper/__init__.py
@@ -0,0 +1 @@
+from scraper.scrape_transcript import * 
\ No newline at end of file
diff --git a/src/youtube_embedding/scraper/scrape_audio.py b/src/youtube_embedding/scraper/scrape_audio.py
new file mode 100644
index 0000000..c4a0b88
--- /dev/null
+++ b/src/youtube_embedding/scraper/scrape_audio.py
@@ -0,0 +1,4 @@
+"""
+Scrapes the audio from a YouTube video and then uses it
+for transcription.
+"""
diff --git a/src/youtube_embedding/scraper/scrape_transcript.py b/src/youtube_embedding/scraper/scrape_transcript.py
new file mode 100644
index 0000000..55b3091
--- /dev/null
+++ b/src/youtube_embedding/scraper/scrape_transcript.py
@@ -0,0 +1,56 @@
+"""
+Scrapes the transcript of YouTube videos along with time frames.
+"""
+from youtube_transcript_api import YouTubeTranscriptApi
+import json 
+import re
+import os 
+
+
+def vid_id(Url:str)->str:
+    """
+    Retrieve the video id from a YouTube URL.
+    args:
+        Url: watch URL (``...?v=<id>``) or short/embed URL (``youtu.be/<id>``).
+    returns:
+        video_id: the id, or None (after printing a notice) when none is found.
+    """
+    # ``v`` must be a whole query parameter; stop at '&' or '#'. The second
+    # alternative covers youtu.be/<id>, /embed/<id> and /shorts/<id> links.
+    match = re.search(r'(?:^|[?&])v=([^&#]+)|(?:youtu\.be/|/embed/|/shorts/)([^?&#/]+)', Url)
+    if match is None:
+        print("Video ID not found in URL.")
+        return None
+    return match.group(1) or match.group(2)
+
+def transcript(Url:str)->tuple:
+    """
+    Retrieve the transcript (with timestamps) of a YouTube video and store it
+    in a JSON file.
+    args:
+        Url: URL of the video.
+    returns:
+        (absolute_path, transcript): path of the JSON file and the transcript data.
+    raises:
+        Whatever the fetch or file write raised, after printing the error.
+    """
+    # NOTE(review): every video is written to the same file, so a path cached
+    # for an earlier URL will point at the latest transcript.
+    output_file='transcript.json'
+    v_id=vid_id(Url)
+
+    try:
+        transcript_data = YouTubeTranscriptApi.get_transcript(v_id)
+        with open(output_file, 'w') as f:
+            json.dump(transcript_data, f)
+    except Exception as e:
+        # Re-raise: falling through used to hit an unbound local at the return.
+        print(f"An error occurred: {e}")
+        raise
+    print("Transcript successfully saved to transcript.json")
+    return os.path.abspath(output_file),transcript_data
+
+
+if __name__=='__main__':
+    # Placeholder: running this module directly does nothing.
+    pass 
\ No newline at end of file

From a7b29ab49bae46853dca45c517b4f4a1824a22ef Mon Sep 17 00:00:00 2001
From: xorsuyash <suyashsrivastavam87@gmail.com>
Date: Wed, 7 Feb 2024 17:57:24 +0530
Subject: [PATCH 2/2] corrected last commit

---
 requirements.txt | 156 -----------------------------------------------
 1 file changed, 156 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 29c5767..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,156 +0,0 @@
-aiofiles==23.2.1
-aiohttp==3.9.1
-aiosignal==1.3.1
-annotated-types==0.6.0
-anyio==4.2.0
-asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
-async-timeout==4.0.3
-attrs==23.2.0
-bitarray==2.9.2
-blinker==1.7.0
-catalogue==2.0.10
-certifi==2024.2.2
-charset-normalizer==3.3.2
-click==8.1.7
-cmake==3.28.1
-colbert-ai==0.2.18
-comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1704278392174/work
-dataclasses-json==0.6.4
-datasets==2.16.1
-debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534305529/work
-decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
-Deprecated==1.2.14
-dill==0.3.7
-dirtyjson==1.0.8
-distro==1.9.0
-exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1704921103267/work
-executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work
-faiss==1.7.4
-faiss-cpu==1.7.4
-faiss-gpu==1.7.2
-filelock==3.13.1
-Flask==3.0.2
-frozenlist==1.4.1
-fsspec==2023.10.0
-git-python==1.0.3
-gitdb==4.0.11
-GitPython==3.1.41
-greenlet==3.0.3
-h11==0.14.0
-h2==4.1.0
-hpack==4.0.0
-httpcore==1.0.2
-httpx==0.26.0
-huggingface-hub==0.20.3
-Hypercorn==0.16.0
-hyperframe==6.0.1
-idna==3.6
-importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1703269254275/work
-ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1707182759703/work
-ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1701831663892/work
-itsdangerous==2.1.2
-jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work
-Jinja2==3.1.3
-joblib==1.3.2
-jsonpatch==1.33
-jsonpointer==2.4
-jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1699283905679/work
-jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1704727023078/work
-langchain==0.1.5
-langchain-community==0.0.17
-langchain-core==0.1.19
-langsmith==0.0.86
-llama-index==0.9.44
-MarkupSafe==2.1.5
-marshmallow==3.20.2
-matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
-mpmath==1.3.0
-multidict==6.0.5
-multiprocess==0.70.15
-mypy-extensions==1.0.0
-nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work
-networkx==3.2.1
-ninja==1.11.1.1
-nltk==3.8.1
-numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1707225342954/work/dist/numpy-1.26.4-cp39-cp39-linux_x86_64.whl#sha256=c799942b5898f6e6c60264d1663a6469a475290e758c654aeeb78e2596463abd
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==8.9.2.26
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-nccl-cu12==2.19.3
-nvidia-nvjitlink-cu12==12.3.101
-nvidia-nvtx-cu12==12.1.105
-onnx==1.15.0
-openai==1.11.1
-packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work
-pandas==2.2.0
-parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
-pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work
-pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
-pillow==10.2.0
-platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1706713388748/work
-priority==2.0.0
-prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1702399386289/work
-protobuf==4.25.2
-psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1705722404069/work
-ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
-pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
-pyarrow==15.0.0
-pyarrow-hotfix==0.6
-pydantic==2.6.1
-pydantic_core==2.16.2
-Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1700607939962/work
-python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
-python-dotenv==1.0.1
-pytz==2024.1
-PyYAML==6.0.1
-pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1701783166972/work
-Quart==0.19.4
-RAGatouille==0.0.6b5
-regex==2023.12.25
-requests==2.31.0
-ruff==0.1.15
-safetensors==0.4.2
-scikit-learn==1.4.0
-scipy==1.12.0
-sentence-transformers==2.3.1
-sentencepiece==0.1.99
-six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
-smmap==5.0.1
-sniffio==1.3.0
-SQLAlchemy==2.0.25
-srsly==2.4.8
-stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
-sympy==1.12
-taskgroup==0.0.0a4
-tenacity==8.2.3
-threadpoolctl==3.2.0
-tiktoken==0.5.2
-tokenizers==0.15.1
-tomli==2.0.1
-torch==2.2.0
-tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1695373443748/work
-tqdm==4.66.1
-traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1704212992681/work
-transformers==4.37.2
-triton==2.2.0
-typing-inspect==0.9.0
-typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1702176139754/work
-tzdata==2023.4
-ujson==5.9.0
-urllib3==2.2.0
-voyager==2.0.2
-wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work
-Werkzeug==3.0.1
-wrapt==1.16.0
-wsproto==1.2.0
-xxhash==3.4.1
-yarl==1.9.4
-youtube-dl==2021.12.17
-youtube-transcript-api==0.6.2
-zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work