From 2a6d3270a491a37360202eaa28dc0f8dd65d39a7 Mon Sep 17 00:00:00 2001 From: xorsuyash Date: Wed, 7 Feb 2024 17:42:19 +0530 Subject: [PATCH 1/2] youtube_embeddings added --- requirements.txt | 156 ++++++++++++++++++ src/chunking/MPNet/local/model.py | 2 +- src/youtube_embedding/Dockerfile | 15 ++ src/youtube_embedding/README.md | 0 src/youtube_embedding/__init__.py | 0 src/youtube_embedding/api.py | 68 ++++++++ src/youtube_embedding/chunking/__init__.py | 1 + src/youtube_embedding/chunking/transform.py | 128 ++++++++++++++ src/youtube_embedding/model.py | 50 ++++++ src/youtube_embedding/request.py | 14 ++ src/youtube_embedding/requirements.txt | 8 + src/youtube_embedding/scraper/__init__.py | 1 + src/youtube_embedding/scraper/scrape_audio.py | 4 + .../scraper/scrape_transcript.py | 56 +++++++ 14 files changed, 502 insertions(+), 1 deletion(-) create mode 100644 requirements.txt create mode 100644 src/youtube_embedding/Dockerfile create mode 100644 src/youtube_embedding/README.md create mode 100644 src/youtube_embedding/__init__.py create mode 100644 src/youtube_embedding/api.py create mode 100644 src/youtube_embedding/chunking/__init__.py create mode 100644 src/youtube_embedding/chunking/transform.py create mode 100644 src/youtube_embedding/model.py create mode 100644 src/youtube_embedding/request.py create mode 100644 src/youtube_embedding/requirements.txt create mode 100644 src/youtube_embedding/scraper/__init__.py create mode 100644 src/youtube_embedding/scraper/scrape_audio.py create mode 100644 src/youtube_embedding/scraper/scrape_transcript.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..29c5767 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,156 @@ +aiofiles==23.2.1 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +anyio==4.2.0 +asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work +async-timeout==4.0.3 +attrs==23.2.0 +bitarray==2.9.2 +blinker==1.7.0 +catalogue==2.0.10 
+certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +cmake==3.28.1 +colbert-ai==0.2.18 +comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1704278392174/work +dataclasses-json==0.6.4 +datasets==2.16.1 +debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534305529/work +decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work +Deprecated==1.2.14 +dill==0.3.7 +dirtyjson==1.0.8 +distro==1.9.0 +exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1704921103267/work +executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work +faiss==1.7.4 +faiss-cpu==1.7.4 +faiss-gpu==1.7.2 +filelock==3.13.1 +Flask==3.0.2 +frozenlist==1.4.1 +fsspec==2023.10.0 +git-python==1.0.3 +gitdb==4.0.11 +GitPython==3.1.41 +greenlet==3.0.3 +h11==0.14.0 +h2==4.1.0 +hpack==4.0.0 +httpcore==1.0.2 +httpx==0.26.0 +huggingface-hub==0.20.3 +Hypercorn==0.16.0 +hyperframe==6.0.1 +idna==3.6 +importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1703269254275/work +ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1707182759703/work +ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1701831663892/work +itsdangerous==2.1.2 +jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work +Jinja2==3.1.3 +joblib==1.3.2 +jsonpatch==1.33 +jsonpointer==2.4 +jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1699283905679/work +jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1704727023078/work +langchain==0.1.5 +langchain-community==0.0.17 +langchain-core==0.1.19 +langsmith==0.0.86 +llama-index==0.9.44 +MarkupSafe==2.1.5 +marshmallow==3.20.2 +matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work +mpmath==1.3.0 +multidict==6.0.5 +multiprocess==0.70.15 +mypy-extensions==1.0.0 
+nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work +networkx==3.2.1 +ninja==1.11.1.1 +nltk==3.8.1 +numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1707225342954/work/dist/numpy-1.26.4-cp39-cp39-linux_x86_64.whl#sha256=c799942b5898f6e6c60264d1663a6469a475290e758c654aeeb78e2596463abd +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.3.101 +nvidia-nvtx-cu12==12.1.105 +onnx==1.15.0 +openai==1.11.1 +packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work +pandas==2.2.0 +parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work +pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work +pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work +pillow==10.2.0 +platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1706713388748/work +priority==2.0.0 +prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1702399386289/work +protobuf==4.25.2 +psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1705722404069/work +ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl +pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work +pyarrow==15.0.0 +pyarrow-hotfix==0.6 +pydantic==2.6.1 +pydantic_core==2.16.2 +Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1700607939962/work +python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work +python-dotenv==1.0.1 
+pytz==2024.1 +PyYAML==6.0.1 +pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1701783166972/work +Quart==0.19.4 +RAGatouille==0.0.6b5 +regex==2023.12.25 +requests==2.31.0 +ruff==0.1.15 +safetensors==0.4.2 +scikit-learn==1.4.0 +scipy==1.12.0 +sentence-transformers==2.3.1 +sentencepiece==0.1.99 +six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work +smmap==5.0.1 +sniffio==1.3.0 +SQLAlchemy==2.0.25 +srsly==2.4.8 +stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work +sympy==1.12 +taskgroup==0.0.0a4 +tenacity==8.2.3 +threadpoolctl==3.2.0 +tiktoken==0.5.2 +tokenizers==0.15.1 +tomli==2.0.1 +torch==2.2.0 +tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1695373443748/work +tqdm==4.66.1 +traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1704212992681/work +transformers==4.37.2 +triton==2.2.0 +typing-inspect==0.9.0 +typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1702176139754/work +tzdata==2023.4 +ujson==5.9.0 +urllib3==2.2.0 +voyager==2.0.2 +wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work +Werkzeug==3.0.1 +wrapt==1.16.0 +wsproto==1.2.0 +xxhash==3.4.1 +yarl==1.9.4 +youtube-dl==2021.12.17 +youtube-transcript-api==0.6.2 +zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work diff --git a/src/chunking/MPNet/local/model.py b/src/chunking/MPNet/local/model.py index a5faa1a..b6eada0 100644 --- a/src/chunking/MPNet/local/model.py +++ b/src/chunking/MPNet/local/model.py @@ -158,4 +158,4 @@ async def inference(self, request: ModelRequest): # Properly escape the CSV string - return csv_string + return csv_string \ No newline at end of file diff --git a/src/youtube_embedding/Dockerfile b/src/youtube_embedding/Dockerfile new file mode 100644 index 0000000..dfb36ba --- /dev/null +++ b/src/youtube_embedding/Dockerfile @@ -0,0 +1,15 @@ + +FROM python:3.9-slim + 
+WORKDIR /app + + +#install requirements +COPY requirements.txt requirements.txt +RUN pip3 install -r requirements.txt + +# Copy the rest of the application code to the working directory +COPY . /app/ +EXPOSE 8000 +# Set the entrypoint for the container +CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"] diff --git a/src/youtube_embedding/README.md b/src/youtube_embedding/README.md new file mode 100644 index 0000000..e69de29 diff --git a/src/youtube_embedding/__init__.py b/src/youtube_embedding/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/youtube_embedding/api.py b/src/youtube_embedding/api.py new file mode 100644 index 0000000..c3c560b --- /dev/null +++ b/src/youtube_embedding/api.py @@ -0,0 +1,68 @@ +from quart import Quart, request, jsonify +from scraper import transcript +import aiohttp +import io +from model import Model +from request import ModelRequest +from chunking import TranscriptChunker +import json + +app = Quart(__name__) + + +model = None + +@app.before_serving +async def startup(): + app.client = aiohttp.ClientSession() + global model + model = Model(app) + + +transcript_data_store = {} +@app.route('/get_transcript', methods=['POST']) +async def get_transcript(): + data = await request.get_json() + + if 'url' not in data: + return jsonify({'error': 'URL is required'}), 400 + + url = data['url'] + transcript_path, transcript_content = transcript(url) + + transcript_data_store[url] = { + 'transcript_path': transcript_path, + 'transcript_data': transcript_content + } + return jsonify({ + 'transcript_path': transcript_path, + 'transcript_data': transcript_content + }) + + +@app.route('/Query',methods=['POST']) +async def query(): + global model + data= await request.get_json() + + + if 'url' not in data or 'query' not in data: + return jsonify({'error': 'URL and query are required'}), 400 + + url=data['url'] + user_query = data['query'] + + if url not in transcript_data_store: + transcript_path, transcript_content = 
class TranscriptChunker:
    """
    Split a YouTube transcript into consecutive chunks of bounded duration.

    The transcript is read from a JSON file holding a list of entries; this
    class reads the ``text``, ``start`` and ``duration`` keys of every entry
    (``start``/``duration`` are compared against ``chunk_size_seconds`` and
    converted to minutes by dividing by 60, i.e. they are seconds).

    Entries are packed greedily, in order: an entry joins the current chunk
    while the summed ``duration`` stays within ``chunk_size_seconds``;
    otherwise it starts a new chunk. A single entry longer than the limit
    becomes a chunk of its own.

    Attributes:
        chunk_size_seconds (int): Maximum summed duration of one chunk.
        transcript_df (pd.DataFrame): Transcript entries, set by ``fit``.
        result_chunks (dict): ``{'chunk1': [entry, ...], ...}``, set by ``fit``.

    Example:
        chunker = TranscriptChunker(chunk_size_seconds=240)
        chunker.fit('transcript.json')
        chunks = chunker.chunks()
        metadata = chunker.metadata()
    """

    def __init__(self, chunk_size_seconds: int = 240) -> None:
        self.chunk_size_seconds: int = chunk_size_seconds
        # The transcript is held as a DataFrame; both stay None until fit().
        self.transcript_df: pd.DataFrame = None
        self.result_chunks: Dict[str, List[Dict[str, Any]]] = None

    def fit(self, transcript_path: str) -> None:
        """Load the transcript JSON at ``transcript_path`` and chunk it."""
        with open(transcript_path, 'r', encoding='utf-8') as file:
            transcript_data = json.load(file)
        self.transcript_df = pd.DataFrame(transcript_data)
        self.result_chunks = self._transform()

    def _transform(self) -> Dict[str, List[Dict[str, Any]]]:
        """
        Group the loaded transcript entries into duration-bounded chunks.

        Returns:
            Mapping ``'chunk<N>'`` (N starting at 1) to the list of entry
            dicts in that chunk. Also stored on ``self.all_chunks``.

        Raises:
            ValueError: if no transcript has been loaded yet.
        """
        if self.transcript_df is None:
            raise ValueError("Transcript data not provided.")

        self.all_chunks = {}
        current_chunk = []
        current_chunk_duration = 0
        chunk_counter = 1

        for entry in self.transcript_df.to_dict('records'):
            duration = entry['duration']
            if current_chunk_duration + duration <= self.chunk_size_seconds:
                current_chunk.append(entry)
                current_chunk_duration += duration
                continue

            # Only flush a chunk that actually holds entries: an entry that
            # alone exceeds the limit must not leave an empty chunk behind
            # (metadata() indexes the first/last entry of every chunk).
            if current_chunk:
                self.all_chunks[f'chunk{chunk_counter}'] = current_chunk
                chunk_counter += 1
            current_chunk = [entry]
            current_chunk_duration = duration

        if current_chunk:
            self.all_chunks[f'chunk{chunk_counter}'] = current_chunk

        return self.all_chunks

    def chunks(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return the chunks computed by ``fit``; raise ValueError before it."""
        if self.result_chunks is None:
            raise ValueError("Call .fit() method first to transform data into chunks")

        return self.result_chunks

    def metadata(self) -> Dict[str, Dict[str, Any]]:
        """
        Summarise every chunk.

        Returns:
            Mapping chunk name -> ``{'chunk_length', 'start_time', 'end_time'}``
            where ``chunk_length`` is the number of whitespace-separated words
            and the times are minutes rounded to two decimals (start of the
            first entry, end of the last entry). Also stored on
            ``self.meta_dict``.

        Raises:
            ValueError: if ``fit`` has not been called.
        """
        if self.result_chunks is None:
            raise ValueError("Call .fit() method first to transform data into chunks")

        self.meta_dict = {}
        for name, entries in self.result_chunks.items():
            words = " ".join(item['text'] for item in entries).split()
            first, last = entries[0], entries[-1]
            self.meta_dict[name] = {
                'chunk_length': len(words),
                'start_time': round(first['start'] / 60, 2),
                'end_time': round((last['start'] + last['duration']) / 60, 2),
            }

        return self.meta_dict
class ModelRequest:
    """
    Bundle one user query with the cached transcript of the queried video.

    Attributes:
        query: The user's search query (``data['query']``).
        url: The video URL (``data['url']``), used as key into the store.
        transcript_path: ``transcript_path`` stored for ``url``.
        transcript_data: ``transcript_data`` stored for ``url``.
    """

    def __init__(self, data, trasnscript_data_store):
        """
        Args:
            data: Mapping with ``'query'`` and ``'url'`` keys.
            trasnscript_data_store: Mapping ``url -> {'transcript_path': ...,
                'transcript_data': ...}``. (Parameter name kept as-is,
                typo included, so keyword callers keep working.)

        Raises:
            KeyError: if a required key or the URL's store entry is missing.
        """
        self.query = data['query']
        self.url = data['url']
        stored = trasnscript_data_store[self.url]
        self.transcript_path = stored['transcript_path']
        self.transcript_data = stored['transcript_data']

    def to_json(self):
        """Return this request serialised as an indented JSON string."""
        # json.dumps (string result) rather than json.dump, which requires a
        # file object and therefore always raised TypeError here.
        return json.dumps(
            self,
            default=lambda o: o.__dict__,
            sort_keys=True,
            indent=4,
        )
def vid_id(Url: str) -> "str | None":
    """
    Extract the video ID from a YouTube URL.

    Recognised forms: a ``v=<id>`` query parameter (``watch?v=<id>``), the
    short ``youtu.be/<id>`` form, and ``/embed/<id>``, ``/shorts/<id>``,
    ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        Url: URL of the video as a string.

    Returns:
        The video ID, or ``None`` when no ID can be found.
    """
    from urllib.parse import parse_qs, urlparse

    # Prefix scheme-less input with '//' so the host is parsed as a host.
    parsed = urlparse(Url if "//" in Url else "//" + Url)

    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = parsed.hostname or ""
    if host in ("youtu.be", "www.youtu.be"):
        return segments[0] if segments else None
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Fallback for bare fragments such as "v=<id>". Anchoring on start/?/&
    # avoids matching unrelated parameters that merely end in "v" (rev=...).
    match = re.search(r'(?:^|[?&])v=([^&#]+)', Url)
    return match.group(1) if match else None


def transcript(Url: str, output_file: str = 'transcript.json') -> tuple:
    """
    Fetch the timestamped transcript of a YouTube video and save it as JSON.

    Args:
        Url: URL of the video.
        output_file: Path of the JSON file to write. The default keeps the
            historical fixed name.

    Returns:
        ``(absolute_path, transcript_data)``: the absolute path of the
        written file and the transcript as returned by
        ``YouTubeTranscriptApi.get_transcript``.

    Raises:
        ValueError: if no video ID can be extracted from ``Url``.
        RuntimeError: if the transcript cannot be fetched (the original
            error is chained as ``__cause__``).
    """
    v_id = vid_id(Url)
    if v_id is None:
        raise ValueError(f"Could not extract a video ID from URL: {Url!r}")

    # Previously a failure here was only printed, after which the function
    # crashed with UnboundLocalError on return; fail with a clear error.
    try:
        transcript_data = YouTubeTranscriptApi.get_transcript(v_id)
    except Exception as exc:
        raise RuntimeError(
            f"Failed to fetch transcript for video {v_id!r}"
        ) from exc

    # NOTE(review): with the default name every call overwrites the same
    # file, so a path returned for an earlier video later points at another
    # video's transcript. Pass a per-video output_file to avoid that.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(transcript_data, f)

    print(f"Transcript successfully saved to {output_file}")

    return os.path.abspath(output_file), transcript_data
-git-python==1.0.3 -gitdb==4.0.11 -GitPython==3.1.41 -greenlet==3.0.3 -h11==0.14.0 -h2==4.1.0 -hpack==4.0.0 -httpcore==1.0.2 -httpx==0.26.0 -huggingface-hub==0.20.3 -Hypercorn==0.16.0 -hyperframe==6.0.1 -idna==3.6 -importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1703269254275/work -ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1707182759703/work -ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1701831663892/work -itsdangerous==2.1.2 -jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work -Jinja2==3.1.3 -joblib==1.3.2 -jsonpatch==1.33 -jsonpointer==2.4 -jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1699283905679/work -jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1704727023078/work -langchain==0.1.5 -langchain-community==0.0.17 -langchain-core==0.1.19 -langsmith==0.0.86 -llama-index==0.9.44 -MarkupSafe==2.1.5 -marshmallow==3.20.2 -matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work -mpmath==1.3.0 -multidict==6.0.5 -multiprocess==0.70.15 -mypy-extensions==1.0.0 -nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work -networkx==3.2.1 -ninja==1.11.1.1 -nltk==3.8.1 -numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1707225342954/work/dist/numpy-1.26.4-cp39-cp39-linux_x86_64.whl#sha256=c799942b5898f6e6c60264d1663a6469a475290e758c654aeeb78e2596463abd -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu12==8.9.2.26 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu12==2.19.3 -nvidia-nvjitlink-cu12==12.3.101 -nvidia-nvtx-cu12==12.1.105 -onnx==1.15.0 -openai==1.11.1 -packaging @ 
file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work -pandas==2.2.0 -parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work -pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work -pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work -pillow==10.2.0 -platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1706713388748/work -priority==2.0.0 -prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1702399386289/work -protobuf==4.25.2 -psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1705722404069/work -ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl -pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work -pyarrow==15.0.0 -pyarrow-hotfix==0.6 -pydantic==2.6.1 -pydantic_core==2.16.2 -Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1700607939962/work -python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work -python-dotenv==1.0.1 -pytz==2024.1 -PyYAML==6.0.1 -pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1701783166972/work -Quart==0.19.4 -RAGatouille==0.0.6b5 -regex==2023.12.25 -requests==2.31.0 -ruff==0.1.15 -safetensors==0.4.2 -scikit-learn==1.4.0 -scipy==1.12.0 -sentence-transformers==2.3.1 -sentencepiece==0.1.99 -six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work -smmap==5.0.1 -sniffio==1.3.0 -SQLAlchemy==2.0.25 -srsly==2.4.8 -stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work -sympy==1.12 -taskgroup==0.0.0a4 -tenacity==8.2.3 -threadpoolctl==3.2.0 -tiktoken==0.5.2 -tokenizers==0.15.1 -tomli==2.0.1 -torch==2.2.0 -tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1695373443748/work 
-tqdm==4.66.1 -traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1704212992681/work -transformers==4.37.2 -triton==2.2.0 -typing-inspect==0.9.0 -typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1702176139754/work -tzdata==2023.4 -ujson==5.9.0 -urllib3==2.2.0 -voyager==2.0.2 -wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work -Werkzeug==3.0.1 -wrapt==1.16.0 -wsproto==1.2.0 -xxhash==3.4.1 -yarl==1.9.4 -youtube-dl==2021.12.17 -youtube-transcript-api==0.6.2 -zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work