diff --git a/app.py b/app.py index 7093128..115b2e5 100644 --- a/app.py +++ b/app.py @@ -3,12 +3,11 @@ from streamlit_extras.add_vertical_space import add_vertical_space from PyPDF2 import PdfReader from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.vectorstores import FAISS -from langchain.llms import OpenAI +from langchain_openai import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain_openai import ChatOpenAI from langchain.chains.question_answering import load_qa_chain -from langchain.callbacks import get_openai_callback -import pickle +from langchain_community.callbacks.manager import get_openai_callback import os @@ -55,17 +54,15 @@ def main(): chunks = text_splitter.split_text(text = content) # Check for existing store or create new one - store_name = pdf.name[:-4] - if os.path.exists(f"{store_name}.pkl"): - with open(f"{store_name}.pkl", "rb") as f: - VectorStore = pickle.load(f) + store_name = pdf.name[:-4] + '.faiss' + embeddings = OpenAIEmbeddings() + if os.path.exists(store_name): + VectorStore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True) else: # Convert chunks -> Embeddings - embeddings = OpenAIEmbeddings() VectorStore = FAISS.from_texts(chunks, embedding=embeddings) - with open(f"{store_name}.pkl", "wb") as f: - pickle.dump(VectorStore, f) + VectorStore.save_local(store_name) # Accept Questions query = st.text_input("Ask questions about your PDF File: ") @@ -73,18 +70,16 @@ def main(): docs = VectorStore.similarity_search(query = query, k = 3) # Setup LLM - llm = OpenAI(temperature=0, model_name = "gpt-3.5-turbo") + llm = ChatOpenAI(temperature=0, model_name = "gpt-3.5-turbo") # Setup QA Chain and query it chain = load_qa_chain(llm = llm, chain_type = "stuff") + input_data = {'input_documents' : docs, 'question' : query} with get_openai_callback() as cb: - response = chain.run(input_documents = docs, question = query) + response = chain.invoke(input=input_data) print(cb) - st.write(response) - - - - + # breakpoint() + st.write(response['output_text']) if __name__ == '__main__': main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 005e130..c63b197 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,99 +1,110 @@ -aiohttp==3.8.5 +aiohttp==3.9.5 aiosignal==1.3.1 -altair==5.1.1 -annotated-types==0.5.0 +altair==5.3.0 +annotated-types==0.7.0 +anyio==4.4.0 async-timeout==4.0.3 -attrs==23.1.0 -beautifulsoup4==4.12.2 -blinker==1.6.2 -cachetools==5.3.1 -certifi==2023.7.22 -charset-normalizer==3.2.0 +attrs==23.2.0 +beautifulsoup4==4.12.3 +blinker==1.8.2 +cachetools==5.3.3 +certifi==2024.7.4 +charset-normalizer==3.3.2 click==8.1.7 -contourpy==1.1.0 -cycler==0.11.0 -dataclasses-json==0.5.14 -faiss-cpu==1.7.4 -Faker==19.6.1 +contourpy==1.2.1 +cycler==0.12.1 +dataclasses-json==0.6.7 +distro==1.9.0 +entrypoints==0.4 +exceptiongroup==1.2.2 +faiss-gpu==1.7.2 +Faker==26.0.0 favicon==0.7.0 -fonttools==4.42.1 -frozenlist==1.4.0 -gitdb==4.0.10 -GitPython==3.1.35 -greenlet==2.0.2 +fonttools==4.53.1 +frozenlist==1.4.1 +gitdb==4.0.11 +GitPython==3.1.43 +greenlet==3.0.3 +h11==0.14.0 htbuilder==0.6.2 -idna==3.4 -importlib-metadata==6.8.0 -Jinja2==3.1.2 -jsonschema==4.19.0 -jsonschema-specifications==2023.7.1 +httpcore==1.0.5 +httpx==0.27.0 +idna==3.7 +Jinja2==3.1.4 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 kiwisolver==1.4.5 -langchain==0.0.286 -langsmith==0.0.35 -lxml==4.9.3 -Markdown==3.4.4 +langchain==0.2.7 +langchain-community==0.2.7 +langchain-core==0.2.19 +langchain-openai==0.1.16 +langchain-text-splitters==0.2.2 +langsmith==0.1.85 +lxml==5.2.2 +Markdown==3.6 markdown-it-py==3.0.0 markdownlit==0.0.7 -MarkupSafe==2.1.3 -marshmallow==3.20.1 -matplotlib==3.7.2 +MarkupSafe==2.1.5 +marshmallow==3.21.3 +matplotlib==3.9.1 mdurl==0.1.2 -more-itertools==10.1.0 -multidict==6.0.4 +more-itertools==10.3.0 +multidict==6.0.5 mypy-extensions==1.0.0 -numexpr==2.8.5 -numpy==1.25.2 -openai==0.28.0 -packaging==23.1 -pandas==2.1.0 -Pillow==9.5.0 -protobuf==4.24.3 -pyarrow==13.0.0 -pydantic==2.3.0 -pydantic_core==2.6.3 -pydeck==0.8.0 -Pygments==2.16.1 -pymdown-extensions==10.3 -Pympler==1.0.1 -pyparsing==3.0.9 +numpy==1.26.4 +openai==1.35.13 +orjson==3.10.6 +packaging==24.1 +pandas==2.2.2 +pillow==10.4.0 +prometheus_client==0.20.0 +protobuf==5.27.2 +pyarrow==16.1.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pydeck==0.9.1 +Pygments==2.18.0 +pymdown-extensions==10.8.1 +pyparsing==3.1.2 PyPDF2==3.0.1 -python-dateutil==2.8.2 -python-dotenv==1.0.0 -pytz==2023.3.post1 -pytz-deprecation-shim==0.1.0.post0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 PyYAML==6.0.1 -referencing==0.30.2 -regex==2023.8.8 -requests==2.31.0 -rich==13.5.2 -rpds-py==0.10.2 +referencing==0.35.1 +regex==2024.5.15 +requests==2.32.3 +rich==13.7.1 +rpds-py==0.19.0 six==1.16.0 -smmap==5.0.0 +smmap==5.0.1 +sniffio==1.3.1 soupsieve==2.5 -SQLAlchemy==2.0.20 +SQLAlchemy==2.0.31 st-annotated-text==4.0.1 -streamlit==1.26.0 +st-theme==1.2.3 +streamlit==1.36.0 streamlit-camera-input-live==0.2.0 -streamlit-card==0.0.61 +streamlit-card==1.0.2 streamlit-embedcode==0.1.2 -streamlit-extras==0.3.2 -streamlit-faker==0.0.2 -streamlit-image-coordinates==0.1.6 -streamlit-keyup==0.2.0 +streamlit-extras==0.4.3 +streamlit-faker==0.0.3 +streamlit-image-coordinates==0.1.9 +streamlit-keyup==0.2.4 streamlit-toggle-switch==1.0.2 -streamlit-vertical-slider==1.0.2 -tenacity==8.2.3 -tiktoken==0.4.0 +streamlit-vertical-slider==2.5.5 +tenacity==8.5.0 +tiktoken==0.7.0 toml==0.10.2 -toolz==0.12.0 -tornado==6.3.3 -tqdm==4.66.1 +toolz==0.12.1 +tornado==6.4.1 +tqdm==4.66.4 typing-inspect==0.9.0 -typing_extensions==4.7.1 -tzdata==2023.3 -tzlocal==4.3.1 -urllib3==2.0.4 -validators==0.22.0 -watchdog==3.0.0 -yarl==1.9.2 -zipp==3.16.2 +typing_extensions==4.12.2 +tzdata==2024.1 +urllib3==2.2.2 +validators==0.33.0 +watchdog==4.0.1 +yarl==1.9.4