From c8f03932ce1fe28dc4c381465f1459dd6b71793b Mon Sep 17 00:00:00 2001
From: sarthak247
Date: Tue, 16 Jul 2024 11:05:49 +0930
Subject: [PATCH] Add support for Llama2 model from Llama-cpp

---
 .gitignore       |   3 ++
 app.py           | 110 +++++++++++++++++++++++++++--------------------
 llama_chat.py    |  65 ++++++++++++++++++++++++++++
 openai_chat.py   |  47 ++++++++++++++++++++
 requirements.txt |   2 +
 5 files changed, 180 insertions(+), 47 deletions(-)
 create mode 100644 llama_chat.py
 create mode 100644 openai_chat.py

diff --git a/.gitignore b/.gitignore
index a2b6d02..40ef306 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,6 @@ venv/*
 .env
 *.pdf
 *.pkl
+models/
+*.faiss/
+__pycache__/
diff --git a/app.py b/app.py
index 115b2e5..7c56bdf 100644
--- a/app.py
+++ b/app.py
@@ -1,18 +1,48 @@
+"""
+Filename: app.py
+Description: Implements functions and methods needed for interacting with NekoPDF
+Run: streamlit run app.py
+"""
 import streamlit as st
 from dotenv import load_dotenv
 from streamlit_extras.add_vertical_space import add_vertical_space
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_openai import OpenAIEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain_openai import ChatOpenAI
-from langchain.chains.question_answering import load_qa_chain
-from langchain_community.callbacks.manager import get_openai_callback
-import os
+from openai_chat import get_openai_embeddings, get_openai_answers
+from llama_chat import get_llama_embeddings, get_llama_answers
+
+def read_pdf(pdf):
+    """
+    Parameters:
+    - pdf: path to the PDF file
+    Return: Returns the contents of the PDF file
+    """
+    pdf_reader = PdfReader(pdf)
+
+    content = ""
+    for page in pdf_reader.pages:
+        content += page.extract_text()
+    return content
+
+def split_into_chunks(content):
+    """
+    Parameters:
+    - content: the content read from the PDF file
+    Return: Returns the content split into chunks
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1000,
+        chunk_overlap = 200,
+        length_function = len
+    )
+    chunks = text_splitter.split_text(text = content)
+    return chunks
 
 # Favicon and Title
-st.set_page_config(page_title="NekoPDF 📖 - Chat with PDF", page_icon="🐱", layout="centered", initial_sidebar_state="auto", menu_items=None)
+st.set_page_config(page_title="NekoPDF 📖 - Chat with PDF",
+                   page_icon="🐱", layout="centered",
+                   initial_sidebar_state="auto",
+                   menu_items=None)
 
 # SideBar
 with st.sidebar:
@@ -29,57 +59,43 @@ def main():
     # Load Environment Variables
     load_dotenv()
-
+
     # Main App
     st.header("🐱 NekoPDF - Chat with PDF 📖")
+    # Select LLM
+    option = st.selectbox('Select LLM', ('GPT 3.5 - Turbo', 'LLama 2 7B'))
+
+    # Select top-k similarity search
+    k = st.slider('Top K', 1, 5, 1)
+
     # Upload PDF File
     pdf = st.file_uploader("Upload your PDF", type = 'pdf')
-
+
     # Read PDF
    if pdf is not None:
-        pdf_reader = PdfReader(pdf)
-
-        content = ""
-        for page in pdf_reader.pages:
-            content += page.extract_text()
+        # Read PDF content
+        content = read_pdf(pdf)
 
         # Build chunks of text
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size = 1000,
-            chunk_overlap = 200,
-            length_function = len
-        )
-
-        chunks = text_splitter.split_text(text = content)
+        chunks = split_into_chunks(content)
 
-        # Check for existing store or create new one
-        store_name = pdf.name[:-4] + '.faiss'
-        embeddings = OpenAIEmbeddings()
-        if os.path.exists(store_name):
-            VectorStore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
-        else:
-            # Convert chunks -> Embeddings
-            VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
-
-            VectorStore.save_local(store_name)
-
         # Accept Questions
         query = st.text_input("Ask questions about your PDF File: ")
-        if query:
-            docs = VectorStore.similarity_search(query = query, k = 3)
-
-            # Setup LLM
-            llm = ChatOpenAI(temperature=0, model_name = "gpt-3.5-turbo")
-
-            # Setup QA Chain and query it
-            chain = load_qa_chain(llm = llm, chain_type = "stuff")
-            input_data = {'input_documents' : docs, 'question' : query}
-            with get_openai_callback() as cb:
-                response = chain.invoke(input=input_data)
-                print(cb)
-            # breakpoint()
-            st.write(response['output_text'])
+        if option == 'GPT 3.5 - Turbo':
+            # Check for existing store or create new one
+            store_name = pdf.name[:-4] + '.openai.faiss'
+            vectorstore = get_openai_embeddings(chunks, store_name)
+            if query:
+                response = get_openai_answers(vectorstore, query, k)
+                st.write(response)
+        elif option == 'LLama 2 7B':
+            # Check for existing store or create one
+            store_name = pdf.name[:-4] + '.llama.faiss'
+            vectorstore = get_llama_embeddings(chunks, store_name)
+            if query:
+                response = get_llama_answers(vectorstore, query, k)
+                st.write(response)
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/llama_chat.py b/llama_chat.py
new file mode 100644
index 0000000..b239f9b
--- /dev/null
+++ b/llama_chat.py
@@ -0,0 +1,65 @@
+"""
+Filename: llama_chat.py
+Description: Implements functions needed to work with Llama.cpp for QA
+"""
+import os
+from langchain_community.embeddings import LlamaCppEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.llms import LlamaCpp
+from langchain_core.prompts import PromptTemplate
+
+# Setup LLM
+llm = LlamaCpp(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
+               temperature = 0.75,
+               max_tokens = 2000,
+               top_p = 1,
+               verbose = False,
+               n_gpu_layers = -1,
+               n_batch = 128,
+               n_ctx = 1024)
+
+# Sample Template
+TEMPLATE = """Use the following pieces of context to answer the question at the end.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+{context}
+Question: {question}
+Answer:"""
+prompt = PromptTemplate.from_template(TEMPLATE)
+
+def get_llama_embeddings(chunks, store_name):
+    """
+    Parameters:
+    - chunks: text to turn into embeddings
+    - store_name : The name of the store to load existing
+      embeddings from, or to create and save them to
+    Return: An instance of FAISS Vectorstore
+    """
+    embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
+                                    n_gpu_layers = -1, verbose = False)
+    if os.path.exists(store_name):
+        vectorstore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
+    else:
+        # Convert chunks -> Embeddings
+        vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
+        vectorstore.save_local(store_name)
+    return vectorstore
+
+def get_llama_answers(vectorstore, query, k):
+    """
+    Parameters:
+    - vectorstore: Vector Store of chunks of texts and their embeddings
+    - query: Question to ask the LLM
+    - k: Number of top k matching documents from similarity search
+    Return: Response from the Llama model
+    """
+    docs = vectorstore.similarity_search(query, k)
+
+    # Extract context
+    context = ''
+    for doc in docs:
+        context += doc.page_content
+
+    # Setup chain
+    llm_chain = prompt | llm
+    response = llm_chain.invoke({'context' : context, 'question' : query})
+    return response
diff --git a/openai_chat.py b/openai_chat.py
new file mode 100644
index 0000000..594dd8b
--- /dev/null
+++ b/openai_chat.py
@@ -0,0 +1,47 @@
+"""
+Filename: openai_chat.py
+Description: Implements functions needed to work with the OpenAI API
+"""
+import os
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+from langchain_community.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain_community.callbacks.manager import get_openai_callback
+
+def get_openai_embeddings(chunks, store_name):
+    """
+    Parameters:
+    - chunks: text to turn into embeddings
+    - store_name : The name of the store to load existing
+      embeddings from, or to create and save them to
+    Return: An instance of FAISS Vectorstore
+    """
+    embeddings = OpenAIEmbeddings()
+    if os.path.exists(store_name):
+        vectorstore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
+    else:
+        # Convert chunks -> Embeddings
+        vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
+        vectorstore.save_local(store_name)
+    return vectorstore
+
+
+def get_openai_answers(vectorstore, query, k):
+    """
+    Parameters:
+    - vectorstore: Vector Store of chunks of texts and their embeddings
+    - query: Question to ask the LLM
+    - k: Number of top k matching documents from similarity search
+    Return: Response from the OpenAI API
+    """
+    docs = vectorstore.similarity_search(query, k)
+    # Setup LLM
+    llm = ChatOpenAI(temperature=0, model_name = "gpt-3.5-turbo")
+
+    # Setup QA Chain and query it
+    chain = load_qa_chain(llm = llm, chain_type = "stuff")
+    input_data = {'input_documents' : docs, 'question' : query}
+    with get_openai_callback() as cb:
+        response = chain.invoke(input=input_data)
+        print(cb)
+    return response['output_text']
diff --git a/requirements.txt b/requirements.txt
index c63b197..36e042c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,6 +14,7 @@ click==8.1.7
 contourpy==1.2.1
 cycler==0.12.1
 dataclasses-json==0.6.7
+diskcache==5.6.3
 distro==1.9.0
 entrypoints==0.4
 exceptiongroup==1.2.2
@@ -42,6 +43,7 @@ langchain-core==0.2.19
 langchain-openai==0.1.16
 langchain-text-splitters==0.2.2
 langsmith==0.1.85
+llama_cpp_python==0.2.55
 lxml==5.2.2
 Markdown==3.6
 markdown-it-py==3.0.0
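
Usage sketch (not part of the patch): the new llama_chat helpers can be exercised outside Streamlit. This assumes the quantized model at ./models/llama-2-7b-chat.Q2_K.gguf referenced in llama_chat.py is present; the chunk strings and the demo.llama.faiss store name below are illustrative only. The openai_chat helpers follow the same pattern, with OPENAI_API_KEY set in the environment.

    from llama_chat import get_llama_embeddings, get_llama_answers

    # Illustrative chunks; in app.py these come from read_pdf() + split_into_chunks().
    chunks = [
        "NekoPDF lets users chat with the contents of a PDF file.",
        "Answers come from GPT 3.5 Turbo or a local Llama 2 7B model.",
    ]

    # Build (or reload) a FAISS index for the chunks, then ask a question.
    store = get_llama_embeddings(chunks, "demo.llama.faiss")
    print(get_llama_answers(store, "Which local model does NekoPDF support?", k=2))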