-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from sarthak247/llama
Add support for the Llama2 model from Llama-cpp
- Loading branch information
Showing
5 changed files
with
180 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,6 @@ venv/* | |
.env | ||
*.pkl | ||
models/ | ||
*.faiss/ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
Filename: llama.py
Description: Implements functions needed to work with Llama.cpp for QA
""" | ||
import os | ||
from langchain_community.embeddings import LlamaCppEmbeddings | ||
from langchain_community.vectorstores import FAISS | ||
from langchain_community.llms import LlamaCpp | ||
from langchain_core.prompts import PromptTemplate | ||
# Local Llama-2 7B chat model (2-bit quantised GGUF) served via llama.cpp.
_LLM_CONFIG = {
    'model_path': './models/llama-2-7b-chat.Q2_K.gguf',
    'temperature': 0.75,
    'max_tokens': 2000,
    'top_p': 1,
    'verbose': False,
    'n_gpu_layers': -1,   # offload all layers to the GPU
    'n_batch': 128,
    'n_ctx': 1024,        # context window in tokens
}
llm = LlamaCpp(**_LLM_CONFIG)

# Prompt scaffold: retrieved context first, then the user's question.
TEMPLATE = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "{context}\n"
    "Question: {question}\n"
    "Answer:"
)
prompt = PromptTemplate.from_template(TEMPLATE)
def get_llama_embeddings(chunks, store_name):
    """
    Load a cached FAISS index if one exists, otherwise embed the chunks
    with llama.cpp and persist the new index.

    Parameters:
    - chunks: text to turn into embeddings
    - store_name: path of the store to load existing embeddings from,
      or to create and save new ones to
    Return: An instance of FAISS Vectorstore
    """
    embeddings = LlamaCppEmbeddings(
        model_path='./models/llama-2-7b-chat.Q2_K.gguf',
        n_gpu_layers=-1,
        verbose=False,
    )
    if not os.path.exists(store_name):
        # No cached index yet: embed the chunks and persist the result.
        vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
        vectorstore.save_local(store_name)
        return vectorstore
    # NOTE(review): deserialises a pickled index — only load stores this
    # application itself created.
    return FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
def get_llama_answers(vectorstore, query, k):
    """
    Answer a question using the top-k most similar chunks as context.

    Parameters:
    - vectorstore: Vector Store of chunks of texts and their embeddings
    - query: Question to ask to the LLM
    - k: Number of top k matching documents from similarity search
    Return: Response from llama model
    """
    docs = vectorstore.similarity_search(query, k)

    # Concatenate the retrieved chunks into a single context string.
    # ''.join replaces the original quadratic `context += ...` loop while
    # producing the identical string (no separator between chunks).
    context = ''.join(doc.page_content for doc in docs)

    # LCEL pipeline: fill the prompt template, then run the local LLM.
    llm_chain = prompt | llm
    return llm_chain.invoke({'context': context, 'question': query})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
""" | ||
Filename: openai.py | ||
Description: Implements functions needed to work around with OpenAI API | ||
""" | ||
import os | ||
from langchain_openai import OpenAIEmbeddings, ChatOpenAI | ||
from langchain_community.vectorstores import FAISS | ||
from langchain.chains.question_answering import load_qa_chain | ||
from langchain_community.callbacks.manager import get_openai_callback | ||
def get_openai_embeddings(chunks, store_name):
    """
    Return a FAISS vector store for the chunks, reusing a cached index
    on disk when one exists.

    Parameters:
    - chunks: text to turn into embeddings
    - store_name: path of the store to load existing embeddings from,
      or to create and save new ones to
    Return: An instance of FAISS Vectorstore
    """
    embeddings = OpenAIEmbeddings()
    if not os.path.exists(store_name):
        # First run: embed the chunks and cache the index on disk.
        store = FAISS.from_texts(chunks, embedding=embeddings)
        store.save_local(store_name)
        return store
    # NOTE(review): deserialises a pickled index — only load stores this
    # application itself created.
    return FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
||
def get_openai_answers(vectorstore, query, k):
    """
    Answer a question with GPT-3.5-turbo over the top-k retrieved chunks.

    Parameters:
    - vectorstore: Vector Store of chunks of texts and their embeddings
    - query: Question to ask to the LLM
    - k: Number of top k matching documents from similarity search
    Return: Response from OpenAI API
    """
    docs = vectorstore.similarity_search(query, k)

    # Deterministic answers: temperature 0 on the chat model.
    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

    # "stuff" chain: every retrieved document is packed into one prompt.
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    payload = {'input_documents': docs, 'question': query}
    with get_openai_callback() as cb:
        response = chain.invoke(input=payload)
        print(cb)  # token-usage / cost accounting for this call
    return response['output_text']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters