add FastAPI Backend #5

Merged 1 commit on Jul 16, 2024
46 changes: 9 additions & 37 deletions app.py
@@ -6,37 +6,9 @@
 import streamlit as st
 from dotenv import load_dotenv
 from streamlit_extras.add_vertical_space import add_vertical_space
-from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from openai_chat import get_openai_embeddings, get_openai_answers
-from llama_chat import get_llama_embeddings, get_llama_answers
-
-def read_pdf(pdf):
-    """
-    Parameters:
-    - pdf: path to the PDF file
-    Return: Returns the contents of the PDF file
-    """
-    pdf_reader = PdfReader(pdf)
-
-    content = ""
-    for page in pdf_reader.pages:
-        content += page.extract_text()
-    return content
-
-def split_into_chunks(content):
-    """
-    Parameters:
-    - content: the content read from the PDF file
-    Return: Returns the contents split into chunks
-    """
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = 1000,
-        chunk_overlap = 200,
-        length_function = len
-    )
-    chunks = text_splitter.split_text(text = content)
-    return chunks
+from utils import read_pdf, split_into_chunks
+import requests
+import json
 
 # Favicon and Title
 st.set_page_config(page_title="NekoPDF 📖 - Chat with PDF",
@@ -85,17 +57,17 @@ def main():
     if option == 'GPT 3.5 - Turbo':
         # Check for existing store or create new one
         store_name = pdf.name[:-4] + '.openai.faiss'
-        vectorstore = get_openai_embeddings(chunks, store_name)
+        payload = {'chunks': chunks, 'store_name': store_name, 'query': query, 'k': k}
         if query:
-            response = get_openai_answers(vectorstore, query, k)
-            st.write(response)
+            response = requests.post(url='http://127.0.0.1:8000/qa/openai', data=json.dumps(payload))
+            st.write(response.text)
     elif option == 'LLama 2 7B':
         # Check for existing store or create one
         store_name = pdf.name[:-4] + '.llama.faiss'
-        vectorstore = get_llama_embeddings(chunks, store_name)
+        payload = {'chunks': chunks, 'store_name': store_name, 'query': query, 'k': k}
         if query:
-            response = get_llama_answers(vectorstore, query, k)
-            st.write(response)
+            response = requests.post(url='http://127.0.0.1:8000/qa/llama', data=json.dumps(payload))
+            st.write(response.text)
 
 if __name__ == '__main__':
     main()
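
Note: with this change app.py reaches the model code over HTTP, so the FastAPI app in main.py must be running before the Streamlit UI is used. A minimal launcher sketch, assuming uvicorn is installed (the filename is hypothetical; the port matches the URL hardcoded above):

# run_backend.py - hypothetical launcher, assuming uvicorn is installed.
import uvicorn

# Serve the FastAPI app defined in main.py on the address app.py expects.
if __name__ == '__main__':
    uvicorn.run('main:app', host='127.0.0.1', port=8000)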
5 changes: 3 additions & 2 deletions llama_chat.py
@@ -18,6 +18,9 @@
                n_batch = 128,
                n_ctx = 1024)
 
+embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
+                                n_gpu_layers = -1, verbose = False)
+
 # Sample Template
 TEMPLATE = """Use the following pieces of context to answer the question at the end.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.
@@ -34,8 +37,6 @@ def get_llama_embeddings(chunks, store_name):
     case of existing embeddings or create and save to
     Return: An instance of FAISS Vectorstore
     """
-    embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
-                                    n_gpu_layers = -1, verbose = False)
     if os.path.exists(store_name):
         vectorstore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
     else:
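
Hoisting the LlamaCppEmbeddings construction to module scope loads the GGUF model once per process instead of on every call to get_llama_embeddings. For reference, a load-or-build pattern around a shared embeddings object might look like the sketch below; this is an illustration under assumed LangChain imports, not the collapsed code from this diff:

import os
from langchain.vectorstores import FAISS

def load_or_build(chunks, store_name, embeddings):
    # Reuse the on-disk FAISS index when present; otherwise embed the chunks and persist.
    if os.path.exists(store_name):
        return FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
    vectorstore.save_local(store_name)
    return vectorstore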
29 changes: 29 additions & 0 deletions main.py
@@ -0,0 +1,29 @@
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from llama_chat import get_llama_embeddings, get_llama_answers
from openai_chat import get_openai_answers, get_openai_embeddings

app = FastAPI()

# Request body shared by both QA endpoints
class QA(BaseModel):
    chunks: List
    store_name: str
    query: str
    k: int

@app.post('/qa/openai')
def openai_response(input: QA):
    vectorstore = get_openai_embeddings(input.chunks, input.store_name)
    response = ''  # keep the name bound even when query is empty
    if input.query:
        response = get_openai_answers(vectorstore, input.query, input.k)
    return response

@app.post('/qa/llama')
def llama_response(input: QA):
    vectorstore = get_llama_embeddings(input.chunks, input.store_name)
    response = ''  # keep the name bound even when query is empty
    if input.query:
        response = get_llama_answers(vectorstore, input.query, input.k)
    return response
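
For illustration, a client could exercise the new endpoints as in the sketch below once the backend is up (the chunks, store name, and query are placeholders; chunks would normally come from split_into_chunks). Passing json=payload lets requests serialize the body and set the Content-Type header in one step, equivalent to the data=json.dumps(payload) calls in app.py:

# client_sketch.py - illustrative only; assumes the backend from main.py
# is running locally on port 8000.
import requests

payload = {
    'chunks': ['First chunk of extracted PDF text...', 'Second chunk...'],  # placeholders
    'store_name': 'example.openai.faiss',  # placeholder store name
    'query': 'What is this document about?',
    'k': 3,
}
response = requests.post('http://127.0.0.1:8000/qa/openai', json=payload)
print(response.text)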


34 changes: 34 additions & 0 deletions utils.py
@@ -0,0 +1,34 @@
"""
Filename: utils.py
Description: Implements functions and methods needed for reading text from files
and splitting into chunks, etc
"""
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def read_pdf(pdf):
"""
Parameters:
- pdf: path to the PDF file
Return: Returns the contents of the PDF file
"""
pdf_reader = PdfReader(pdf)

content = ""
for page in pdf_reader.pages:
content += page.extract_text()
return content

def split_into_chunks(content):
"""
Parameters:
- content: the content read from the PDf file
Return: Returns the contents split into chunks
"""
text_splitter = RecursiveCharacterTextSplitter(
chunk_size = 1000,
chunk_overlap = 200,
length_function = len
)
chunks = text_splitter.split_text(text = content)
return chunks
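
As a quick usage sketch of these helpers (sample.pdf is a placeholder path):

from utils import read_pdf, split_into_chunks

content = read_pdf('sample.pdf')  # placeholder PDF path
chunks = split_into_chunks(content)
print(f'split {len(content)} characters into {len(chunks)} chunks')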