Commit

add FastAPI Backend

sarthak247 committed Jul 16, 2024
1 parent c8f0393 commit 584ed58
Showing 4 changed files with 75 additions and 39 deletions.
46 changes: 9 additions & 37 deletions app.py
@@ -6,37 +6,9 @@
 import streamlit as st
 from dotenv import load_dotenv
 from streamlit_extras.add_vertical_space import add_vertical_space
-from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from openai_chat import get_openai_embeddings, get_openai_answers
-from llama_chat import get_llama_embeddings, get_llama_answers
-
-def read_pdf(pdf):
-    """
-    Parameters:
-    - pdf: path to the PDF file
-    Return: Returns the contents of the PDF file
-    """
-    pdf_reader = PdfReader(pdf)
-
-    content = ""
-    for page in pdf_reader.pages:
-        content += page.extract_text()
-    return content
-
-def split_into_chunks(content):
-    """
-    Parameters:
-    - content: the content read from the PDF file
-    Return: Returns the contents split into chunks
-    """
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = 1000,
-        chunk_overlap = 200,
-        length_function = len
-    )
-    chunks = text_splitter.split_text(text = content)
-    return chunks
+from utils import read_pdf, split_into_chunks
+import requests
+import json
 
 # Favicon and Title
 st.set_page_config(page_title="NekoPDF 📖 - Chat with PDF",
@@ -85,17 +57,17 @@ def main():
         if option == 'GPT 3.5 - Turbo':
             # Check for existing store or create new one
             store_name = pdf.name[:-4] + '.openai.faiss'
-            vectorstore = get_openai_embeddings(chunks, store_name)
+            payload = {'chunks': chunks, 'store_name' : store_name, 'query' : query, 'k': k}
             if query:
-                response = get_openai_answers(vectorstore, query, k)
-                st.write(response)
+                response = requests.post(url='http://127.0.0.1:8000/qa/openai', data = json.dumps(payload))
+                st.write(response.text)
         elif option == 'LLama 2 7B':
             # Check for existing store or create one
             store_name = pdf.name[:-4] + '.llama.faiss'
-            vectorstore = get_llama_embeddings(chunks, store_name)
+            payload = {'chunks' : chunks, 'store_name' : store_name, 'query' : query, 'k' : k}
             if query:
-                response = get_llama_answers(vectorstore, query, k)
-                st.write(response)
+                response = requests.post(url='http://127.0.0.1:8000/qa/llama', data = json.dumps(payload))
+                st.write(response.text)
 
 if __name__ == '__main__':
     main()
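A note on the request calls above: requests.post(..., data=json.dumps(payload)) serializes the body by hand. A minimal equivalent sketch using requests' json parameter, which serializes the payload and sets the Content-Type: application/json header automatically (the payload values here are placeholders, not from the repo):

    import requests

    # Placeholder values mirroring what the Streamlit app sends.
    payload = {'chunks': ['first chunk of text', 'second chunk'],
               'store_name': 'sample.openai.faiss',
               'query': 'What is this document about?',
               'k': 3}

    # json=payload is equivalent to data=json.dumps(payload) in the diff above,
    # but also sets the Content-Type header for you.
    response = requests.post('http://127.0.0.1:8000/qa/openai', json=payload)
    print(response.text)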
5 changes: 3 additions & 2 deletions llama_chat.py
@@ -18,6 +18,9 @@
                n_batch = 128,
                n_ctx = 1024)
 
+embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
+                                n_gpu_layers = -1, verbose = False)
+
 # Sample Template
 TEMPLATE = """Use the following pieces of context to answer the question at the end.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.
@@ -34,8 +37,6 @@ def get_llama_embeddings(chunks, store_name):
     case of existing embeddings or create and save to
     Return: An instance of FAISS Vectorstore
     """
-    embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
-                                    n_gpu_layers = -1, verbose = False)
     if os.path.exists(store_name):
         vectorstore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
     else:
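This change hoists the LlamaCppEmbeddings construction out of get_llama_embeddings to module scope, so the GGUF weights load once at import rather than on every call, which matters now that each API request reaches this function. A toy sketch of the load-once pattern (the class below is a stand-in for illustration, not part of the repo):

    class ExpensiveModel:
        def __init__(self):
            print('loading weights ...')  # runs once, at module import

    MODEL = ExpensiveModel()  # module scope: shared by every later call

    def answer(query):
        # each call reuses the already-loaded instance
        return f'answered {query!r} using model {id(MODEL)}'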
29 changes: 29 additions & 0 deletions main.py
@@ -0,0 +1,29 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from llama_chat import get_llama_embeddings, get_llama_answers
+from typing import List
+from openai_chat import get_openai_answers, get_openai_embeddings
+
+app = FastAPI()
+
+class QA(BaseModel):
+    chunks : List
+    store_name : str
+    query : str
+    k: int
+
+@app.post('/qa/openai')
+def openai_response(input: QA):
+    vectorstore = get_openai_embeddings(input.chunks, input.store_name)
+    if input.query:
+        response = get_openai_answers(vectorstore, input.query, input.k)
+        return response
+
+@app.post('/qa/llama')
+def llama_response(input: QA):
+    vectorstore = get_llama_embeddings(input.chunks, input.store_name)
+    if input.query:
+        response = get_llama_answers(vectorstore, input.query, input.k)
+        return response
+
+
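main.py only defines the FastAPI app; nothing in this commit shows how it is served. A likely entry point, inferred from the http://127.0.0.1:8000 URL hard-coded in app.py (the host and port are therefore assumptions, not confirmed by the commit):

    # Assumed invocation, not part of this commit.
    import uvicorn

    if __name__ == '__main__':
        # Serves main.py's `app` at the address app.py expects.
        uvicorn.run('main:app', host='127.0.0.1', port=8000)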
34 changes: 34 additions & 0 deletions utils.py
@@ -0,0 +1,34 @@
+"""
+Filename: utils.py
+Description: Implements functions and methods needed for reading text from files
+and splitting into chunks, etc
+"""
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+def read_pdf(pdf):
+    """
+    Parameters:
+    - pdf: path to the PDF file
+    Return: Returns the contents of the PDF file
+    """
+    pdf_reader = PdfReader(pdf)
+
+    content = ""
+    for page in pdf_reader.pages:
+        content += page.extract_text()
+    return content
+
+def split_into_chunks(content):
+    """
+    Parameters:
+    - content: the content read from the PDF file
+    Return: Returns the contents split into chunks
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1000,
+        chunk_overlap = 200,
+        length_function = len
+    )
+    chunks = text_splitter.split_text(text = content)
+    return chunks
