diff --git a/app.py b/app.py
index 7c56bdf..34beed3 100644
--- a/app.py
+++ b/app.py
@@ -6,37 +6,9 @@
 import streamlit as st
 from dotenv import load_dotenv
 from streamlit_extras.add_vertical_space import add_vertical_space
-from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from openai_chat import get_openai_embeddings, get_openai_answers
-from llama_chat import get_llama_embeddings, get_llama_answers
-
-def read_pdf(pdf):
-    """
-    Parameters:
-    - pdf: path to the PDF file
-    Return: Returns the contents of the PDF file
-    """
-    pdf_reader = PdfReader(pdf)
-
-    content = ""
-    for page in pdf_reader.pages:
-        content += page.extract_text()
-    return content
-
-def split_into_chunks(content):
-    """
-    Parameters:
-    - content: the content read from the PDf file
-    Return: Returns the contents split into chunks
-    """
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = 1000,
-        chunk_overlap = 200,
-        length_function = len
-    )
-    chunks = text_splitter.split_text(text = content)
-    return chunks
+from utils import read_pdf, split_into_chunks
+import requests
+import json
 
 # Favicon and Title
 st.set_page_config(page_title="NekoPDF 📖 - Chat with PDF",
@@ -85,17 +57,17 @@ def main():
         if option == 'GPT 3.5 - Turbo':
             # Check for existing store or create new one
             store_name = pdf.name[:-4] + '.openai.faiss'
-            vectorstore = get_openai_embeddings(chunks, store_name)
+            payload = {'chunks': chunks, 'store_name': store_name, 'query': query, 'k': k}
             if query:
-                response = get_openai_answers(vectorstore, query, k)
-                st.write(response)
+                response = requests.post(url='http://127.0.0.1:8000/qa/openai', data=json.dumps(payload))
+                st.write(response.text)
         elif option == 'LLama 2 7B':
             # Check for existing store or create one
             store_name = pdf.name[:-4] + '.llama.faiss'
-            vectorstore = get_llama_embeddings(chunks, store_name)
+            payload = {'chunks': chunks, 'store_name': store_name, 'query': query, 'k': k}
             if query:
-                response = get_llama_answers(vectorstore, query, k)
-                st.write(response)
+                response = requests.post(url='http://127.0.0.1:8000/qa/llama', data=json.dumps(payload))
+                st.write(response.text)
 
 if __name__ == '__main__':
     main()
diff --git a/llama_chat.py b/llama_chat.py
index b239f9b..493c308 100644
--- a/llama_chat.py
+++ b/llama_chat.py
@@ -18,6 +18,9 @@
                n_batch = 128,
                n_ctx = 1024)
 
+embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
+                                n_gpu_layers = -1, verbose = False)
+
 # Sample Template
 TEMPLATE = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 
@@ -34,8 +37,6 @@ def get_llama_embeddings(chunks, store_name):
     case of existing embeddings or create and save to
     Return: An instance of FAISS Vectorstore
     """
-    embeddings = LlamaCppEmbeddings(model_path = './models/llama-2-7b-chat.Q2_K.gguf',
-                                    n_gpu_layers = -1, verbose = False)
     if os.path.exists(store_name):
         vectorstore = FAISS.load_local(store_name, embeddings, allow_dangerous_deserialization=True)
     else:
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8be9c0e
--- /dev/null
+++ b/main.py
@@ -0,0 +1,29 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from llama_chat import get_llama_embeddings, get_llama_answers
+from typing import List
+from openai_chat import get_openai_answers, get_openai_embeddings
+
+app = FastAPI()
+
+class QA(BaseModel):
+    chunks: List
+    store_name: str
+    query: str
+    k: int
+
+@app.post('/qa/openai')
+def openai_response(input: QA):
+    vectorstore = get_openai_embeddings(input.chunks, input.store_name)
+    if input.query:
+        response = get_openai_answers(vectorstore, input.query, input.k)
+        return response
+
+@app.post('/qa/llama')
+def llama_response(input: QA):
+    vectorstore = get_llama_embeddings(input.chunks, input.store_name)
+    if input.query:
+        response = get_llama_answers(vectorstore, input.query, input.k)
+        return response
+
+
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..a2b766c
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,34 @@
+"""
+Filename: utils.py
+Description: Implements functions and methods needed for reading text from files
+             and splitting into chunks, etc.
+"""
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+def read_pdf(pdf):
+    """
+    Parameters:
+    - pdf: path to the PDF file
+    Return: Returns the contents of the PDF file
+    """
+    pdf_reader = PdfReader(pdf)
+
+    content = ""
+    for page in pdf_reader.pages:
+        content += page.extract_text()
+    return content
+
+def split_into_chunks(content):
+    """
+    Parameters:
+    - content: the content read from the PDF file
+    Return: Returns the contents split into chunks
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1000,
+        chunk_overlap = 200,
+        length_function = len
+    )
+    chunks = text_splitter.split_text(text = content)
+    return chunks
\ No newline at end of file
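
To exercise the new endpoints, start the FastAPI backend (e.g. `uvicorn main:app --port 8000`) before launching the Streamlit app, since app.py now POSTs to 127.0.0.1:8000. The snippet below is a minimal sketch of a request matching the QA model; the chunk text, store name, and query values are placeholders, not part of this change.

# Minimal smoke test for the new /qa endpoints (illustrative values only).
# Assumes the FastAPI server from main.py is listening on 127.0.0.1:8000.
import requests

payload = {
    "chunks": ["NekoPDF is a Streamlit app for chatting with PDF files."],  # pre-split text
    "store_name": "example.openai.faiss",  # FAISS index to load or create
    "query": "What is NekoPDF?",
    "k": 3,  # number of chunks to retrieve
}
response = requests.post("http://127.0.0.1:8000/qa/openai", json=payload)
print(response.text)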