Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added streamlit app in the examples folder and modified .gitignore #48

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,7 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Streamlit
.DS_Store
uploaded_files/
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ helps, and credit will always be given.

## Get Started!

Ready to contribute? Here's how to set up `genai_stack` for local development.
Ready to contribute? Here's how to set up `beyondllm` for local development.

1. Fork the `beyondllm` repo on GitHub.
2. Clone your fork locally
Expand Down Expand Up @@ -71,8 +71,8 @@ and "help wanted" is open to whoever wants to implement it.

**Write Documentation**

GenAI Stack could always use more documentation, whether as part of the
official GenAI Stack docs, in docstrings, or even on the web in blog posts,
BeyondLLM could always use more documentation, whether as part of the
official BeyondLLM docs, in docstrings, or even on the web in blog posts,
articles, and such.

**Submit Feedback**
Expand Down
91 changes: 91 additions & 0 deletions examples/streamlit_app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Streamlit demo: chat with an uploaded CSV file via BeyondLLM + Gemini.

Flow: collect the Google API key, optionally configure a Pinecone index in
the sidebar, accept a CSV upload and a question, then build a retriever
(see ingest.get_retriever) and answer the question with Gemini.
"""
import os

import streamlit as st

from beyondllm import generator
from beyondllm.llms import GeminiModel
from ingest import get_retriever

st.title("Chat with CSV file")

st.text("Enter Google API Key")
google_api_key = st.text_input("Google API Key:", type="password")
# Only export the key once the user has actually entered one; setting an
# empty GOOGLE_API_KEY would mask a missing key with a confusing auth error.
if google_api_key:
    os.environ['GOOGLE_API_KEY'] = google_api_key

vectordb_options = ['Chroma', 'Pinecone']
with st.sidebar:
    st.title("VectorDB Options")
    vectordb_type = st.selectbox("Select VectorDB Type",
                                 vectordb_options,
                                 index=0)

    if vectordb_type == 'Pinecone':
        # get the pinecone api key and index name
        pinecone_api_key = st.text_input("Pinecone API Key:",
                                         type="password")
        pinecone_index_name = st.text_input("Pinecone Index Name:")

        # choose whether to use an existing index or create a new one
        st.subheader("Pinecone Options")
        pinecone_option = st.radio("Choose Option",
                                   ('Existing', 'Create New'),
                                   index=0)

        # embedding dimension of the index (768 matches Gemini embedding-001)
        pinecone_embedding_dim = st.number_input("Embedding Dimension",
                                                 min_value=1,
                                                 max_value=2048,
                                                 value=768)

        # similarity metric used by the index
        pinecone_metric = st.selectbox("Metric",
                                       ["cosine", "euclidean"],
                                       index=0)

        # serverless cloud provider hosting the index
        pinecone_cloud = st.selectbox("Cloud",
                                      ["aws", "gcp", "azure"],
                                      index=0)

        # region of the chosen cloud provider
        pinecone_region = st.text_input("Region:")

if google_api_key:
    st.success("Google API Key entered successfully!")
    uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
    if uploaded_file is not None:
        st.success("file uploaded successfully!")
        question = st.text_input("Enter your question")

        if uploaded_file is not None and question:
            # Build the retriever; the Pinecone branch forwards the sidebar
            # settings (the pinecone_* names exist exactly when this branch runs).
            if vectordb_type == 'Pinecone':
                retriever = get_retriever(uploaded_file,
                                          google_api_key,
                                          vector_db=vectordb_type.lower(),
                                          pinecone_api_key=pinecone_api_key,
                                          pinecone_index_name=pinecone_index_name,
                                          pinecone_option=pinecone_option,
                                          pinecone_embedding_dim=pinecone_embedding_dim,
                                          pinecone_metric=pinecone_metric,
                                          pinecone_cloud=pinecone_cloud,
                                          pinecone_region=pinecone_region)
            elif vectordb_type == 'Chroma':
                retriever = get_retriever(uploaded_file,
                                          google_api_key,
                                          vector_db=vectordb_type.lower())
            # Initialize the LLM
            llm = GeminiModel(model_name="gemini-pro",
                              google_api_key = os.environ.get('GOOGLE_API_KEY'))
            # Initialize the system prompt
            system_prompt = "You are an AI assistant, who answers questions based on uploaded csv files. You can answer anything about the data."
            # Initialize the generator
            pipeline = generator.Generate(question=question,
                                          retriever=retriever,
                                          llm=llm,
                                          system_prompt=system_prompt)
            # Generate the response
            response = pipeline.call()
            # display the response
            st.write(response)

49 changes: 49 additions & 0 deletions examples/streamlit_app/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from beyondllm.retrieve import auto_retriever
from beyondllm.vectordb import ChromaVectorDb, PineconeVectorDb
from beyondllm.embeddings import GeminiEmbeddings
from beyondllm import source

def get_retriever(uploaded_file, google_api_key, vector_db='chroma', pinecone_api_key=None, pinecone_index_name=None, pinecone_option=None, pinecone_embedding_dim=None, pinecone_metric=None, pinecone_cloud=None, pinecone_region=None):
    """Build a BeyondLLM retriever over an uploaded CSV file.

    Persists the Streamlit upload to ./uploaded_files, chunks it with
    ``source.fit``, embeds it with Gemini, and indexes it in either a
    local Chroma store or a Pinecone index (existing or newly created,
    depending on ``pinecone_option``).

    Args:
        uploaded_file: Streamlit ``UploadedFile`` (must expose ``.name``
            and ``.getbuffer()``).
        google_api_key: Gemini API key used for embeddings; if falsy the
            function returns ``None`` without doing any work.
        vector_db: ``'chroma'`` (default) or ``'pinecone'``.
        pinecone_*: Pinecone connection/index settings; only consulted
            when ``vector_db == 'pinecone'``.

    Returns:
        A configured retriever, or ``None`` when no API key was supplied.

    Raises:
        ValueError: if ``vector_db`` is not a supported backend.
    """
    if not google_api_key:
        # Nothing can be embedded without a key; caller treats None as "not ready".
        return None

    # Save the uploaded file so BeyondLLM can ingest it by path.
    save_path = "./uploaded_files"  # change this to your desired path or leave it as is
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(save_path, exist_ok=True)
    file_path = os.path.join(save_path, uploaded_file.name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Fit the data
    data = source.fit(file_path, dtype="csv",
                      chunk_size=512,
                      chunk_overlap=50)
    # Initialize your embedding model
    embed_model = GeminiEmbeddings(api_key=google_api_key,
                                   model_name="models/embedding-001")
    # Initialize your vector store
    if vector_db == 'chroma':
        vector_store = ChromaVectorDb(collection_name='my_persistent_collection',  # change this to your desired collection name
                                      persist_directory='./db/chroma/')
    elif vector_db == 'pinecone':
        if pinecone_option == 'Existing':
            # Connect to an existing Pinecone index
            vector_store = PineconeVectorDb(api_key=pinecone_api_key,
                                            index_name=pinecone_index_name)
        else:
            # Create a new serverless Pinecone index
            vector_store = PineconeVectorDb(
                create=True,
                api_key=pinecone_api_key,
                index_name=pinecone_index_name,
                embedding_dim=pinecone_embedding_dim,
                metric=pinecone_metric,
                cloud=pinecone_cloud,
                region=pinecone_region,
            )
    else:
        # Previously an unknown backend crashed later with NameError on
        # vector_store; fail fast with a clear message instead.
        raise ValueError(f"Unsupported vector_db: {vector_db!r}")

    # Initialize the retriever
    return auto_retriever(data=data, embed_model=embed_model, type="normal", top_k=5, vectordb=vector_store)
Loading