diff --git a/community/5_mins_rag_no_gpu/.streamlit/config.toml b/community/5_mins_rag_no_gpu/.streamlit/config.toml deleted file mode 100644 index 6a6ca7d5..00000000 --- a/community/5_mins_rag_no_gpu/.streamlit/config.toml +++ /dev/null @@ -1,9 +0,0 @@ -[client] -showErrorDetails = false - -[theme] -primaryColor = "#76b900" -backgroundColor = "white" - -[browser] -gatherUsageStats = false \ No newline at end of file diff --git a/community/5_mins_rag_no_gpu/README.md b/community/5_mins_rag_no_gpu/README.md index ded05559..86a9fa4b 100644 --- a/community/5_mins_rag_no_gpu/README.md +++ b/community/5_mins_rag_no_gpu/README.md @@ -1,13 +1,17 @@ -# Tutorial for a Generic RAG-Based Chatbot +# RAG in 5 Minutes -This is a tutorial for how to build your own generic RAG chatbot. It is intended as a foundation for building more complex, domain-specific RAG bots. Note that no GPU is needed to run this as it is using NIMs from the NVIDIA catalog. +This implementation is tied to the [YouTube video on NVIDIA Developer](https://youtu.be/N_OOfkEWcOk). -## Acknowledgements +This is a simple standalone implementation showing a minimal RAG pipeline that uses models available from [NVIDIA API Catalog](https://catalog.ngc.nvidia.com/ai-foundation-models). +The catalog enables you to experience state-of-the-art LLMs accelerated by NVIDIA. +Developers get free credits for 10K requests to any of the models. - - This implementation is based on [Rag in 5 Minutes](https://github.com/NVIDIA/GenerativeAIExamples/tree/4e86d75c813bcc41d4e92e430019053920d08c94/community/5_mins_rag_no_gpu), with changes primarily made to the UI. - - Alyssa Sawyer also contributed to updating and further developing this repo during her intern project, [Resume RAG Bot](https://github.com/alysawyer/resume-rag-nv), at NVIDIA. +The example uses an [integration package to LangChain](https://python.langchain.com/docs/integrations/providers/nvidia) to access the models. 
+NVIDIA engineers develop, test, and maintain the open source integration. +This example uses a simple [Streamlit](https://streamlit.io/) based user interface and has a one-file implementation. +Because the example uses the models from the NVIDIA API Catalog, you do not need a GPU to run the example. -## Steps +### Steps 1. Create a python virtual environment and activate it: @@ -16,10 +20,10 @@ This is a tutorial for how to build your own generic RAG chatbot. It is intended source genai/bin/activate ``` -1. From the root of this repository, install the requirements: +1. From the root of this repository, `GenerativeAIExamples`, install the requirements: ```console - pip install -r requirements.txt + pip install -r community/5_mins_rag_no_gpu/requirements.txt ``` 1. Add your NVIDIA API key as an environment variable: @@ -28,15 +32,17 @@ This is a tutorial for how to build your own generic RAG chatbot. It is intended export NVIDIA_API_KEY="nvapi-*" ``` - If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select on any model, then click on `Get API Key`. + If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select any model, then click `Get API Key`. 1. Run the example using Streamlit: ```console - streamlit run main.py + streamlit run community/5_mins_rag_no_gpu/main.py ``` 1. Test the deployed example by going to `http://<host-ip>:8501` in a web browser. - Click **Browse Files** and select the documents for your knowledge base. - After selecting, click **Upload!** to complete the ingestion process. \ No newline at end of file + Click **Browse Files** and select your knowledge source. + After selecting, click **Upload!** to complete the ingestion process. + +You are all set now! Try out queries related to the knowledge base using text from the user interface. 
diff --git a/community/5_mins_rag_no_gpu/main.py b/community/5_mins_rag_no_gpu/main.py index 35c19d20..04a0fdba 100644 --- a/community/5_mins_rag_no_gpu/main.py +++ b/community/5_mins_rag_no_gpu/main.py @@ -13,120 +13,65 @@ # See the License for the specific language governing permissions and # limitations under the License. -# This is a simple standalone implementation showing rag pipeline using Nvidia AI Foundational Models. +# This is a simple standalone implementation showing rag pipeline using Nvidia AI Foundational models. # It uses a simple Streamlit UI and one file implementation of a minimalistic RAG pipeline. - -############################################ -# Component #0.5 - UI / Header -############################################ - import streamlit as st import os +from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings +from langchain.text_splitter import CharacterTextSplitter +from langchain_community.document_loaders import DirectoryLoader +from langchain_community.vectorstores import FAISS +import pickle +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate -# Page settings -st.set_page_config( - layout="wide", - page_title="RAG Chatbot", - page_icon = "🤖", - initial_sidebar_state="expanded") - -# Page title -st.header('Generic RAG Chatbot Demo 🤖📝', divider='rainbow') - -# Custom CSS -def local_css(file_name): - with open(file_name, "r") as f: - st.markdown(f"", unsafe_allow_html=True) -local_css("style.css") - -# Page description -st.markdown('''Manually looking through vast amounts of data can be tedious and time-consuming. 
This chatbot can expedite that process by providing a platform to query your documents.''') -st.warning("This is a proof of concept, and any output from the AI agent should be used in conjunction with the original data.", icon="⚠️") - -############################################ -# Component #1 - Document Loader -############################################ +st.set_page_config(layout="wide") +# Component #1 - Document Upload with st.sidebar: - st.subheader("Upload Your Documents") - DOCS_DIR = os.path.abspath("./uploaded_docs") - - # Make dir to store uploaded documents if not os.path.exists(DOCS_DIR): os.makedirs(DOCS_DIR) - - # Define form on Streamlit page for uploading files to KB st.subheader("Add to the Knowledge Base") with st.form("my-form", clear_on_submit=True): uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files=True) submitted = st.form_submit_button("Upload!") - # Acknowledge successful file uploads if uploaded_files and submitted: for uploaded_file in uploaded_files: st.success(f"File {uploaded_file.name} uploaded successfully!") with open(os.path.join(DOCS_DIR, uploaded_file.name), "wb") as f: f.write(uploaded_file.read()) -############################################ -# Component #2 - Initalizing Embedding Model and LLM -############################################ +# Component #2 - Embedding Model and LLM +llm = ChatNVIDIA(model="meta/llama3-70b-instruct") +document_embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage") -from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings - -#Make sure to export your NGC NV-Developer API key as NVIDIA_API_KEY! 
-API_KEY = os.environ['NVIDIA_API_KEY'] - -# Select embedding model and LLM -document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", api_key=API_KEY, model_type="passage", truncate="END") -llm = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=API_KEY, temperature=0) - -############################################ # Component #3 - Vector Database Store -############################################ - -import pickle -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.document_loaders import DirectoryLoader -from langchain_community.vectorstores import FAISS -from langchain_core.output_parsers import StrOutputParser -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.retrievers import BaseRetriever - -# Option for using an existing vector store with st.sidebar: use_existing_vector_store = st.radio("Use existing vector store if available", ["Yes", "No"], horizontal=True) -# Load raw documents from the directory -DOCS_DIR = os.path.abspath("./uploaded_docs") +vector_store_path = "vectorstore.pkl" raw_documents = DirectoryLoader(DOCS_DIR).load() -# Check for existing vector store file -vector_store_path = "vectorstore.pkl" vector_store_exists = os.path.exists(vector_store_path) vectorstore = None - if use_existing_vector_store == "Yes" and vector_store_exists: - # Load existing vector store with open(vector_store_path, "rb") as f: vectorstore = pickle.load(f) with st.sidebar: - st.info("Existing vector store loaded successfully.") + st.success("Existing vector store loaded successfully.") else: with st.sidebar: if raw_documents and use_existing_vector_store == "Yes": - # Chunk documents with st.spinner("Splitting documents into chunks..."): - text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100) + text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=200) documents = text_splitter.split_documents(raw_documents) - # Convert document chunks to embeddings, and 
save in a vector store with st.spinner("Adding document chunks to vector database..."): vectorstore = FAISS.from_documents(documents, document_embedder) - # Save vector store with st.spinner("Saving vector store"): with open(vector_store_path, "wb") as f: pickle.dump(vectorstore, f) @@ -134,13 +79,9 @@ def local_css(file_name): else: st.warning("No documents available to process!", icon="⚠️") -############################################ # Component #4 - LLM Response Generation and Chat -############################################ - -st.subheader("Query your data") +st.subheader("Chat with your AI Assistant, Envie!") -# Save chat history for this user session if "messages" not in st.session_state: st.session_state.messages = [] @@ -148,41 +89,34 @@ def local_css(file_name): with st.chat_message(message["role"]): st.markdown(message["content"]) -# Define prompt for LLM prompt_template = ChatPromptTemplate.from_messages([ - ("system", "You are a helpful AI assistant. Use the provided context to inform your responses. If no context is available, please state that."), + ("system", "You are a helpful AI assistant named Envie. If provided with context, use it to inform your responses. 
If no context is available, use your general knowledge to provide a helpful response."), ("human", "{input}") ]) -# Define simple prompt chain chain = prompt_template | llm | StrOutputParser() -# Display an example query for user -user_query = st.chat_input("Please summarize these documents.") +user_input = st.chat_input("Can you tell me what NVIDIA is known for?") -if user_query: - st.session_state.messages.append({"role": "user", "content": user_query}) +if user_input: + st.session_state.messages.append({"role": "user", "content": user_input}) with st.chat_message("user"): - st.markdown(user_query) + st.markdown(user_input) with st.chat_message("assistant"): message_placeholder = st.empty() full_response = "" if vectorstore is not None and use_existing_vector_store == "Yes": - # Retrieve relevant chunks for the given user query from the vector store retriever = vectorstore.as_retriever() - retrieved_docs = retriever.invoke(user_query) - - # Concatenate retrieved chunks together as context for LLM - context = "\n\n".join([doc.page_content for doc in retrieved_docs]) - augmented_user_input = f"Context: {context}\n\nQuestion: {user_query}\n" + docs = retriever.invoke(user_input) + context = "\n\n".join([doc.page_content for doc in docs]) + augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}\n" else: - augmented_user_input = f"Question: {user_query}\n" + augmented_user_input = f"Question: {user_input}\n" - # Get output from LLM for response in chain.stream({"input": augmented_user_input}): full_response += response message_placeholder.markdown(full_response + "▌") message_placeholder.markdown(full_response) - st.session_state.messages.append({"role": "assistant", "content": full_response}) \ No newline at end of file + st.session_state.messages.append({"role": "assistant", "content": full_response}) diff --git a/community/5_mins_rag_no_gpu/requirements.txt b/community/5_mins_rag_no_gpu/requirements.txt index 5c3277b2..72047cae 100644 --- 
a/community/5_mins_rag_no_gpu/requirements.txt +++ b/community/5_mins_rag_no_gpu/requirements.txt @@ -1,13 +1,5 @@ -streamlit +streamlit==1.30.0 faiss-cpu==1.7.4 +langchain==0.1.20 unstructured[all-docs]==0.11.2 -langchain -langchain-community -langchain-core langchain-nvidia-ai-endpoints -langchain-text-splitters -nltk==3.8.1 -numpy==1.23.5 -onnx==1.16.1 -onnxruntime==1.15.1 -python-magic \ No newline at end of file diff --git a/community/5_mins_rag_no_gpu/style.css b/community/5_mins_rag_no_gpu/style.css deleted file mode 100644 index 9e04b381..00000000 --- a/community/5_mins_rag_no_gpu/style.css +++ /dev/null @@ -1,73 +0,0 @@ -/* style.css */ - -/* custom footer */ -.footer { - text-align: center; - color: #666; - font-size: 14px; -} - -/* NVIDIA green for headers */ -h1, h2, h3, h4, h5 { - color: #76b900; -} - - -/* add line when hovering over link */ -.hover-link { - text-decoration: none; - color: inherit; - position: relative; - } - -.hover-link::after { - content: ''; - position: absolute; - width: 100%; - height: 1px; - bottom: 0; - left: 0; - background-color: #000; - transform: scaleX(0); - transition: transform 0.3s ease-in-out; -} - -.hover-link:hover::after { - transform: scaleX(1); -} - -/* Remove default formatting for links */ -a { - color: #666; - text-decoration: none; -} - -/* Remove streamlit bar */ -header { - visibility: hidden; -} - -/* custom container */ - -.custom-image-container img { - border-radius: 10px; -} - -.custom-column-container { - background-color: #f0f0f0; - border-radius: 10px; - padding: 20px; -} - -.custom-column-container .stMarkdown { - padding-right: 20px; -} - -.streamlit-expanderHeader { - background-color: white; - color: #76b900; -} -.streamlit-expanderContent { - background-color: white; - color: black; -} \ No newline at end of file