-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Bug]: Not able to create asynchronous chromadb. #14515
Comments
To resolve the issue of creating the ChromaDB embeddings asynchronously, here is an example of how you can modify your code to handle asynchronous embeddings properly:

import os
import asyncio
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex, StorageContext, SimpleDirectoryReader
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
# FastAPI application instance; the route handlers below attach to it.
app = FastAPI()

# Global variable to track upload status
class GlobalVariable:
    """Mutable application-wide state shared by the route handlers."""

    def __init__(self):
        # 0 while embeddings are still being built; define_rag() sets it to 1
        # once the retriever and query engine are ready.
        self.upload_in_progress = 0
        self.retriever_chunk = None
        self.query_engine_chunk = None
        # Plug in your embedding model, LLM and text QA template here.
        self.embed_model = None
        self.llm = None
        self.text_qa_template = None


# Single shared instance read and written by the endpoints below.
global_variable = GlobalVariable()
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Save the uploaded file under ./docs and start indexing in the background."""
    try:
        # Create the destination directory on first use.
        if not os.path.exists("docs"):
            os.makedirs("docs")

        # Persist the upload under its original filename.
        destination = os.path.join("docs", file.filename)
        contents = await file.read()
        with open(destination, "wb") as out_file:
            out_file.write(contents)

        # Kick off the RAG pipeline build without blocking this response.
        asyncio.create_task(define_rag())
        return JSONResponse(content={"message": "File uploaded successfully"})
    except Exception as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=500)
@app.post("/status")
async def status_checker():
    """Report the indexing progress flag (1 once define_rag() has finished)."""
    current = global_variable.upload_in_progress
    return {"upload_in_progress": current}
async def define_rag():
    """Build the Chroma-backed RAG pipeline and publish it via global_variable.

    Loads documents from ./docs, builds (or reloads) the vector index, then
    installs the retriever and query engine on global_variable and sets
    upload_in_progress to 1 so /status can report completion.
    """
    documents = SimpleDirectoryReader(
        input_dir="./docs", required_exts=[".docx", ".doc", ".pdf", ".txt"]
    ).load_data()
    # The existence check must run BEFORE creating the PersistentClient,
    # because instantiating the client creates the ./chroma_db directory.
    if os.path.exists("./chroma_db"):
        # Reuse embeddings persisted by a previous run.
        print("*************************utilizing pre generated embeddings from chromadb folder")
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        vector_index_chunk = VectorStoreIndex.from_vector_store(
            vector_store, embed_model=global_variable.embed_model
        )
    else:
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        # BUG FIX: VectorStoreIndex.from_documents() is a plain synchronous
        # classmethod, not a coroutine — `await`ing its result raises
        # "object VectorStoreIndex can't be used in 'await' expression".
        # Run the blocking build in a worker thread instead; use_async=True
        # is safe there because the worker thread has no running event loop.
        vector_index_chunk = await asyncio.to_thread(
            VectorStoreIndex.from_documents,
            documents,
            embed_model=global_variable.embed_model,
            storage_context=storage_context,
            use_async=True,
            show_progress=True,
        )
    vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=5)
    global_variable.retriever_chunk = RecursiveRetriever(
        "vector",
        retriever_dict={"vector": vector_retriever_chunk},
        node_dict={node.node_id: node for node in documents},
        verbose=True,
    )
    print("Vector store creation done")
    # Signal /status that indexing is finished.
    global_variable.upload_in_progress = 1
    global_variable.query_engine_chunk = RetrieverQueryEngine.from_args(
        global_variable.retriever_chunk,
        llm=global_variable.llm,
        text_qa_template=global_variable.text_qa_template,
    )
I followed the same code suggested by @dosu and executed it, but I am still getting the same error.
# The error persists because `VectorStoreIndex.from_documents` is a regular
# synchronous classmethod — `await`ing its return value raises TypeError.
# Here's the corrected async def define_rag():
async def define_rag():
    """Build the Chroma-backed RAG pipeline and publish it via global_variable.

    Loads documents from ./docs, builds (or reloads) the vector index, then
    installs the retriever and query engine on global_variable and sets
    upload_in_progress to 1 so /status can report completion.
    """
    documents = SimpleDirectoryReader(
        input_dir="./docs", required_exts=[".docx", ".doc", ".pdf", ".txt"]
    ).load_data()
    # The existence check must run BEFORE creating the PersistentClient,
    # because instantiating the client creates the ./chroma_db directory.
    if os.path.exists("./chroma_db"):
        # Reuse embeddings persisted by a previous run.
        print("*************************utilizing pre generated embeddings from chromadb folder")
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        vector_index_chunk = VectorStoreIndex.from_vector_store(
            vector_store, embed_model=global_variable.embed_model
        )
    else:
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        # BUG FIX: do not `await` from_documents() directly — it is not a
        # coroutine. Run the blocking build in a worker thread; use_async=True
        # is safe there because the worker thread has no running event loop.
        vector_index_chunk = await asyncio.to_thread(
            VectorStoreIndex.from_documents,
            documents,
            embed_model=global_variable.embed_model,
            storage_context=storage_context,
            use_async=True,
            show_progress=True,
        )
    vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=5)
    global_variable.retriever_chunk = RecursiveRetriever(
        "vector",
        retriever_dict={"vector": vector_retriever_chunk},
        node_dict={node.node_id: node for node in documents},
        verbose=True,
    )
    print("Vector store creation done")
    # Signal /status that indexing is finished.
    global_variable.upload_in_progress = 1
    global_variable.query_engine_chunk = RetrieverQueryEngine.from_args(
        global_variable.retriever_chunk,
        llm=global_variable.llm,
        text_qa_template=global_variable.text_qa_template,
    )
# This modification keeps the blocking index build off the running event loop
# and performs no invalid `await` on a non-awaitable result.
Bug Description
I would like to create a status_checker API endpoint in FastAPI to track the creation of the ChromaDB embeddings. I would also like to create these embeddings in async mode. The code below gives an error; please advise.
Version
llama-index 0.10.12
Steps to Reproduce
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core import SimpleDirectoryReader
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
@app.post("/status")
async def status_checker():
    # Polled by the client to check embedding progress.
    # NOTE(review): the flag name is misleading — per the code below it is set
    # to 1 only AFTER the vector store is fully created (0 = still running).
    # Also returns the bare int rather than a JSON object.
    return global_variable.upload_in_progress
async def define_rag():
    # NOTE(review): this is the failing reproduction as posted. Two problems
    # are visible in this snippet:
    #  1. `await VectorStoreIndex(all_nodes, ...)` awaits a plain constructor
    #     call — the resulting index object is not awaitable, so this line
    #     raises a TypeError at runtime.
    #  2. `all_nodes` and `all_nodes_dict` are not defined anywhere in this
    #     snippet — presumably derived from `documents` elsewhere; confirm
    #     against the full code.
    documents = SimpleDirectoryReader(input_dir="./docs",required_exts = [".docx",".doc",".pdf",".txt"]).load_data()
    if os.path.exists('./chroma_db'):
        # Reuse embeddings persisted by a previous run.
        print("*************************utilizing pre generated embeddings from chromadb folder")
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        vector_index_chunk = VectorStoreIndex.from_vector_store(vector_store, embed_model=global_variable.embed_model)
    else:
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        chroma_collection = chroma_client.get_or_create_collection("quickstart")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        # index = VectorStoreIndex.from_documents(documents,storage_context=storage_context)
        # BUG: `await` on a constructor call — see NOTE(review) above.
        vector_index_chunk = await VectorStoreIndex(all_nodes, embed_model=global_variable.embed_model,
        storage_context=storage_context)#, use_async=True, show_progress=True)
    vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=5)
    global_variable.retriever_chunk = RecursiveRetriever(
        "vector",
        retriever_dict={"vector": vector_retriever_chunk},
        node_dict=all_nodes_dict,
        verbose=True,
    )
    print("Vector store creation done")
    # Flag set only after the vector store and retriever are ready.
    global_variable.upload_in_progress = 1
    global_variable.query_engine_chunk = RetrieverQueryEngine.from_args(global_variable.retriever_chunk, llm=global_variable.llm, text_qa_template=global_variable.text_qa_template)
Relevant Logs/Tracebacks
The text was updated successfully, but these errors were encountered: