diff --git a/.gitignore b/.gitignore
index b69d9d56..ef2f37e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@ qdrant_storage/
 .truefoundry
 infinity/
 volumes/
+user_data/
 pgdata/
 *.bak
 models_config.yaml
diff --git a/backend/modules/metadata_store/prismastore.py b/backend/modules/metadata_store/prismastore.py
index 70f05468..84b58511 100644
--- a/backend/modules/metadata_store/prismastore.py
+++ b/backend/modules/metadata_store/prismastore.py
@@ -379,7 +379,7 @@ async def adelete_data_source(self, data_source_fqn: str):
             data_source_uri = data_source.uri
             # data_source_uri is of the form: `/app/users_data/folder_name`
             folder_name = data_source_uri.split("/")[-1]
-            folder_path = os.path.join("/app/user_data", folder_name)
+            folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, folder_name)
             logger.info(
                 f"Deleting folder: {folder_path}, path exists: {os.path.exists(folder_path)}"
             )
diff --git a/backend/modules/query_controllers/example/controller.py b/backend/modules/query_controllers/example/controller.py
index cbac7731..2c0c1ac6 100644
--- a/backend/modules/query_controllers/example/controller.py
+++ b/backend/modules/query_controllers/example/controller.py
@@ -188,11 +188,9 @@ async def _stream_answer(self, rag_chain, query):
                     yield json.dumps(
                         {"docs": self._format_docs_for_stream(chunk["context"])}
                     )
-                    await asyncio.sleep(0.1)
                 elif "answer" in chunk:
                     # print("Answer: ", chunk['answer'])
                     yield json.dumps({"answer": chunk["answer"]})
-                    await asyncio.sleep(0.1)

             yield json.dumps({"end": "<END>"})
         except asyncio.TimeoutError:
diff --git a/backend/modules/query_controllers/multimodal/controller.py b/backend/modules/query_controllers/multimodal/controller.py
index bc9f35b2..85cac54c 100644
--- a/backend/modules/query_controllers/multimodal/controller.py
+++ b/backend/modules/query_controllers/multimodal/controller.py
@@ -187,17 +187,14 @@ async def _stream_answer(self, rag_chain, query):
                 if "question " in chunk:
                     # print("Question: ", chunk['question'])
                     yield json.dumps({"question": chunk["question"]})
-                    await asyncio.sleep(0.1)
                 elif "context" in chunk:
                     # print("Context: ", self._format_docs_for_stream(chunk['context']))
                     yield json.dumps(
                         {"docs": self._format_docs_for_stream(chunk["context"])}
                     )
-                    await asyncio.sleep(0.1)
                 elif "answer" in chunk:
                     # print("Answer: ", chunk['answer'])
                     yield json.dumps({"answer": chunk["answer"]})
-                    await asyncio.sleep(0.1)

             yield json.dumps({"end": "<END>"})
         except asyncio.TimeoutError:
@@ -211,15 +208,10 @@ async def _stream_vlm_answer(self, llm, message_payload, docs):
                     "docs": self._format_docs_for_stream(docs),
                 }
             )
-            await asyncio.sleep(0.1)

             async for chunk in llm.astream(message_payload):
                 yield json.dumps({"answer": chunk.content})
-                await asyncio.sleep(0.1)
-
-            await asyncio.sleep(0.1)

             yield json.dumps({"end": "<END>"})
-            await asyncio.sleep(0.1)
         except asyncio.TimeoutError:
             raise HTTPException(status_code=504, detail="Stream timed out")
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 38b63e5d..af95cf07 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -9,12 +9,12 @@ uvicorn==0.23.2
 fastapi==0.109.1
 qdrant-client==1.9.0
 python-dotenv==1.0.1
-pydantic==1.10.13
+pydantic==1.10.17
 orjson==3.9.15
 PyMuPDF==1.23.6
 redis==5.0.1
 beautifulsoup4==4.12.2
-truefoundry[ml]==0.2.4
+truefoundry[ml]==0.3.0rc7
 markdownify==0.11.6
 gunicorn==22.0.0
 markdown-crawler==0.0.8
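The two query-controller diffs above drop the `await asyncio.sleep(0.1)` calls that were issued after every streamed chunk. An async generator already hands control back to the event loop at each `yield`, so the sleeps only added a fixed delay per chunk. A minimal sketch of the resulting streaming pattern, assuming a LangChain-style `rag_chain.astream()` and simplified to the chunk keys used in the diff:

```python
import json

async def stream_answer(rag_chain, query):
    """Stream JSON events without artificial per-chunk delays."""
    # Each `yield` suspends the coroutine and gives control back to the
    # event loop, so no asyncio.sleep() is needed between chunks.
    async for chunk in rag_chain.astream(query):
        if "context" in chunk:
            yield json.dumps({"docs": chunk["context"]})
        elif "answer" in chunk:
            yield json.dumps({"answer": chunk["answer"]})
    yield json.dumps({"end": "<END>"})
```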
diff --git a/backend/server/routers/internal.py b/backend/server/routers/internal.py
index d22b9645..9d02052c 100644
--- a/backend/server/routers/internal.py
+++ b/backend/server/routers/internal.py
@@ -24,7 +24,7 @@ async def upload_to_docker_directory(
     ),
     files: List[UploadFile] = File(...),
 ):
-    """This function uploads files within `/app/user_data/` given by the name req.upload_name"""
+    """This function uploads files within `settings.LOCAL_DATA_DIRECTORY` given by the name req.upload_name"""
     if not settings.LOCAL:
         return JSONResponse(
             content={"error": "API only supported for local docker environment"},
@@ -34,7 +34,7 @@ async def upload_to_docker_directory(
     logger.info(f"Uploading files to docker directory: {upload_name}")
     # create a folder within `/volumes/user_data/` that maps to `/app/user_data/` in the docker volume
     # this folder will be used to store the uploaded files
-    folder_path = os.path.join("/app/user_data/", upload_name)
+    folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, upload_name)

     # Create the folder if it does not exist, else raise an exception
     if not os.path.exists(folder_path):
diff --git a/backend/settings.py b/backend/settings.py
index 3f4ea253..37ac15b3 100644
--- a/backend/settings.py
+++ b/backend/settings.py
@@ -17,20 +17,19 @@ class Config:
     MODELS_CONFIG_PATH: str
     METADATA_STORE_CONFIG: MetadataStoreConfig
     VECTOR_DB_CONFIG: VectorDBConfig

-    LOCAL: bool = False
-
     TFY_HOST: str = ""
     TFY_API_KEY: str = ""
     JOB_FQN: str = ""
     JOB_COMPONENT_NAME: str = ""

-    LOG_LEVEL: str = "info"
     TFY_SERVICE_ROOT_PATH: str = ""

-    UNSTRUCTURED_IO_URL: str = ""
-    UNSTRUCTURED_IO_API_KEY: str = ""
+    # default is ../user_data
+    LOCAL_DATA_DIRECTORY: str = os.path.abspath(
+        os.path.join(os.path.dirname(os.path.dirname(__file__)), "user_data")
+    )

     @root_validator(pre=True)
     def _validate_values(cls, values):
diff --git a/backend/types.py b/backend/types.py
index 1f7a3e2a..1554a226 100644
--- a/backend/types.py
+++ b/backend/types.py
@@ -117,9 +117,9 @@ def to_dict(self) -> Dict[str, Any]:
 class ModelProviderConfig(BaseModel):
     provider_name: str
     api_format: str
-    llm_model_ids: List[str]
-    embedding_model_ids: List[str]
-    reranking_model_ids: List[str]
+    llm_model_ids: List[str] = Field(default_factory=list)
+    embedding_model_ids: List[str] = Field(default_factory=list)
+    reranking_model_ids: List[str] = Field(default_factory=list)
     api_key_env_var: str
     base_url: Optional[str] = None
     default_headers: Dict[str, str] = Field(default_factory=dict)
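The `backend/types.py` hunk above makes the three model-id lists optional by giving them `Field(default_factory=list)` defaults, so a provider entry can omit, say, `reranking_model_ids` without failing validation. A minimal sketch of the behavior under the pinned Pydantic v1 line (`pydantic==1.10.17`); the class and values here are illustrative:

```python
from typing import List

from pydantic import BaseModel, Field

class ProviderConfig(BaseModel):
    provider_name: str
    # previously required; now defaults to a fresh empty list per instance
    llm_model_ids: List[str] = Field(default_factory=list)

a = ProviderConfig(provider_name="openai")  # llm_model_ids omitted: OK
b = ProviderConfig(provider_name="ollama")

a.llm_model_ids.append("openai-main/gpt-4o-mini")
print(a.llm_model_ids)  # ['openai-main/gpt-4o-mini']
print(b.llm_model_ids)  # [] -- default_factory builds a new list per instance
```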
diff --git a/compose.env b/compose.env
index 09917554..ced9b99b 100644
--- a/compose.env
+++ b/compose.env
@@ -5,13 +5,6 @@ POSTGRES_PORT=5432
 POSTGRES_USER=postgres
 POSTGRES_PASSWORD=test

-## OLLAMA VARS
-OLLAMA_MODEL=qwen2:1.5b
-
-## INFINITY VARS
-INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
-INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
-
 ## COGNITA_BACKEND VARS
 ### Note: If you are changing `COGNITA_BACKEND_PORT`, please make sure to update `VITE_QA_FOUNDRY_URL` to match it. Frontend talks to backend via the host network
 ### `MODEL_PROVIDERS_CONFIG_PATH` is relative to cognita root dir
@@ -19,6 +12,7 @@ MODELS_CONFIG_PATH="./models_config.yaml"
 METADATA_STORE_CONFIG='{"provider":"prisma"}'
 VECTOR_DB_CONFIG='{"provider":"qdrant","url":"http://qdrant-server:6333", "config": {"grpc_port": 6334, "prefer_grpc": false}}'
 COGNITA_BACKEND_PORT=8000
+
 UNSTRUCTURED_IO_URL=http://unstructured-io-parsers:9500/

 ## COGNITA_FRONTEND VARS
@@ -32,6 +26,15 @@ VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=200
 ## OpenAI
 OPENAI_API_KEY=

+## OLLAMA VARS
+OLLAMA_MODEL=qwen2:1.5b
+
+## INFINITY VARS
+INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
+INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
+## INFINITY_API_KEY, only required if you enable API KEY auth on infinity container
+INFINITY_API_KEY=
+
 ## TFY VARS
 TFY_API_KEY=
 TFY_HOST=
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 7bcf9b51..5e72195c 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -162,14 +162,16 @@ services:
       - DEBUG_MODE=true
      - LOCAL=${LOCAL}
       - LOG_LEVEL=DEBUG
+      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@cognita-db:5432/cognita-config
       - METADATA_STORE_CONFIG=${METADATA_STORE_CONFIG}
       - VECTOR_DB_CONFIG=${VECTOR_DB_CONFIG}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - LOCAL_DATA_DIRECTORY=/app/user_data
+      - UNSTRUCTURED_IO_URL=${UNSTRUCTURED_IO_URL}
       - MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
-      - TFY_API_KEY=${TFY_API_KEY}
       - TFY_HOST=${TFY_HOST}
-      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@cognita-db:5432/cognita-config
-      - UNSTRUCTURED_IO_URL=${UNSTRUCTURED_IO_URL}
+      - TFY_API_KEY=${TFY_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - INFINITY_API_KEY=${INFINITY_API_KEY}
     entrypoint: /bin/bash
     command: -c "set -e; prisma db push --schema ./backend/database/schema.prisma && uvicorn --host 0.0.0.0 --port 8000 backend.server.app:app --reload"
     networks:
diff --git a/models_config.truefoundry.yaml b/models_config.truefoundry.yaml
index 421dda07..1776e152 100644
--- a/models_config.truefoundry.yaml
+++ b/models_config.truefoundry.yaml
@@ -4,9 +4,11 @@ model_providers:
     base_url: https://llm-gateway.truefoundry.com/api/inference/openai
     api_key_env_var: TFY_API_KEY
     llm_model_ids:
-      - "openai-main/gpt-4-turbo"
+      - "openai-main/gpt-4o-mini"
       - "openai-main/gpt-3-5-turbo"
+      - "openai-main/gpt-4-turbo"
     embedding_model_ids:
+      - "openai-main/text-embedding-3-small"
       - "openai-main/text-embedding-ada-002"
     reranking_model_ids: []
     default_headers:
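Two notes on the configuration changes. First, the `LOCAL_DATA_DIRECTORY` default added in `backend/settings.py` resolves to `user_data/` at the repository root: `settings.py` lives in `backend/`, so two `dirname()` calls walk up to the root of the checkout before joining `user_data`. A sketch of the resolution with a hypothetical checkout path:

```python
import os

# backend/settings.py sits one level below the repo root, so
# dirname(dirname(__file__)) is the root of the checkout.
settings_file = "/home/me/cognita/backend/settings.py"  # hypothetical path

default_dir = os.path.abspath(
    os.path.join(os.path.dirname(os.path.dirname(settings_file)), "user_data")
)
print(default_dir)  # -> /home/me/cognita/user_data
```

Second, `docker-compose.yaml` pins the container-side value explicitly with `LOCAL_DATA_DIRECTORY=/app/user_data`, so the Docker setup keeps its old path while bare-metal runs get a sensible default, and the new `user_data/` entry in `.gitignore` keeps that directory out of version control.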