Commit 898af28: Merge pull request #284 from truefoundry/cj_rm_sleeps

Remove artificial sleeps, add gpt-4o-mini and small embeddings + misc

Authored by S1LV3RJ1NX on Jul 23, 2024. 2 parents: ce18715 + 68a39c0.
Showing 11 changed files with 32 additions and 35 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -26,6 +26,7 @@ qdrant_storage/
 .truefoundry
 infinity/
 volumes/
+user_data/
 pgdata/
 *.bak
 models_config.yaml
2 changes: 1 addition & 1 deletion backend/modules/metadata_store/prismastore.py
@@ -379,7 +379,7 @@ async def adelete_data_source(self, data_source_fqn: str):
         data_source_uri = data_source.uri
         # data_source_uri is of the form: `/app/users_data/folder_name`
         folder_name = data_source_uri.split("/")[-1]
-        folder_path = os.path.join("/app/user_data", folder_name)
+        folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, folder_name)
         logger.info(
             f"Deleting folder: {folder_path}, path exists: {os.path.exists(folder_path)}"
         )
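With this change, data-source deletion derives its folder from the same configurable root used everywhere else instead of a hardcoded container path. A minimal sketch of the resulting logic with hypothetical names; the import path for `settings` and the use of `shutil.rmtree` for the actual cleanup are assumptions, since the removal code sits outside this hunk:

import os
import shutil

from backend.settings import settings  # assumption: the settings singleton lives here


def delete_data_source_folder(data_source_uri: str) -> None:
    """Hypothetical helper mirroring adelete_data_source's path handling."""
    # data_source_uri is of the form `/app/users_data/folder_name`;
    # only the trailing segment identifies the folder on disk.
    folder_name = data_source_uri.split("/")[-1]
    folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, folder_name)
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)  # assumption: cleanup is a recursive remove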
2 changes: 0 additions & 2 deletions backend/modules/query_controllers/example/controller.py
@@ -188,11 +188,9 @@ async def _stream_answer(self, rag_chain, query):
                     yield json.dumps(
                         {"docs": self._format_docs_for_stream(chunk["context"])}
                     )
-                    await asyncio.sleep(0.1)
                 elif "answer" in chunk:
                     # print("Answer: ", chunk['answer'])
                     yield json.dumps({"answer": chunk["answer"]})
-                    await asyncio.sleep(0.1)

             yield json.dumps({"end": "<END>"})
         except asyncio.TimeoutError:
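The removed `await asyncio.sleep(0.1)` calls only throttled the stream: an async generator already yields control to the event loop at every `yield` and at every `await` inside the chain's `astream`, so consumers receive chunks as soon as they are produced. A minimal sketch of the pattern, assuming a LangChain-style chain exposing `astream` and FastAPI's `StreamingResponse` (names other than `astream` are illustrative):

import json

from fastapi.responses import StreamingResponse


async def stream_answer(rag_chain, query):
    # Emit each chunk immediately; no artificial sleep is needed to keep
    # the event loop responsive, since every yield suspends the generator.
    async for chunk in rag_chain.astream(query):
        if "context" in chunk:
            yield json.dumps({"docs": chunk["context"]})
        elif "answer" in chunk:
            yield json.dumps({"answer": chunk["answer"]})
    yield json.dumps({"end": "<END>"})


# Hypothetical route body:
# return StreamingResponse(stream_answer(chain, {"query": q}), media_type="text/event-stream")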
8 changes: 0 additions & 8 deletions backend/modules/query_controllers/multimodal/controller.py
@@ -187,17 +187,14 @@ async def _stream_answer(self, rag_chain, query):
                 if "question " in chunk:
                     # print("Question: ", chunk['question'])
                     yield json.dumps({"question": chunk["question"]})
-                    await asyncio.sleep(0.1)
                 elif "context" in chunk:
                     # print("Context: ", self._format_docs_for_stream(chunk['context']))
                     yield json.dumps(
                         {"docs": self._format_docs_for_stream(chunk["context"])}
                     )
-                    await asyncio.sleep(0.1)
                 elif "answer" in chunk:
                     # print("Answer: ", chunk['answer'])
                     yield json.dumps({"answer": chunk["answer"]})
-                    await asyncio.sleep(0.1)

             yield json.dumps({"end": "<END>"})
         except asyncio.TimeoutError:
@@ -211,15 +208,10 @@ async def _stream_vlm_answer(self, llm, message_payload, docs):
                     "docs": self._format_docs_for_stream(docs),
                 }
             )
-            await asyncio.sleep(0.1)

             async for chunk in llm.astream(message_payload):
                 yield json.dumps({"answer": chunk.content})
-                await asyncio.sleep(0.1)
-
-            await asyncio.sleep(0.1)
             yield json.dumps({"end": "<END>"})
-            await asyncio.sleep(0.1)
         except asyncio.TimeoutError:
             raise HTTPException(status_code=504, detail="Stream timed out")
4 changes: 2 additions & 2 deletions backend/requirements.txt
@@ -9,12 +9,12 @@ uvicorn==0.23.2
 fastapi==0.109.1
 qdrant-client==1.9.0
 python-dotenv==1.0.1
-pydantic==1.10.13
+pydantic==1.10.17
 orjson==3.9.15
 PyMuPDF==1.23.6
 redis==5.0.1
 beautifulsoup4==4.12.2
-truefoundry[ml]==0.2.4
+truefoundry[ml]==0.3.0rc7
 markdownify==0.11.6
 gunicorn==22.0.0
 markdown-crawler==0.0.8
4 changes: 2 additions & 2 deletions backend/server/routers/internal.py
@@ -24,7 +24,7 @@ async def upload_to_docker_directory(
     ),
     files: List[UploadFile] = File(...),
 ):
-    """This function uploads files within `/app/user_data/` given by the name req.upload_name"""
+    """This function uploads files within `settings.LOCAL_DATA_DIRECTORY` given by the name req.upload_name"""
     if not settings.LOCAL:
         return JSONResponse(
             content={"error": "API only supported for local docker environment"},
@@ -34,7 +34,7 @@ async def upload_to_docker_directory(
     logger.info(f"Uploading files to docker directory: {upload_name}")
     # create a folder within `/volumes/user_data/` that maps to `/app/user_data/` in the docker volume
     # this folder will be used to store the uploaded files
-    folder_path = os.path.join("/app/user_data/", upload_name)
+    folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, upload_name)

     # Create the folder if it does not exist, else raise an exception
     if not os.path.exists(folder_path):
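The upload route now builds its target folder from the same setting. A trimmed sketch of the create-or-reject flow the comments above describe; the exact status code and error message are assumptions, since the branch body falls outside this hunk:

import os

from fastapi import HTTPException


def prepare_upload_folder(local_data_directory: str, upload_name: str) -> str:
    """Hypothetical helper showing the create-or-reject behavior."""
    folder_path = os.path.join(local_data_directory, upload_name)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)  # create the folder on first upload
    else:
        # an existing folder means the upload name is already taken
        raise HTTPException(status_code=400, detail=f"Folder already exists: {upload_name}")
    return folder_path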
9 changes: 4 additions & 5 deletions backend/settings.py
@@ -17,20 +17,19 @@ class Config:
     MODELS_CONFIG_PATH: str
     METADATA_STORE_CONFIG: MetadataStoreConfig
     VECTOR_DB_CONFIG: VectorDBConfig
-
     LOCAL: bool = False
-
     TFY_HOST: str = ""
     TFY_API_KEY: str = ""
     JOB_FQN: str = ""
     JOB_COMPONENT_NAME: str = ""
-
     LOG_LEVEL: str = "info"
     TFY_SERVICE_ROOT_PATH: str = ""
-
     UNSTRUCTURED_IO_URL: str = ""
-
     UNSTRUCTURED_IO_API_KEY: str = ""
+    # default is ../user_data
+    LOCAL_DATA_DIRECTORY: str = os.path.abspath(
+        os.path.join(os.path.dirname(os.path.dirname(__file__)), "user_data")
+    )

     @root_validator(pre=True)
     def _validate_values(cls, values):
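The new default resolves to a `user_data` directory beside the repository root rather than a hardcoded `/app/user_data`, and since it is an ordinary settings field it can still be overridden via a `LOCAL_DATA_DIRECTORY` environment variable, which is exactly what docker-compose.yaml does below. A step-by-step sketch of how the default expression evaluates, assuming `settings.py` sits at `<repo>/backend/settings.py`:

import os

# __file__ == "<repo>/backend/settings.py" (assumption about layout)
backend_dir = os.path.dirname(__file__)             # "<repo>/backend"
repo_root = os.path.dirname(backend_dir)            # "<repo>"
default_dir = os.path.abspath(os.path.join(repo_root, "user_data"))
# => "<repo>/user_data", i.e. the `user_data/` entry newly added to .gitignore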
6 changes: 3 additions & 3 deletions backend/types.py
@@ -117,9 +117,9 @@ def to_dict(self) -> Dict[str, Any]:
 class ModelProviderConfig(BaseModel):
     provider_name: str
     api_format: str
-    llm_model_ids: List[str]
-    embedding_model_ids: List[str]
-    reranking_model_ids: List[str]
+    llm_model_ids: List[str] = Field(default_factory=list)
+    embedding_model_ids: List[str] = Field(default_factory=list)
+    reranking_model_ids: List[str] = Field(default_factory=list)
     api_key_env_var: str
     base_url: Optional[str] = None
     default_headers: Dict[str, str] = Field(default_factory=dict)
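Previously all three ID lists were required, so every provider entry had to declare every category. With `Field(default_factory=list)`, an omitted category validates to a fresh empty list, which is also the idiomatic way to give Pydantic models mutable defaults. A trimmed sketch under the Pydantic 1.10.x pinned in requirements.txt (required fields other than `provider_name` are omitted for brevity, and the provider name is hypothetical):

from typing import List

from pydantic import BaseModel, Field


class ProviderSketch(BaseModel):
    provider_name: str
    llm_model_ids: List[str] = Field(default_factory=list)
    embedding_model_ids: List[str] = Field(default_factory=list)
    reranking_model_ids: List[str] = Field(default_factory=list)


# Before this change, omitting the lists raised a validation error;
# now a provider that serves, say, only rerankers can skip the rest.
cfg = ProviderSketch(provider_name="local-infinity")
assert cfg.llm_model_ids == [] and cfg.reranking_model_ids == []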
17 changes: 10 additions & 7 deletions compose.env
@@ -5,20 +5,14 @@ POSTGRES_PORT=5432
 POSTGRES_USER=postgres
 POSTGRES_PASSWORD=test

-## OLLAMA VARS
-OLLAMA_MODEL=qwen2:1.5b
-
-## INFINITY VARS
-INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
-INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
-
 ## COGNITA_BACKEND VARS
 ### Note: If you are changing `COGNITA_BACKEND_PORT`, please make sure to update `VITE_QA_FOUNDRY_URL` to match it. Frontend talks to backend via the host network
 ### `MODEL_PROVIDERS_CONFIG_PATH` is relative to cognita root dir
 MODELS_CONFIG_PATH="./models_config.yaml"
 METADATA_STORE_CONFIG='{"provider":"prisma"}'
 VECTOR_DB_CONFIG='{"provider":"qdrant","url":"http://qdrant-server:6333", "config": {"grpc_port": 6334, "prefer_grpc": false}}'
 COGNITA_BACKEND_PORT=8000

 UNSTRUCTURED_IO_URL=http://unstructured-io-parsers:9500/

 ## COGNITA_FRONTEND VARS
@@ -32,6 +26,15 @@ VITE_DOCS_QA_MAX_UPLOAD_SIZE_MB=200
 ## OpenAI
 OPENAI_API_KEY=

+## OLLAMA VARS
+OLLAMA_MODEL=qwen2:1.5b
+
+## INFINITY VARS
+INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
+INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
+## INFINITY_API_KEY, only required if you enable API KEY auth on infinity container
+INFINITY_API_KEY=
+
 ## TFY VARS
 TFY_API_KEY=
 TFY_HOST=
10 changes: 6 additions & 4 deletions docker-compose.yaml
@@ -162,14 +162,16 @@ services:
       - DEBUG_MODE=true
       - LOCAL=${LOCAL}
       - LOG_LEVEL=DEBUG
-      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@cognita-db:5432/cognita-config
       - METADATA_STORE_CONFIG=${METADATA_STORE_CONFIG}
       - VECTOR_DB_CONFIG=${VECTOR_DB_CONFIG}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - LOCAL_DATA_DIRECTORY=/app/user_data
-      - UNSTRUCTURED_IO_URL=${UNSTRUCTURED_IO_URL}
       - MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
-      - TFY_API_KEY=${TFY_API_KEY}
       - TFY_HOST=${TFY_HOST}
+      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@cognita-db:5432/cognita-config
+      - UNSTRUCTURED_IO_URL=${UNSTRUCTURED_IO_URL}
+      - TFY_API_KEY=${TFY_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - INFINITY_API_KEY=${INFINITY_API_KEY}
     entrypoint: /bin/bash
     command: -c "set -e; prisma db push --schema ./backend/database/schema.prisma && uvicorn --host 0.0.0.0 --port 8000 backend.server.app:app --reload"
   networks:
4 changes: 3 additions & 1 deletion models_config.truefoundry.yaml
@@ -4,9 +4,11 @@ model_providers:
     base_url: https://llm-gateway.truefoundry.com/api/inference/openai
     api_key_env_var: TFY_API_KEY
     llm_model_ids:
-      - "openai-main/gpt-4-turbo"
+      - "openai-main/gpt-4o-mini"
       - "openai-main/gpt-3-5-turbo"
+      - "openai-main/gpt-4-turbo"
     embedding_model_ids:
+      - "openai-main/text-embedding-3-small"
      - "openai-main/text-embedding-ada-002"
     reranking_model_ids: []
     default_headers:
