Commit
Add docker compose. Modified gitignore. Fixed md formatting in README for local llm
sanjay920 committed Jan 13, 2024
1 parent c3c80c1 commit ee8266e
Showing 3 changed files with 209 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -218,4 +218,5 @@ volumes/

# Rubra
.rubra_serve_env/
llama-cpp-python/
llm-config.yaml
15 changes: 9 additions & 6 deletions deploy_local_llm/README.md
@@ -24,13 +24,16 @@ sh serve.sh
You can serve the model using Docker. If you'd like to run just the model you can:

1. Download the quantized LLM:

```sh
wget https://huggingface.co/TheBloke/OpenHermes-2.5-neural-chat-v3-3-Slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q6_K.gguf
```

2. Run the docker container using docker-compose:

```sh
docker-compose up
```
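
Once the container is up, you can sanity-check it from the host. This is a minimal sketch assuming the image exposes llama-cpp-python's OpenAI-compatible HTTP API on the published port 1234; the model name in the request body is only illustrative:

```sh
# List the model(s) the server is serving (OpenAI-compatible endpoint)
curl http://localhost:1234/v1/models

# Send a test chat completion; the "model" value is illustrative, since a
# single-model llama-cpp-python server typically ignores it
curl http://localhost:1234/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "openhermes-2.5-neural-chat-v3-3-slerp", "messages": [{"role": "user", "content": "Say hello."}]}'
```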

You can set the `api_base` of your custom model in [llm-config.yaml](../llm-config.yaml#L10) to `"http://llama_cpp_python:1234/v1"`, so that the LiteLLM proxy reaches the locally served model over the `rubra` Docker network.
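
For reference, a hypothetical entry in that file might look like the sketch below. `llm-config.yaml` itself is not included in this commit (it is newly gitignored), so everything except the `api_base` value above is an assumption based on LiteLLM's proxy config format:

```yaml
model_list:
  - model_name: local-openhermes   # hypothetical alias; use whatever name the rest of the stack expects
    litellm_params:
      model: openai/openhermes-2.5-neural-chat-v3-3-slerp   # served through an OpenAI-compatible endpoint
      api_base: "http://llama_cpp_python:1234/v1"
      api_key: "none"   # llama-cpp-python does not check the key by default
```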

198 changes: 198 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,198 @@
version: '3.8'

services:
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.5
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
      - ETCD_SNAPSHOT_COUNT=50000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test: ["CMD", "etcdctl", "endpoint", "health"]
      interval: 30s
      timeout: 20s
      retries: 3
    networks:
      - rubra

  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    ports:
      - "9001:9001"
      - "9000:9000"
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9001"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3
    networks:
      - rubra

  standalone:
    container_name: milvus
    image: milvusdb/milvus:v2.3.3
    command: ["milvus", "run", "standalone"]
    security_opt:
      - seccomp:unconfined
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      - "19530:19530"
      - "9091:9091"
    depends_on:
      - "etcd"
      - "minio"
    networks:
      - rubra
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"

  mongodb:
    image: mongo
    container_name: mongodb
    ports:
      - "27017:27017"
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/mongodb:/data/db
    networks:
      - rubra

  redis:
    image: redis
    container_name: my-redis
    command: redis-server --appendonly yes
    ports:
      - "6379:6379"
    networks:
      - rubra

  litellm:
    image: ghcr.io/berriai/litellm:main-v1.15.3
    ports:
      - "8002:8002"
    volumes:
      - ./llm-config.yaml:/app/config.yaml
    command: ["--config", "/app/config.yaml", "--port", "8002", "--num_workers", "8"]
    networks:
      - rubra

  embedding_model:
    build:
      context: ./backend/app/embedding_model
      dockerfile: Dockerfile
    container_name: embedding_model
    image: ghcr.io/rubra-ai/embedding_model:latest
    ports:
      - "8020:8020"
    environment:
      - MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2
    networks:
      - rubra

  vector_db:
    build:
      context: ./backend/app/vector_db
      dockerfile: Dockerfile
    container_name: vector_db
    image: ghcr.io/rubra-ai/vector_db:latest
    ports:
      - "8010:8010"
    environment:
      - MILVUS_HOST=milvus
      - EMBEDDING_HOST=embedding_model
    networks:
      - rubra

  rubra_task_executor:
    build:
      context: ./backend
      dockerfile: Dockerfile_task_executor
    image: ghcr.io/rubra-ai/rubra_task_executor:latest
    container_name: rubra_task_executor
    environment:
      - REDIS_HOST=redis
      - MONGODB_HOST=mongodb
      - LITELLM_HOST=litellm
      - EMBEDDING_HOST=embedding_model
      - VECTOR_DB_HOST=vector_db
      - MILVUS_HOST=milvus
    depends_on:
      - redis
      - mongodb
    networks:
      - rubra

  rubra_backend:
    build:
      context: ./backend
      dockerfile: Dockerfile_backend
    image: ghcr.io/rubra-ai/rubra_backend:latest
    container_name: rubra_backend
    environment:
      - REDIS_HOST=redis
      - MONGODB_HOST=mongodb
      - LITELLM_HOST=litellm
    ports:
      - "8000:8000"
    depends_on:
      - rubra_task_executor
    networks:
      - rubra

  rubra_frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    image: ghcr.io/rubra-ai/rubra_frontend:latest
    container_name: rubra_frontend
    environment:
      - RUBRA_BACKEND_HOST=rubra_backend
      - LITELLM_HOST=litellm
    ports:
      - "8501:8501"
    networks:
      - rubra

  # Preferably use GPU acceleration when available. If not, uncomment to run on CPU
  # llama_cpp:
  #   image: ghcr.io/rubra-ai/llama-cpp-python-rubra-local:latest
  #   container_name: llama_cpp_python
  #   environment:
  #     - GRAMMAR_FILE=grammar/json_grammar.gbnf # apply grammar rule file
  #   ports:
  #     - "1234:1234"
  #   volumes:
  #     - ./deploy_local_llm/model/openhermes-2.5-neural-chat-v3-3-slerp.Q6_K.gguf:/model/openhermes-2.5-neural-chat-v3-3-slerp.Q6_K.gguf # change this to the path of your model if you want to use a different model
  #   command: --server --model /model/openhermes-2.5-neural-chat-v3-3-slerp.Q6_K.gguf --port 1234 --chat_format chatml --host 0.0.0.0 --n_ctx 16000 # change the name of the model file accordingly
  #   networks:
  #     - rubra

networks:
  rubra:
    name: rubra
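
As a rough usage sketch for the full stack (assuming the `./backend` and `./frontend` build contexts referenced above exist in your checkout), you can bring everything up from the repository root and spot-check the endpoints the compose file already declares:

```sh
# Build and start all services in the background
docker-compose up -d --build

# Milvus declares a 90s start_period, so give it a moment, then check status
docker-compose ps

# Probe the same health endpoints the compose healthchecks use
curl -f http://localhost:9091/healthz               # Milvus standalone
curl -f http://localhost:9000/minio/health/live     # MinIO

# The Rubra frontend is published on port 8501 and the backend API on port 8000
```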
