diff --git a/docker-compose.yaml b/docker-compose.yaml
index 80f9459..8ef2c5a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,6 +1,12 @@
 services:
   vllm-cpu-env:
-    image: ghcr.io/socialgouv/docker/vllm-cpu-env:latest
+    image: vllm/vllm-openai:latest
+    environment:
+      HUGGING_FACE_HUB_TOKEN: "${HUGGING_FACE_HUB_TOKEN}"
+    command:
+      - "--model=mistralai/Mistral-7B-v0.1"
+      - "--tensor-parallel-size=4"
+      - "--dtype=half" # https://github.com/vllm-project/vllm/issues/1157
     expose:
       - "8000"
     labels:
@@ -9,6 +15,15 @@ services:
       - "traefik.http.routers.vllm-cpu-env.tls.certresolver=myresolver"
       - "traefik.http.middlewares.main-auth.basicauth.users=${CREDENTIALS}"
       - "traefik.http.routers.vllm-cpu-env.middlewares=main-auth@docker"
+    runtime: nvidia # Set the desired runtime for GPU support
+    ipc: host # Set the IPC namespace to host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
 
   reverse-proxy:
     image: traefik:v2.4
diff --git a/main.py b/main.py
index b7043e3..b1fb50d 100644
--- a/main.py
+++ b/main.py
@@ -17,6 +17,10 @@ def getRequiredEnv(key):
         sys.exit(1)
     return value
 
+def indentString(input_string, indent_level=4):
+    """Return input_string with every line prefixed by indent_level spaces."""
+    indent = ' ' * indent_level
+    return '\n'.join(indent + line for line in input_string.splitlines())
 
 if len(sys.argv) != 2:
     logger.error("Usage: python script.py ")
@@ -39,6 +43,12 @@ def getRequiredEnv(key):
 imageId = getRequiredEnv("OVH_INSTANCE_IMAGE_ID")
 region = getRequiredEnv("OVH_REGION")
 authToken = getRequiredEnv("AUTH_TOKEN")
+huggingFaceHubToken = getRequiredEnv("HUGGING_FACE_HUB_TOKEN")
+
+# Read the local compose file (closing the handle promptly) and indent it by 8 spaces before it is interpolated into userData.
+with open("docker-compose.yaml", "r") as f:
+    dockerCompose = indentString(f.read(), 8)
+
 
 userData = f"""
 #cloud-config
@@ -55,9 +65,12 @@ def getRequiredEnv(key):
         set -Eeuo pipefail
 
         cd /home/ubuntu
-        curl -O https://raw.githubusercontent.com/SocialGouv/vllm-managed-instance/main/docker-compose.yaml
+        cat <<'EOF' > docker-compose.yaml
+{dockerCompose}
+        EOF
         echo "HOST=$(curl -4 ifconfig.me)" >> .env
         echo "CREDENTIALS='$(htpasswd -nBb user {authToken})'" >> .env
+        echo "HUGGING_FACE_HUB_TOKEN='{huggingFaceHubToken}'" >> .env
         docker compose up -d --build
         touch /tmp/runcmd_finished
 
@@ -65,7 +78,6 @@ def getRequiredEnv(key):
   - su - ubuntu -c '/home/ubuntu/init.sh > init.log 2>&1'
 """
 
-
 def findInstance():
     instances = client.get(f"/cloud/project/{serviceName}/instance")
     if not instances: