From 786e9ba0bbc3d5c0318c6afb9bd83898e713273c Mon Sep 17 00:00:00 2001
From: Zhanghao Wu
Date: Tue, 12 Dec 2023 01:52:40 +0000
Subject: [PATCH] Fix setup

---
Review notes (free text in this area, between the "---" marker and the
diff, is not recorded in the commit message):

* This patch replaces the docker-based `run` command with a conda
  environment named `mixtral` (created in `setup` when activation fails)
  and starts `vllm.entrypoints.openai.api_server` directly.
* `--tensor-parallel-size` is hard-coded to 2, while
  `resources.accelerators` also lists 4- and 8-GPU options and the removed
  command passed $SKYPILOT_NUM_GPUS. NOTE(review): confirm this is
  intentional; presumably the extra GPUs stay idle on the larger options.
* The `service:` section (readiness probe, replicas) is removed by this
  patch. NOTE(review): confirm that is intended.
* NOTE(review): the server is piped into `tee`; unless `pipefail` is set
  by the runner, the pipeline presumably reports tee's exit status rather
  than the server's -- verify.

 llm/mixtral/serve.yaml | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/llm/mixtral/serve.yaml b/llm/mixtral/serve.yaml
index 14ec5819035..eb8e453df26 100644
--- a/llm/mixtral/serve.yaml
+++ b/llm/mixtral/serve.yaml
@@ -1,15 +1,26 @@
 resources:
   accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   ports: 8000
+  disk_tier: high
 
-service:
-  readiness_probe:
-    path: /v1/models
-    initial_delay_seconds: 1200
-  replicas: 2
+setup: |
+  conda activate mixtral
+  if [ $? -ne 0 ]; then
+    conda create -n mixtral -y python=3.10
+    conda activate mixtral
+  fi
+  # We have to manually install Torch otherwise apex & xformers won't build
+  pip list | grep torch || pip install "torch>=2.0.0"
+
+  pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git"
+  pip install git+https://github.com/huggingface/transformers
+  pip list | grep megablocks || pip install megablocks
 
 run: |
-  docker run --gpus all -p 8000:8000 ghcr.io/mistralai/mistral-src/vllm:latest \
-    --host 0.0.0.0 \
-    --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
-    --tensor-parallel-size $SKYPILOT_NUM_GPUS
+  conda activate mixtral
+  export PATH=$PATH:/sbin
+  python -u -m vllm.entrypoints.openai.api_server \
+                   --host 0.0.0.0 \
+                   --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
+                   --tensor-parallel-size 2 | tee ~/openai_api_server.log
+