Fix setup

skypilot-org · Dec 12, 2023 · 786e9ba · 786e9ba
1 parent 3dda235
commit 786e9ba
Showing 1 changed file with 20 additions and 9 deletions.
diff --git a/llm/mixtral/serve.yaml b/llm/mixtral/serve.yaml
@@ -1,15 +1,26 @@
 resources: 
   accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   ports: 8000
+  disk_tier: high  # NOTE(review): presumably for faster model loading; confirm
 
-service:
-  readiness_probe:
-    path: /v1/models
-    initial_delay_seconds: 1200
-  replicas: 2
+setup: |
+  conda activate mixtral
+  if [ $? -ne 0 ]; then
+    conda create -n mixtral -y python=3.10
+    conda activate mixtral
+  fi
+  # Install Torch explicitly first; otherwise apex & xformers won't build.
+  pip list | grep torch || pip install "torch>=2.0.0"
+
+  pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git"
+  pip install git+https://github.com/huggingface/transformers
+  pip list | grep megablocks || pip install megablocks
 
 run: |
-  docker run --gpus all -p 8000:8000 ghcr.io/mistralai/mistral-src/vllm:latest \
-                   --host 0.0.0.0 \
-                   --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
-                   --tensor-parallel-size $SKYPILOT_NUM_GPUS
+  conda activate mixtral; export PATH=$PATH:/sbin
+  # Shard across every GPU on the node: `accelerators` allows 2, 4 or 8.
+  python -u -m vllm.entrypoints.openai.api_server \
+                --host 0.0.0.0 \
+                --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
+                --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE | tee ~/openai_api_server.log
+