From 786e9ba0bbc3d5c0318c6afb9bd83898e713273c Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 12 Dec 2023 01:52:40 +0000
Subject: [PATCH] Fix setup

---
 llm/mixtral/serve.yaml | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/llm/mixtral/serve.yaml b/llm/mixtral/serve.yaml
index 14ec5819035..eb8e453df26 100644
--- a/llm/mixtral/serve.yaml
+++ b/llm/mixtral/serve.yaml
@@ -1,15 +1,26 @@
 resources: 
   accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
   ports: 8000
+  disk_tier: high  # NOTE(review): presumably for faster weight loading — confirm
 
-service:
-  readiness_probe:
-    path: /v1/models
-    initial_delay_seconds: 1200
-  replicas: 2
+setup: |
+  conda activate mixtral
+  if [ $? -ne 0 ]; then  # activation failed: create the env, then retry
+    conda create -n mixtral -y python=3.10
+    conda activate mixtral
+  fi
+  # We have to manually install Torch otherwise apex & xformers won't build
+  pip list | grep torch || pip install "torch>=2.0.0"  # install only if no listed package matches "torch"
+
+  pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git"
+  pip install git+https://github.com/huggingface/transformers  # unguarded: runs on every setup
+  pip list | grep megablocks || pip install megablocks
 
 run: |
-  docker run --gpus all -p 8000:8000 ghcr.io/mistralai/mistral-src/vllm:latest \
-                   --host 0.0.0.0 \
-                   --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
-                   --tensor-parallel-size $SKYPILOT_NUM_GPUS
+  conda activate mixtral
+  export PATH=$PATH:/sbin
+  # Shard over every GPU on the node; `resources` allows 2, 4 or 8 of them.
+  python -u -m vllm.entrypoints.openai.api_server --host 0.0.0.0 \
+                --model mistralai/Mixtral-8x7B-Instruct-v0.1 \
+                --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE | tee ~/openai_api_server.log
+