Skip to content

Commit

Permalink
Fix setup
Browse files — browse the repository at this point in the history
  • Loading branch information
Michaelvll committed Dec 12, 2023
1 parent 3dda235 commit 786e9ba
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions llm/mixtral/serve.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
resources:
accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
ports: 8000
disk_tier: high

service:
readiness_probe:
path: /v1/models
initial_delay_seconds: 1200
replicas: 2
setup: |
conda activate mixtral
if [ $? -ne 0 ]; then
conda create -n mixtral -y python=3.10
conda activate mixtral
fi
# We have to manually install Torch otherwise apex & xformers won't build
pip list | grep torch || pip install "torch>=2.0.0"
pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git"
pip install git+https://github.com/huggingface/transformers
pip list | grep megablocks || pip install megablocks
run: |
docker run --gpus all -p 8000:8000 ghcr.io/mistralai/mistral-src/vllm:latest \
--host 0.0.0.0 \
--model mistralai/Mixtral-8x7B-Instruct-v0.1 \
--tensor-parallel-size $SKYPILOT_NUM_GPUS
conda activate mixtral
export PATH=$PATH:/sbin
python -u -m vllm.entrypoints.openai.api_server \
--host 0.0.0.0 \
--model mistralai/Mixtral-8x7B-Instruct-v0.1 \
--tensor-parallel-size 2 | tee ~/openai_api_server.log

0 comments on commit 786e9ba

Please sign in to comment.