diff --git a/llm/mixtral/serve.yaml b/llm/mixtral/serve.yaml
index d2216c28ddd..a3304fd4f63 100644
--- a/llm/mixtral/serve.yaml
+++ b/llm/mixtral/serve.yaml
@@ -13,16 +13,19 @@ setup: |
     conda activate mixtral
   fi
   # We have to manually install Torch otherwise apex & xformers won't build
-  pip install "torch>=2.0.0" --index-url https://download.pytorch.org/whl/cu118
+  pip list | grep torch || pip install "torch>=2.0.0" --index-url https://download.pytorch.org/whl/cu118
   # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
-  git clone https://github.com/NVIDIA/apex || true && \
+  pip list | grep apex || { pip install packaging ninja && \
+    git clone https://github.com/NVIDIA/apex || true && \
     cd apex && git checkout 2386a912164b0c5cfcd8be7a2b890fbac5607c82 && \
     sed -i '/check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)/d' setup.py && \
-    python setup.py install --cpp_ext --cuda_ext
+    python setup.py install --cpp_ext --cuda_ext; }
-  pip install "git+https://github.com/vllm-project/vllm.git@b5f882cc98e2c9c6dde7357dbac2ec0c2c57d8cd"
-  pip install "xformers==0.0.22" "transformers==4.34.0" "fschat[model_worker]==0.2.30"
+  pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git@b5f882cc98e2c9c6dde7357dbac2ec0c2c57d8cd"
+  pip list | grep stanford-stk || pip install stanford-stk
+  pip list | grep megablocks || pip install megablocks
+  pip list | grep fschat || pip install "fschat[model_worker]==0.2.34"
   if [[ ! -z "${HF_TOKEN}" ]]; then
     echo "The HF_TOKEN environment variable set, logging to Hugging Face."