diff --git a/llm/mixtral/serve.yaml b/llm/mixtral/serve.yaml
index d2216c28ddd..a3304fd4f63 100644
--- a/llm/mixtral/serve.yaml
+++ b/llm/mixtral/serve.yaml
@@ -13,16 +13,19 @@ setup: |
     conda activate mixtral
   fi
   # We have to manually install Torch otherwise apex & xformers won't build
-  pip install "torch>=2.0.0" --index-url https://download.pytorch.org/whl/cu118
+  pip list | grep torch || pip install "torch>=2.0.0" --index-url https://download.pytorch.org/whl/cu118
   # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
-  git clone https://github.com/NVIDIA/apex || true && \
+  pip list | grep apex || { pip install packaging ninja && \
+    git clone https://github.com/NVIDIA/apex || true && \
     cd apex && git checkout 2386a912164b0c5cfcd8be7a2b890fbac5607c82 && \
     sed -i '/check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)/d' setup.py && \
-    python setup.py install --cpp_ext --cuda_ext
+    python setup.py install --cpp_ext --cuda_ext; }
-  pip install "git+https://github.com/vllm-project/vllm.git@b5f882cc98e2c9c6dde7357dbac2ec0c2c57d8cd"
-  pip install "xformers==0.0.22" "transformers==4.34.0" "fschat[model_worker]==0.2.30"
+  pip list | grep vllm || pip install "git+https://github.com/vllm-project/vllm.git@b5f882cc98e2c9c6dde7357dbac2ec0c2c57d8cd"
+  pip list | grep stanford-stk || pip install stanford-stk
+  pip list | grep megablocks || pip install megablocks
+  pip list | grep fschat || pip install "fschat[model_worker]==0.2.34"
   if [[ ! -z "${HF_TOKEN}" ]]; then
     echo "The HF_TOKEN environment variable set, logging to Hugging Face."