From 075ab9db4e0ae8db4ea53c18a7fa729650b5b139 Mon Sep 17 00:00:00 2001 From: cenzhiyao <2523403608@qq.com> Date: Mon, 16 Dec 2024 15:27:47 +0800 Subject: [PATCH] Update memory usage, rename image, and pin torch and torchvision versions --- docs/source/serving/deploying_with_docker.rst | 4 ++-- requirements-cuda-arm64.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst index 3118e19daf118..11a9f12fd17cd 100644 --- a/docs/source/serving/deploying_with_docker.rst +++ b/docs/source/serving/deploying_with_docker.rst @@ -51,11 +51,11 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `-- .. code-block:: console - # Example of building on Nvidia GH200 server. (Memory usage: ~180GB, Build time: ~2387s / ~40 min) + # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB) $ DOCKER_BUILDKIT=1 sudo docker build . \ --target vllm-openai \ -platform "linux/arm64" \ - -t drikster80/vllm-gh200-openai:v0.6.4.post1 \ + -t vllm/vllm-gh200-openai:latest \ --build-arg max_jobs=66 \ --build-arg nvcc_threads=2 \ --build-arg torch_cuda_arch_list="9.0+PTX" \ diff --git a/requirements-cuda-arm64.txt b/requirements-cuda-arm64.txt index a8baf1dedb5a8..bbcb5cb7012ce 100644 --- a/requirements-cuda-arm64.txt +++ b/requirements-cuda-arm64.txt @@ -1,3 +1,3 @@ --index-url https://download.pytorch.org/whl/nightly/cu124 -torchvision; platform_machine == 'aarch64' -torch; platform_machine == 'aarch64' +torchvision==0.22.0.dev20241215; platform_machine == 'aarch64' +torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64'