Skip to content

Commit

Permalink
chore: Upgrade to CUDA 12.1 (#2020)
Browse files Browse the repository at this point in the history
  • Loading branch information
gs-olive committed Jul 3, 2023
1 parent 8b09e71 commit 6ceaed8
Show file tree
Hide file tree
Showing 14 changed files with 74 additions and 89 deletions.
94 changes: 40 additions & 54 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,9 @@ commands:
key: cmake-<< parameters.cache-key >>-<< parameters.version >>
paths:
- << parameters.install-dir >>
uninstall-cuda:
description: "Uninstall CUDA-11.4"
steps:
- run:
name: "Uninstall CUDA-11.4"
command: |
sudo apt-get --purge remove "cuda*" "nsight*"
sudo apt-get --purge remove "*nvidia*"

install-cudnn:
description: "Install CUDNN 8.8.0"
description: "Install CUDNN 8.8.1"
parameters:
os:
type: string
Expand All @@ -112,10 +104,10 @@ commands:
default: "x86_64"
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.8.1.3"
cuda-version:
type: string
default: "cuda11.8"
default: "cuda12.0"
steps:
- run:
name: Install CUDNN
Expand Down Expand Up @@ -149,13 +141,13 @@ commands:
default: "amd64"
cuda-pkg-name:
type: string
default: "cuda-toolkit-11-8"
default: "cuda-toolkit-12-1"
cuda-pkg-version:
type: string
default: "11-8"
default: "12-1"
cuda-version:
type: string
default: "11.8.0"
default: "12.1.1"
steps:
- run:
name: Install CUDA
Expand Down Expand Up @@ -186,21 +178,12 @@ commands:
architecture:
type: string
default: "amd64"
cuda-pkg-name:
type: string
default: "cuda-toolkit-11-8"
cuda-pkg-version:
type: string
default: "11-8"
cuda-version:
type: string
default: "11.8.0"
cuda-string-version:
type: string
default: "cuda11.8"
default: "cuda12.0"
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.8.1.3"
trt-version-short:
type: string
default: "8.6.1"
Expand Down Expand Up @@ -252,7 +235,7 @@ commands:
default: "8.6.1"
cudnn-version-long:
type: string
default: "8.8.0.121"
default: "8.8.1.3"
steps:
- run:
name: Set up python environment
Expand All @@ -261,27 +244,30 @@ commands:
pip3 install wheel setuptools
pip3 install nvidia-pyindex
pip3 install tabulate
pip3 install tensorrt==<< parameters.trt-version-long >> nvidia-cudnn-cu11==<< parameters.cudnn-version-long >>
pip3 install tensorrt==<< parameters.trt-version-long >> nvidia-cudnn-cu12==<< parameters.cudnn-version-long >>
pip3 install pytest parameterized expecttest nox
install-torch-from-index:
description: "Install Pytorch"
parameters:
torch-build:
type: string
default: "2.1.0.dev20230605+cu118"
default: "2.1.0.dev20230619+cu121"
torchvision-build:
type: string
default: "0.16.0.dev20230605+cu118"
default: "0.16.0.dev20230619+cu121"
torch-build-index:
type: string
default: "https://download.pytorch.org/whl/nightly/cu118"
default: "https://download.pytorch.org/whl/nightly/cu121"
torchvision-build-index:
type: string
default: "https://download.pytorch.org/whl/nightly/cu121"
steps:
- run:
name: Install Torch
command: |
pip3 install --upgrade pip
pip3 install torch==<< parameters.torch-build >> torchvision==<< parameters.torchvision-build >> --extra-index-url << parameters.torch-build-index >>
pip3 install torch==<< parameters.torch-build >> torchvision==<< parameters.torchvision-build >> --extra-index-url << parameters.torch-build-index >> --extra-index-url << parameters.torchvision-build-index >>
build-py:
description: "Build the torch-tensorrt python release (pre-cxx11-abi)"
Expand All @@ -293,7 +279,7 @@ commands:
- run:
name: Build torch-tensorrt python release (pre-cxx11-abi)
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand All @@ -313,7 +299,7 @@ commands:
- run:
name: Build torch-tensorrt python legacy release (pre-cxx11-abi)
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand Down Expand Up @@ -345,7 +331,7 @@ commands:
- run:
name: Build torch-tensorrt python release package
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
cd ~/project/py
python3 setup.py bdist_wheel --use-cxx11-abi --release
python3 setup.py install --use-cxx11-abi --release
Expand All @@ -357,7 +343,7 @@ commands:
- run:
name: Build torch-tensorrt python package
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
cd ~/project/py
python3 setup.py bdist_wheel --use-cxx11-abi
python3 setup.py install --use-cxx11-abi
Expand All @@ -375,7 +361,7 @@ commands:
- run:
name: Build torch-tensorrt python release with only the fx backend
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand Down Expand Up @@ -447,7 +433,7 @@ commands:
name: Build torch-tensorrt library with CMake
command: |
mkdir build
export PATH=$PATH:/usr/local/cuda-11.8/bin
export PATH=$PATH:/usr/local/cuda-12.1/bin
~/cmake/bin/cmake -S. -Bbuild \
-DCMAKE_MODULE_PATH=cmake/Module \
-DTorch_DIR=/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch/share/cmake/Torch \
Expand Down Expand Up @@ -499,7 +485,7 @@ commands:
name: Run core / C++ tests
no_output_timeout: 15m
environment:
LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-12.1/lib64/:$LD_LIBRARY_PATH"
command: |
set -e
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
Expand Down Expand Up @@ -529,7 +515,7 @@ commands:
environment:
USE_HOST_DEPS: "1"
PYT_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/"
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda-12.1/lib64/:$LD_LIBRARY_PATH"
command: |
set -e
mkdir -p /tmp/artifacts/test_results
Expand Down Expand Up @@ -819,7 +805,7 @@ jobs:
type: boolean
default: false
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- checkout
Expand Down Expand Up @@ -881,7 +867,7 @@ jobs:
cudnn-version:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.large
parallelism: 4
steps:
Expand Down Expand Up @@ -922,7 +908,7 @@ jobs:
python-version:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.large
steps:
- checkout
Expand Down Expand Up @@ -956,7 +942,7 @@ jobs:
type: string
parallelism: 8
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.large
steps:
- checkout
Expand Down Expand Up @@ -992,7 +978,7 @@ jobs:
type: string
parallelism: 8
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.large
steps:
- checkout
Expand Down Expand Up @@ -1030,7 +1016,7 @@ jobs:
python-version:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.large
steps:
- checkout
Expand Down Expand Up @@ -1068,7 +1054,7 @@ jobs:
type: string
parallelism: 4
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- when:
Expand Down Expand Up @@ -1116,7 +1102,7 @@ jobs:
torch-build-index:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- when:
Expand Down Expand Up @@ -1252,7 +1238,7 @@ jobs:
python-version:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- checkout
Expand Down Expand Up @@ -1288,7 +1274,7 @@ jobs:
torch-base-image:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- checkout
Expand Down Expand Up @@ -1316,7 +1302,7 @@ jobs:
torch-base-image:
type: string
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- when:
Expand Down Expand Up @@ -1352,13 +1338,13 @@ parameters:
# Nightly platform config
torch-build:
type: string
default: "2.1.0.dev20230605+cu118"
default: "2.1.0.dev20230619+cu121"
torchvision-build:
type: string
default: "0.16.0.dev20230605+cu118"
default: "0.16.0.dev20230619+cu121"
torch-build-index:
type: string
default: "https://download.pytorch.org/whl/nightly/cu118"
default: "https://download.pytorch.org/whl/nightly/cu121"
torch-build-legacy:
type: string
default: "1.13.1+cu117"
Expand All @@ -1373,7 +1359,7 @@ parameters:
default: true
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.8.1.3"
trt-version-short:
type: string
default: "8.6.1"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
The following dependencies were used to verify the test cases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.

- Bazel 5.2.0
- Libtorch 2.1.0.dev20230605 (built with CUDA 11.8)
- CUDA 11.8
- cuDNN 8.8.0
- Libtorch 2.1.0.dev20230619 (built with CUDA 12.1)
- CUDA 12.1
- cuDNN 8.8.1
- TensorRT 8.6.1

## Prebuilt Binaries and Wheel files
Expand Down
20 changes: 10 additions & 10 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ local_repository(
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-11.8/",
path = "/usr/local/cuda-12.1/",
)

#############################################################################################################
Expand All @@ -51,17 +51,17 @@ new_local_repository(
http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "999becce82b73e566d0ffe010cd21fea8cf3a33f90f09dcc6b01150b820ae063",
sha256 = "5ba55259b65e071346a2b547b8d1378595f1467a39aaa923fecb09f134f1bcba",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230619%2Bcu121.zip"],
)

http_archive(
name = "libtorch_pre_cxx11_abi",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "786cc728c63ea69c40bd8fb535cf8e5e1dfff1d43eaad3eb5256b9ed89c1b268",
sha256 = "8f6661bfc11597e77400e9e36cc8dd8e5e385ba82361d630d9ccede8518d7c7e",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-shared-with-deps-2.1.0.dev20230619%2Bcu121.zip"],
)

# Download these tarballs manually from the NVIDIA website
Expand All @@ -71,20 +71,20 @@ http_archive(
http_archive(
name = "cudnn",
build_file = "@//third_party/cudnn/archive:BUILD",
sha256 = "36fff137153ef73e6ee10bfb07f4381240a86fb9fb78ce372414b528cbab2293",
strip_prefix = "cudnn-linux-x86_64-8.8.0.121_cuda11-archive",
sha256 = "79d77a769c7e7175abc7b5c2ed5c494148c0618a864138722c887f95c623777c",
strip_prefix = "cudnn-linux-x86_64-8.8.1.3_cuda12-archive",
urls = [
"https://developer.download.nvidia.com/compute/cudnn/secure/8.8.0/local_installers/11.8/cudnn-linux-x86_64-8.8.0.121_cuda11-archive.tar.xz",
"https://developer.nvidia.com/downloads/compute/cudnn/secure/8.8.1/local_installers/12.0/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz",
],
)

http_archive(
name = "tensorrt",
build_file = "@//third_party/tensorrt/archive:BUILD",
sha256 = "15bfe6053d45feec45ecc7123a9106076b0b43fa0435f242d89dca0778337759",
sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
strip_prefix = "TensorRT-8.6.1.6",
urls = [
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz",
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
],
)

Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Base image starts with CUDA
ARG BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG BASE_IMG=nvidia/cuda:12.1.1-devel-ubuntu22.04
FROM ${BASE_IMG} as base
ENV BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
ENV BASE_IMG=nvidia/cuda:12.1.1-devel-ubuntu22.04

ARG TENSORRT_VERSION
ENV TENSORRT_VERSION=${TENSORRT_VERSION}
Expand Down
Loading

0 comments on commit 6ceaed8

Please sign in to comment.