Skip to content

Commit

Permalink
chore: Upgrade to CUDA 12.1 and CuDNN 8.9
Browse files Browse the repository at this point in the history
- Upgrade CUDA version across CI, dependencies, and build systems
- Upgrade versions for Torch, CuDNN, TensorRT, and other dependencies in
accordance with new CUDA version 12.1
- Upgrade Torch nightly date to latest available
  • Loading branch information
gs-olive committed Jun 14, 2023
1 parent 1735088 commit 59a2687
Show file tree
Hide file tree
Showing 13 changed files with 64 additions and 75 deletions.
67 changes: 28 additions & 39 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,9 @@ commands:
key: cmake-<< parameters.cache-key >>-<< parameters.version >>
paths:
- << parameters.install-dir >>
uninstall-cuda:
description: "Uninstall CUDA-11.4"
steps:
- run:
name: "Uninstall CUDA-11.4"
command: |
sudo apt-get --purge remove "cuda*" "nsight*"
sudo apt-get --purge remove "*nvidia*"

install-cudnn:
description: "Install CUDNN 8.8.0"
description: "Install CUDNN 8.9.1"
parameters:
os:
type: string
Expand All @@ -112,10 +104,10 @@ commands:
default: "x86_64"
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.9.1.23"
cuda-version:
type: string
default: "cuda11.8"
default: "cuda12.1"
steps:
- run:
name: Install CUDNN
Expand Down Expand Up @@ -149,13 +141,13 @@ commands:
default: "amd64"
cuda-pkg-name:
type: string
default: "cuda-toolkit-11-8"
default: "cuda-toolkit-12-1"
cuda-pkg-version:
type: string
default: "11-8"
default: "12-1"
cuda-version:
type: string
default: "11.8.0"
default: "12.1.1"
steps:
- run:
name: Install CUDA
Expand Down Expand Up @@ -188,19 +180,16 @@ commands:
default: "amd64"
cuda-pkg-name:
type: string
default: "cuda-toolkit-11-8"
default: "cuda-toolkit-12-1"
cuda-pkg-version:
type: string
default: "11-8"
cuda-version:
type: string
default: "11.8.0"
default: "12-0"
cuda-string-version:
type: string
default: "cuda11.8"
default: "cuda12.1"
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.9.1.23"
trt-version-short:
type: string
default: "8.6.1"
Expand Down Expand Up @@ -252,7 +241,7 @@ commands:
default: "8.6.1"
cudnn-version-long:
type: string
default: "8.8.0.121"
default: "8.9.1.23"
steps:
- run:
name: Set up python environment
Expand All @@ -261,21 +250,21 @@ commands:
pip3 install wheel setuptools
pip3 install nvidia-pyindex
pip3 install tabulate
pip3 install tensorrt==<< parameters.trt-version-long >> nvidia-cudnn-cu11==<< parameters.cudnn-version-long >>
pip3 install tensorrt==<< parameters.trt-version-long >> nvidia-cudnn-cu12==<< parameters.cudnn-version-long >>
pip3 install pytest parameterized expecttest nox
install-torch-from-index:
description: "Install Pytorch"
parameters:
torch-build:
type: string
default: "2.1.0.dev20230605+cu118"
default: "2.1.0.dev20230613+cu121"
torchvision-build:
type: string
default: "0.16.0.dev20230605+cu118"
default: "0.16.0.dev20230613+cu121"
torch-build-index:
type: string
default: "https://download.pytorch.org/whl/nightly/cu118"
default: "https://download.pytorch.org/whl/nightly/cu121"
steps:
- run:
name: Install Torch
Expand All @@ -293,7 +282,7 @@ commands:
- run:
name: Build torch-tensorrt python release (pre-cxx11-abi)
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand All @@ -313,7 +302,7 @@ commands:
- run:
name: Build torch-tensorrt python legacy release (pre-cxx11-abi)
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand Down Expand Up @@ -345,7 +334,7 @@ commands:
- run:
name: Build torch-tensorrt python release package
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
cd ~/project/py
python3 setup.py bdist_wheel --use-cxx11-abi --release
python3 setup.py install --use-cxx11-abi --release
Expand All @@ -357,7 +346,7 @@ commands:
- run:
name: Build torch-tensorrt python package
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
cd ~/project/py
python3 setup.py bdist_wheel --use-cxx11-abi
python3 setup.py install --use-cxx11-abi
Expand All @@ -375,7 +364,7 @@ commands:
- run:
name: Build torch-tensorrt python release with only the fx backend
command: |
export CUDA_HOME=/usr/local/cuda-11.8/
export CUDA_HOME=/usr/local/cuda-12.1/
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
cd py
python3 -m pip install wheel setuptools
Expand Down Expand Up @@ -447,7 +436,7 @@ commands:
name: Build torch-tensorrt library with CMake
command: |
mkdir build
export PATH=$PATH:/usr/local/cuda-11.8/bin
export PATH=$PATH:/usr/local/cuda-12.1/bin
~/cmake/bin/cmake -S. -Bbuild \
-DCMAKE_MODULE_PATH=cmake/Module \
-DTorch_DIR=/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch/share/cmake/Torch \
Expand Down Expand Up @@ -499,7 +488,7 @@ commands:
name: Run core / C++ tests
no_output_timeout: 15m
environment:
LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-12.1/lib64/:$LD_LIBRARY_PATH"
command: |
set -e
mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
Expand Down Expand Up @@ -529,7 +518,7 @@ commands:
environment:
USE_HOST_DEPS: "1"
PYT_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/"
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda-12.1/lib64/:$LD_LIBRARY_PATH"
command: |
set -e
mkdir -p /tmp/artifacts/test_results
Expand Down Expand Up @@ -819,7 +808,7 @@ jobs:
type: boolean
default: false
machine:
image: linux-cuda-11:2023.02.1
image: linux-cuda-12:2023.05.1
resource_class: gpu.nvidia.small
steps:
- checkout
Expand Down Expand Up @@ -1352,13 +1341,13 @@ parameters:
# Nightly platform config
torch-build:
type: string
default: "2.1.0.dev20230605+cu118"
default: "2.1.0.dev20230613+cu121"
torchvision-build:
type: string
default: "0.16.0.dev20230605+cu118"
default: "0.16.0.dev20230613+cu121"
torch-build-index:
type: string
default: "https://download.pytorch.org/whl/nightly/cu118"
default: "https://download.pytorch.org/whl/nightly/cu121"
torch-build-legacy:
type: string
default: "1.13.1+cu117"
Expand All @@ -1373,7 +1362,7 @@ parameters:
default: true
cudnn-version:
type: string
default: "8.8.0.121"
default: "8.9.1.23"
trt-version-short:
type: string
default: "8.6.1"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
The following dependencies were used to verify the test cases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.

- Bazel 5.2.0
- Libtorch 2.1.0.dev20230605 (built with CUDA 11.8)
- CUDA 11.8
- cuDNN 8.8.0
- Libtorch 2.1.0.dev20230613 (built with CUDA 12.1)
- CUDA 12.1
- cuDNN 8.9.1
- TensorRT 8.6.1

## Prebuilt Binaries and Wheel files
Expand Down
20 changes: 10 additions & 10 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ local_repository(
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-11.8/",
path = "/usr/local/cuda-12.1/",
)

#############################################################################################################
Expand All @@ -51,17 +51,17 @@ new_local_repository(
http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "999becce82b73e566d0ffe010cd21fea8cf3a33f90f09dcc6b01150b820ae063",
sha256 = "1c3712b3b1de34e9989549f53675b557f6f0ca0b800ccbbc80c941af68abcc65",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230613%2Bcu121.zip"],
)

http_archive(
name = "libtorch_pre_cxx11_abi",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "786cc728c63ea69c40bd8fb535cf8e5e1dfff1d43eaad3eb5256b9ed89c1b268",
sha256 = "6bbb53f2f9533804175a0d2eeae5093ee1907158a9ec75c7fb9d10e2103d5df5",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-shared-with-deps-2.1.0.dev20230613%2Bcu121.zip"],
)

# Download these tarballs manually from the NVIDIA website
Expand All @@ -71,20 +71,20 @@ http_archive(
http_archive(
name = "cudnn",
build_file = "@//third_party/cudnn/archive:BUILD",
sha256 = "36fff137153ef73e6ee10bfb07f4381240a86fb9fb78ce372414b528cbab2293",
strip_prefix = "cudnn-linux-x86_64-8.8.0.121_cuda11-archive",
sha256 = "35163c5c542be0c511738b27e25235193cbeedc5e0e006e44b1cdeaf1922e83e",
strip_prefix = "cudnn-linux-x86_64-8.9.1.23_cuda12-archive",
urls = [
"https://developer.download.nvidia.com/compute/cudnn/secure/8.8.0/local_installers/11.8/cudnn-linux-x86_64-8.8.0.121_cuda11-archive.tar.xz",
"https://developer.nvidia.com/downloads/compute/cudnn/secure/8.9.1/local_installers/12.x/cudnn-linux-x86_64-8.9.1.23_cuda12-archive.tar.xz",
],
)

http_archive(
name = "tensorrt",
build_file = "@//third_party/tensorrt/archive:BUILD",
sha256 = "15bfe6053d45feec45ecc7123a9106076b0b43fa0435f242d89dca0778337759",
sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
strip_prefix = "TensorRT-8.6.1.6",
urls = [
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz",
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
],
)

Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Base image starts with CUDA
ARG BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG BASE_IMG=nvidia/cuda:12.1.1-devel-ubuntu22.04
FROM ${BASE_IMG} as base
ENV BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
ENV BASE_IMG=nvidia/cuda:12.1.1-devel-ubuntu22.04

ARG TENSORRT_VERSION
ENV TENSORRT_VERSION=${TENSORRT_VERSION}
Expand Down
4 changes: 2 additions & 2 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch

### Instructions

- The example below uses CUDNN 8.8 and TensorRT 8.6
- The example below uses CUDNN 8.9 and TensorRT 8.6
- See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.

> From root of Torch-TensorRT repo
Build:
```
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.8 -f docker/Dockerfile -t torch_tensorrt:latest .
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.9 -f docker/Dockerfile -t torch_tensorrt:latest .
```

Run:
Expand Down
2 changes: 1 addition & 1 deletion py/ci/build_whl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Example usage: docker run -it -v$(pwd)/..:/workspace/TRTorch build_trtorch_wheel /bin/bash /workspace/TRTorch/py/build_whl.sh

export CXX=g++
export CUDA_HOME=/usr/local/cuda-11.8
export CUDA_HOME=/usr/local/cuda-12.1
export PROJECT_DIR=/workspace/project

cp -r $CUDA_HOME /usr/local/cuda
Expand Down
6 changes: 3 additions & 3 deletions py/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
numpy
packaging
pybind11==2.6.2
--extra-index-url https://download.pytorch.org/whl/nightly/cu118
torch==2.1.0.dev20230605+cu118
torchvision==0.16.0.dev20230605+cu118
--extra-index-url https://download.pytorch.org/whl/nightly/cu121
torch==2.1.0.dev20230613+cu121
torchvision==0.16.0.dev20230613+cu121
--extra-index-url https://pypi.ngc.nvidia.com
tensorrt==8.6.1
2 changes: 1 addition & 1 deletion py/torch_tensorrt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _find_lib(name, paths):

elif sys.platform.startswith("linux"):
LINUX_PATHS = [
"/usr/local/cuda-11.8/lib64",
"/usr/local/cuda-12.1/lib64",
]

if "LD_LIBRARY_PATH" in os.environ:
Expand Down
2 changes: 1 addition & 1 deletion py/torch_tensorrt/fx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ FX2TRT is merged as FX module in Torch-TensorRT
$ conda create --name python_env python=3.8
$ conda activate python_env
# Recommend to install PyTorch 2.0 and later
$ conda install pytorch torchvision torchtext cudatoolkit=11.8 -c pytorch-nightly
$ conda install pytorch torchvision torchtext cudatoolkit=12.1 -c pytorch-nightly
# Install TensorRT python package
$ pip3 install nvidia-pyindex
$ pip3 install tensorrt==8.6.1
Expand Down
4 changes: 2 additions & 2 deletions py/versions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.5.0.dev0"
__cuda_version__ = "11.8"
__cudnn_version__ = "8.8"
__cuda_version__ = "12.1"
__cudnn_version__ = "8.9"
__tensorrt_version__ = "8.6"
2 changes: 1 addition & 1 deletion toolchains/ci_workspaces/WORKSPACE.x86_64
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ local_repository(
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-11.8/",
path = "/usr/local/cuda-12.1/",
)

new_local_repository(
Expand Down
10 changes: 5 additions & 5 deletions toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ local_repository(
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-11.8",
path = "/usr/local/cuda-12.1",
)

new_local_repository(
Expand All @@ -56,17 +56,17 @@ new_local_repository(
http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "999becce82b73e566d0ffe010cd21fea8cf3a33f90f09dcc6b01150b820ae063",
sha256 = "1c3712b3b1de34e9989549f53675b557f6f0ca0b800ccbbc80c941af68abcc65",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230613%2Bcu121.zip"],
)

http_archive(
name = "libtorch_pre_cxx11_abi",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "786cc728c63ea69c40bd8fb535cf8e5e1dfff1d43eaad3eb5256b9ed89c1b268",
sha256 = "6bbb53f2f9533804175a0d2eeae5093ee1907158a9ec75c7fb9d10e2103d5df5",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230605%2Bcu118.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-shared-with-deps-2.1.0.dev20230613%2Bcu121.zip"],
)

####################################################################################
Expand Down
Loading

0 comments on commit 59a2687

Please sign in to comment.