From 29048bbb373d58a5f96da5e083020f261786e332 Mon Sep 17 00:00:00 2001 From: Henning Becker Date: Thu, 12 Sep 2024 13:10:06 -0700 Subject: [PATCH] Update default CUDA Toolkit version to 12.5.1 This updates CUDA for both TF and XLA. It also enables the CUDA driver forward compatibility mode for XLA since XLA's CUDA graph integration needs a newer driver version. PiperOrigin-RevId: 673974335 --- .bazelrc | 2 +- build_tools/ci/build.py | 13 ++++++------- build_tools/ci/golden_commands.txt | 2 +- third_party/tsl/.bazelrc | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.bazelrc b/.bazelrc index 8c7159662d2ed..927dc1d791f81 100644 --- a/.bazelrc +++ b/.bazelrc @@ -219,7 +219,7 @@ build:cuda --repo_env TF_NEED_CUDA=1 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --@local_config_cuda//:enable_cuda # Default CUDA and CUDNN versions. -build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2" +build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.5.1" build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.3.0" # This flag is needed to include CUDA libraries. build:cuda --@local_config_cuda//cuda:include_cuda_libs=true diff --git a/build_tools/ci/build.py b/build_tools/ci/build.py index 3a5b2136d1e0d..baf491cd4cb0a 100755 --- a/build_tools/ci/build.py +++ b/build_tools/ci/build.py @@ -216,14 +216,13 @@ def nvidia_gpu_build_with_compute_capability( test_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu") + extra_gpu_tags, build_tag_filters=("-no_oss", "requires-gpu-nvidia", "gpu"), - options=dict( - run_under="//tools/ci_build/gpu_build:parallel_gpu_execute", - repo_env=f"TF_CUDA_COMPUTE_CAPABILITIES={compute_capability/10}", + options={ + "run_under": "//tools/ci_build/gpu_build:parallel_gpu_execute", + "repo_env": f"TF_CUDA_COMPUTE_CAPABILITIES={compute_capability/10}", + "@cuda_driver//:enable_forward_compatibility": "true", **_DEFAULT_BAZEL_OPTIONS, - ), - extra_setup_commands=( - ["nvidia-smi"], - ), + }, + extra_setup_commands=(["nvidia-smi"],), ) diff --git a/build_tools/ci/golden_commands.txt b/build_tools/ci/golden_commands.txt index 4ff86a8ad58fd..8079a6e10f7a0 100644 --- a/build_tools/ci/golden_commands.txt +++ b/build_tools/ci/golden_commands.txt @@ -19,7 +19,7 @@ $KOKORO_ARTIFACTS_DIR/github/xla/.kokoro/generate_index_html.sh index.html nvidia-smi parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla gcr.io/tensorflow-sigs/build:latest-python3.11 bash -docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/... +docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --@cuda_driver//:enable_forward_compatibility=true --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/... docker exec xla_ci bazel analyze-profile profile.json.gz docker stop xla_ci # END BuildType.GPU diff --git a/third_party/tsl/.bazelrc b/third_party/tsl/.bazelrc index 8c7159662d2ed..927dc1d791f81 100644 --- a/third_party/tsl/.bazelrc +++ b/third_party/tsl/.bazelrc @@ -219,7 +219,7 @@ build:cuda --repo_env TF_NEED_CUDA=1 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --@local_config_cuda//:enable_cuda # Default CUDA and CUDNN versions. -build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2" +build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.5.1" build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.3.0" # This flag is needed to include CUDA libraries. build:cuda --@local_config_cuda//cuda:include_cuda_libs=true