From cd58dea4f9471cdf50b1346da8416bea719f2300 Mon Sep 17 00:00:00 2001 From: Yanfei Guo Date: Sat, 16 Sep 2023 00:01:35 -0500 Subject: [PATCH 1/3] backend/cuda: add Ada and Hopper SM support Signed-off-by: Yanfei Guo --- src/backend/cuda/subconfigure.m4 | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/backend/cuda/subconfigure.m4 b/src/backend/cuda/subconfigure.m4 index 3b2a7b64..31d8e8fc 100644 --- a/src/backend/cuda/subconfigure.m4 +++ b/src/backend/cuda/subconfigure.m4 @@ -49,6 +49,16 @@ AC_ARG_WITH([cuda-sm], ampere - build compatibility for all Ampere GPUs 80 - A100, A30 86 - RTX Ampere, MX570, A40, A16, A10, A2 + 87 - Jetson AGX Orin and Drive AGX Orin + + # Ada architecture + ada - build compatibility for all Ada GPUs + 89 - GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40 + + # Hopper architecture + hopper - build compatibility for all Hopper GPUs + 90 - NVIDIA H100 (GH100) + 90a - add acceleration for features like wgmma and setmaxnreg. Required for NVIDIA CUTLASS # Other - specific SM numeric to use @@ -150,7 +160,7 @@ fi ########################################################################## if test "${have_cuda}" = "yes" ; then - for version in 11010 11000 10000 9000 8000 7000 6000 5000 ; do + for version in 12000 11080 11010 11000 10000 9000 8000 7000 6000 5000 ; do AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ #include int x[[CUDA_VERSION - $version]]; @@ -163,7 +173,13 @@ if test "${have_cuda}" = "yes" ; then for sm in ${with_cuda_sm} ; do case "$sm" in all) - if test ${cuda_version} -ge 11010 ; then + if test ${cuda_version} -ge 12000 ; then + # maxwell (52) to hopper (90a) + supported_cuda_sms="52 53 60 61 62 70 72 75 80 86 87 89 90 90a" + elif test ${cuda_version} -ge 11080 ; then + # maxwell (52) to ada (89) and hopper (90) + supported_cuda_sms="52 53 60 61 62 70 72 75 80 86 87 89 90" + elif test ${cuda_version} -ge 11010 ; then # maxwell (52) to ampere (86) supported_cuda_sms="52 53 60 61 62 70 72 75 80 86" elif test ${cuda_version} -ge 11000 ; then @@ -223,6 +239,15 @@ if test "${have_cuda}" = "yes" ; then PAC_APPEND_FLAG([86],[CUDA_SM]) ;; + ada) + PAC_APPEND_FLAG([89],[CUDA_SM]) + ;; + + hopper) + PAC_APPEND_FLAG([90],[CUDA_SM]) + PAC_APPEND_FLAG([90a],[CUDA_SM]) + ;; + none) ;; From 075f2a3516808a0ceade6d768d948a2b418d1ef3 Mon Sep 17 00:00:00 2001 From: Yanfei Guo Date: Fri, 15 Sep 2023 16:19:39 -0500 Subject: [PATCH 2/3] backend/cuda: add config to detect CUDA compute capability Default to --with-cuda-sm=auto to only build for the visible GPUs Signed-off-by: Yanfei Guo --- src/backend/cuda/subconfigure.m4 | 55 ++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/src/backend/cuda/subconfigure.m4 b/src/backend/cuda/subconfigure.m4 index 31d8e8fc..0d374cc1 100644 --- a/src/backend/cuda/subconfigure.m4 +++ b/src/backend/cuda/subconfigure.m4 @@ -13,6 +13,7 @@ AC_ARG_WITH([cuda-sm], [ --with-cuda-sm= (https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) Comma-separated list of below options: + auto - automatically build compatibility for all GPUs visible, any other specified compatibilities are ignored all - build compatibility for all GPUs supported by the CUDA version (can increase compilation time) # Kepler architecture @@ -63,7 +64,7 @@ AC_ARG_WITH([cuda-sm], # Other - specific SM numeric to use ],, - [with_cuda_sm=all]) + [with_cuda_sm=auto]) # --with-cuda @@ -160,16 +161,64 @@ fi ########################################################################## if test "${have_cuda}" = "yes" ; then - for version in 12000 11080 11010 11000 10000 9000 8000 7000 6000 5000 ; do + for version in 12000 11080 11050 11010 11000 10000 9000 8000 7000 6000 5000 ; do AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ #include int x[[CUDA_VERSION - $version]]; ],)],[cuda_version=${version}],[]) if test ! -z ${cuda_version} ; then break ; fi done + + CUDA_SM= + case "$with_cuda_sm" in + *auto*) + dnl process auto detection + PAC_PUSH_FLAG([IFS]) + IFS=" " + AC_MSG_CHECKING([for CUDA compute capability auto detection]) + AC_LANG_PUSH([C]) + AC_RUN_IFELSE( + [AC_LANG_PROGRAM( + [ + #include + #include + ], + [ + int count = 0; + if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; + if (count == 0) return -1; + for (int device = 0; device < count; ++device) + { + struct cudaDeviceProp prop; + if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) + printf("%d.%d ", prop.major, prop.minor); + } + return 0; + ] + )], + [ + cuda_output=$(./conftest$EXEEXT | xargs -n1 | sort -u | xargs) + for sm in $cuda_output; do + sm_no_decimal=`echo $sm | tr -d '.'` + PAC_APPEND_FLAG([$sm_no_decimal],[CUDA_SM]) + done + with_cuda_sm= + AC_MSG_RESULT([yes]) + ], + [ + with_cuda_sm=all + AC_MSG_RESULT([no]) + ] + ) + AC_LANG_POP([C]) + PAC_POP_FLAG([IFS]) + ;; + *) + ;; + esac + PAC_PUSH_FLAG([IFS]) IFS="," - CUDA_SM= for sm in ${with_cuda_sm} ; do case "$sm" in all) From 49a732ba08ebd8a7c7769bac99b2b8580bf17b52 Mon Sep 17 00:00:00 2001 From: Yanfei Guo Date: Thu, 21 Sep 2023 10:51:15 -0500 Subject: [PATCH 3/3] backend/cuda: adding all-major option for CUDA capabilies We set it as the fallback for CUDA capabilities detection. Also removing all option. Co-authored-by: Jeff Hammond Signed-off-by: Yanfei Guo --- src/backend/cuda/subconfigure.m4 | 41 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/backend/cuda/subconfigure.m4 b/src/backend/cuda/subconfigure.m4 index 0d374cc1..522067d8 100644 --- a/src/backend/cuda/subconfigure.m4 +++ b/src/backend/cuda/subconfigure.m4 @@ -14,7 +14,7 @@ AC_ARG_WITH([cuda-sm], --with-cuda-sm= (https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) Comma-separated list of below options: auto - automatically build compatibility for all GPUs visible, any other specified compatibilities are ignored - all - build compatibility for all GPUs supported by the CUDA version (can increase compilation time) + all-major - build compatibility for all major GPU versions (sm_*0) supported by the CUDA version # Kepler architecture kepler - build compatibility for all Kepler GPUs @@ -206,7 +206,7 @@ if test "${have_cuda}" = "yes" ; then AC_MSG_RESULT([yes]) ], [ - with_cuda_sm=all + with_cuda_sm=all-major AC_MSG_RESULT([no]) ] ) @@ -221,34 +221,31 @@ if test "${have_cuda}" = "yes" ; then IFS="," for sm in ${with_cuda_sm} ; do case "$sm" in - all) - if test ${cuda_version} -ge 12000 ; then - # maxwell (52) to hopper (90a) - supported_cuda_sms="52 53 60 61 62 70 72 75 80 86 87 89 90 90a" - elif test ${cuda_version} -ge 11080 ; then - # maxwell (52) to ada (89) and hopper (90) - supported_cuda_sms="52 53 60 61 62 70 72 75 80 86 87 89 90" + all-major) + if test ${cuda_version} -ge 11080 ; then + # maxwell (52) to hopper (90) + supported_cuda_sms="52 60 70 80 90" elif test ${cuda_version} -ge 11010 ; then - # maxwell (52) to ampere (86) - supported_cuda_sms="52 53 60 61 62 70 72 75 80 86" + # maxwell (52) to ampere (80) + supported_cuda_sms="52 60 70 80" elif test ${cuda_version} -ge 11000 ; then # maxwell (52) to ampere (80) - supported_cuda_sms="52 53 60 61 62 70 72 75 80" + supported_cuda_sms="52 60 70 80" elif test ${cuda_version} -ge 10000 ; then - # kepler (30) to turing (75) - supported_cuda_sms="30 35 37 50 52 53 60 61 62 70 72 75" + # kepler (30) to volta (70) + supported_cuda_sms="30 50 60 70" elif test ${cuda_version} -ge 9000 ; then - # kepler (30) to volta (72) - supported_cuda_sms="30 35 37 50 52 53 60 61 62 70 72" + # kepler (30) to volta (70) + supported_cuda_sms="30 50 60 70" elif test ${cuda_version} -ge 8000 ; then - # kepler (30) to pascal (62) - supported_cuda_sms="30 35 37 50 52 53 60 61 62" + # kepler (30) to pascal (60) + supported_cuda_sms="30 50 60" elif test ${cuda_version} -ge 6000 ; then - # kepler (30) to maxwell (53) - supported_cuda_sms="30 35 37 50 52 53" + # kepler (30) to maxwell (50) + supported_cuda_sms="30 50" elif test ${cuda_version} -ge 5000 ; then - # kepler (30) to kepler (37) - supported_cuda_sms="30 35 37" + # kepler (30) + supported_cuda_sms="30" fi for supported_cuda_sm in $supported_cuda_sms ; do