Skip to content

Commit

Permalink
Merge pull request #246 from yfguo/cuda-arch-auto
Browse files Browse the repository at this point in the history
Adding auto CUDA compute capability detection
  • Loading branch information
yfguo authored Sep 28, 2023
2 parents b6b38c8 + 49a732b commit 911849b
Showing 1 changed file with 90 additions and 19 deletions.
109 changes: 90 additions & 19 deletions src/backend/cuda/subconfigure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ AC_ARG_WITH([cuda-sm],
[
--with-cuda-sm=<options> (https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
Comma-separated list of below options:
all - build compatibility for all GPUs supported by the CUDA version (can increase compilation time)
auto - automatically build compatibility for all GPUs visible, any other specified compatibilities are ignored
all-major - build compatibility for all major GPU versions (sm_*0) supported by the CUDA version
# Kepler architecture
kepler - build compatibility for all Kepler GPUs
Expand Down Expand Up @@ -49,11 +50,21 @@ AC_ARG_WITH([cuda-sm],
ampere - build compatibility for all Ampere GPUs
80 - A100, A30
86 - RTX Ampere, MX570, A40, A16, A10, A2
87 - Jetson AGX Orin and Drive AGX Orin
# Ada architecture
ada - build compatibility for all Ada GPUs
89 - GeForce RTX 4090, RTX 4080, RTX 6000, Tesla L40
# Hopper architecture
hopper - build compatibility for all Hopper GPUs
90 - NVIDIA H100 (GH100)
90a - add acceleration for features like wgmma and setmaxnreg. Required for NVIDIA CUTLASS
# Other
<numeric> - specific SM numeric to use
],,
[with_cuda_sm=all])
[with_cuda_sm=auto])


# --with-cuda
Expand Down Expand Up @@ -150,40 +161,91 @@ fi
##########################################################################

if test "${have_cuda}" = "yes" ; then
for version in 11010 11000 10000 9000 8000 7000 6000 5000 ; do
# Determine the newest known CUDA_VERSION threshold that the installed
# cuda.h meets.  Candidates are tried from newest to oldest; for each one a
# translation unit declaring an array of size (CUDA_VERSION - candidate) is
# compiled.  A negative size is a compile error, so the first candidate that
# compiles is the highest one not exceeding CUDA_VERSION.
# NOTE(review): when CUDA_VERSION equals the candidate the array has size
# zero, which relies on the compiler accepting zero-length arrays -- confirm.
for version in 12000 11080 11050 11010 11000 10000 9000 8000 7000 6000 5000 ; do
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
        #include <cuda.h>
        int x[[CUDA_VERSION - $version]];
    ],)],[cuda_version=${version}],[])
    # Quote the expansion so the test stays well-formed when the variable
    # is still empty, then stop at the first (newest) match.
    if test -n "${cuda_version}" ; then break ; fi
done

# Accumulates the SM numbers that will be built.  It starts empty and is
# filled here only when auto detection succeeds.
CUDA_SM=
case "$with_cuda_sm" in
    *auto*)
        dnl Auto detection: build and run a small CUDA runtime program that
        dnl prints major.minor for every visible device.  On success CUDA_SM
        dnl receives the unique values with the dot removed and with_cuda_sm
        dnl is cleared, so any other requested option is ignored.  On any
        dnl failure with_cuda_sm is set to all-major instead.
        PAC_PUSH_FLAG([IFS])
        IFS=" "
        AC_MSG_CHECKING([for CUDA compute capability auto detection])
        AC_LANG_PUSH([C])
        AC_RUN_IFELSE(
            [AC_LANG_PROGRAM(
                [
                    #include <cuda_runtime.h>
                    #include <stdio.h>
                ],
                [
                    /* Declarations come first so the probe also builds
                       with compilers that default to C89. */
                    int count = 0;
                    int device;
                    int reported = 0;
                    if (cudaSuccess != cudaGetDeviceCount(&count)) return 1;
                    if (count == 0) return 1;
                    for (device = 0; device < count; ++device)
                    {
                        struct cudaDeviceProp prop;
                        if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
                        {
                            printf("%d.%d ", prop.major, prop.minor);
                            ++reported;
                        }
                    }
                    /* Fail when no device could be queried so configure
                       takes the fallback path instead of selecting nothing. */
                    return (reported > 0) ? 0 : 1;
                ]
            )],
            [
                # Run the probe again to capture its output: one value per
                # line, de-duplicated, then joined back with single spaces.
                cuda_output=$(./conftest$EXEEXT | xargs -n1 | sort -u | xargs)
                if test -z "$cuda_output" ; then
                    # Nothing usable was reported on the second run.
                    with_cuda_sm=all-major
                    AC_MSG_RESULT([no])
                else
                    for sm in $cuda_output; do
                        sm_no_decimal=`echo $sm | tr -d '.'`
                        PAC_APPEND_FLAG([$sm_no_decimal],[CUDA_SM])
                    done
                    with_cuda_sm=
                    AC_MSG_RESULT([yes])
                fi
            ],
            [
                with_cuda_sm=all-major
                AC_MSG_RESULT([no])
            ],
            [
                # Cross compiling: the probe cannot be executed on the build
                # machine, so use the same fallback as a failed detection.
                with_cuda_sm=all-major
                AC_MSG_RESULT([no (cross compiling)])
            ]
        )
        AC_LANG_POP([C])
        PAC_POP_FLAG([IFS])
        ;;
    *)
        ;;
esac

PAC_PUSH_FLAG([IFS])
IFS=","
CUDA_SM=
for sm in ${with_cuda_sm} ; do
case "$sm" in
all)
if test ${cuda_version} -ge 11010 ; then
# maxwell (52) to ampere (86)
supported_cuda_sms="52 53 60 61 62 70 72 75 80 86"
all-major)
if test ${cuda_version} -ge 11080 ; then
# maxwell (52) to hopper (90)
supported_cuda_sms="52 60 70 80 90"
elif test ${cuda_version} -ge 11010 ; then
# maxwell (52) to ampere (80)
supported_cuda_sms="52 60 70 80"
elif test ${cuda_version} -ge 11000 ; then
# maxwell (52) to ampere (80)
supported_cuda_sms="52 53 60 61 62 70 72 75 80"
supported_cuda_sms="52 60 70 80"
elif test ${cuda_version} -ge 10000 ; then
# kepler (30) to turing (75)
supported_cuda_sms="30 35 37 50 52 53 60 61 62 70 72 75"
# kepler (30) to volta (70)
supported_cuda_sms="30 50 60 70"
elif test ${cuda_version} -ge 9000 ; then
# kepler (30) to volta (72)
supported_cuda_sms="30 35 37 50 52 53 60 61 62 70 72"
# kepler (30) to volta (70)
supported_cuda_sms="30 50 60 70"
elif test ${cuda_version} -ge 8000 ; then
# kepler (30) to pascal (62)
supported_cuda_sms="30 35 37 50 52 53 60 61 62"
# kepler (30) to pascal (60)
supported_cuda_sms="30 50 60"
elif test ${cuda_version} -ge 6000 ; then
# kepler (30) to maxwell (53)
supported_cuda_sms="30 35 37 50 52 53"
# kepler (30) to maxwell (50)
supported_cuda_sms="30 50"
elif test ${cuda_version} -ge 5000 ; then
# kepler (30) to kepler (37)
supported_cuda_sms="30 35 37"
# kepler (30)
supported_cuda_sms="30"
fi

for supported_cuda_sm in $supported_cuda_sms ; do
Expand Down Expand Up @@ -223,6 +285,15 @@ if test "${have_cuda}" = "yes" ; then
PAC_APPEND_FLAG([86],[CUDA_SM])
;;

ada)
PAC_APPEND_FLAG([89],[CUDA_SM])
;;

hopper)
PAC_APPEND_FLAG([90],[CUDA_SM])
PAC_APPEND_FLAG([90a],[CUDA_SM])
;;

none)
;;

Expand Down

0 comments on commit 911849b

Please sign in to comment.