From 4cc8e294a8c2e9f34ac9c195578f94c9c5c12e34 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Wed, 25 Sep 2024 15:14:34 -0500 Subject: [PATCH 01/21] expose CHPL_GPU_SDK_VERSION Signed-off-by: Jade Abraham --- modules/standard/ChplConfig.chpl | 6 ++++++ runtime/Makefile | 13 +++++++++++++ util/chplenv/chpl_gpu.py | 5 +++-- util/chplenv/printchplenv.py | 4 ++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/modules/standard/ChplConfig.chpl b/modules/standard/ChplConfig.chpl index 4963bb5479b1..cda29ca67483 100644 --- a/modules/standard/ChplConfig.chpl +++ b/modules/standard/ChplConfig.chpl @@ -189,6 +189,12 @@ module ChplConfig { param CHPL_GPU:string; CHPL_GPU = __primitive("get compiler variable", "CHPL_GPU"); + @chpldoc.nodoc + @unstable("'ChplConfig.CHPL_GPU_SDK_VERSION' is unstable and may be replaced with a different way to access this information in the future") + param CHPL_GPU_SDK_VERSION:string; + CHPL_GPU_SDK_VERSION = __primitive("get compiler variable", "CHPL_GPU_SDK_VERSION"); + + @chpldoc.nodoc @unstable("'ChplConfig.CHPL_LIB_PIC' is unstable and may be replaced with a different way to access this information in the future") param CHPL_LIB_PIC: string; diff --git a/runtime/Makefile b/runtime/Makefile index 9d422e075ced..9987fd27d922 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -64,6 +64,19 @@ $(CHPL_ENV_HEADER): $(CHPL_MAKE_HOME)/util/printchplenv $(CHPL_MAKE_HOME)/util/c sed 's/^ *//;s/ *$$//' | \ sed 's/[^0-9A-Za-z]/_/g' | \ awk '{ print "#define " toupper($$1) }' >> $(CHPL_ENV_HEADER) + + @$(CHPL_MAKE_HOME)/util/printchplenv --only CHPL_GPU_SDK_VERSION --value | \ + grep -q none && \ + echo "#define CHPL_GPU_SDK_VERSION_MAJOR 0" >> $(CHPL_ENV_HEADER) && \ + echo "#define CHPL_GPU_SDK_VERSION_MINOR 0" >> $(CHPL_ENV_HEADER) && \ + echo "#define CHPL_GPU_SDK_VERSION_PATCH 0" >> $(CHPL_ENV_HEADER) \ + || \ + $(CHPL_MAKE_HOME)/util/printchplenv --only CHPL_GPU_SDK_VERSION --value | \ + sed 's/\./ /g' | \ + awk '{ print "#define CHPL_GPU_SDK_VERSION_MAJOR " $$1; \ + print "#define CHPL_GPU_SDK_VERSION_MINOR " $$2; \ + print "#define CHPL_GPU_SDK_VERSION_PATCH " $$3 }' >> $(CHPL_ENV_HEADER) + @echo "#endif /* _CHPL_ENV_GEN_H_ */" >> $(CHPL_ENV_HEADER) THIRD_PARTY_PKGS = $(shell $(CHPL_MAKE_PYTHON) $(CHPL_MAKE_HOME)/util/chplenv/third-party-pkgs) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index f7e2a79dd96c..dd3f3286ef86 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -367,8 +367,7 @@ def get_sdk_version(): if match: rocm_version = match.group(1) return rocm_version - - if get() == 'nvidia': + elif get() == 'nvidia': chpl_cuda_path = get_sdk_path('nvidia') version_file_json = '%s/version.json' % chpl_cuda_path version_file_txt = '%s/version.txt' % chpl_cuda_path @@ -392,6 +391,8 @@ def get_sdk_version(): if match: cuda_version = match.group(1) return cuda_version + else: + return 'none' def _validate_rocm_version_impl(): diff --git a/util/chplenv/printchplenv.py b/util/chplenv/printchplenv.py index d78c840d1e86..1ba9cc2323ef 100755 --- a/util/chplenv/printchplenv.py +++ b/util/chplenv/printchplenv.py @@ -101,6 +101,7 @@ ChapelEnv('CHPL_TARGET_BACKEND_CPU', INTERNAL), ChapelEnv('CHPL_LOCALE_MODEL', RUNTIME | LAUNCHER | DEFAULT, 'loc'), ChapelEnv(' CHPL_GPU', RUNTIME | DEFAULT, 'gpu'), + ChapelEnv(' CHPL_GPU_SDK_VERSION', RUNTIME), ChapelEnv(' CHPL_GPU_ARCH', INTERNAL), ChapelEnv(' CHPL_GPU_MEM_STRATEGY', RUNTIME , 'gpu_mem' ), ChapelEnv(' CHPL_CUDA_PATH', INTERNAL), @@ -205,6 +206,7 @@ def 
compute_all_values(): ENV_VALS['CHPL_LOCALE_MODEL'] = chpl_locale_model.get() ENV_VALS[' CHPL_GPU'] = chpl_gpu.get() + ENV_VALS[' CHPL_GPU_SDK_VERSION'] = chpl_gpu.get_sdk_version() ENV_VALS[' CHPL_CUDA_LIBDEVICE_PATH'] = chpl_gpu.get_cuda_libdevice_path() ENV_VALS[' CHPL_GPU_MEM_STRATEGY'] = chpl_gpu.get_gpu_mem_strategy() ENV_VALS['CHPL_COMM'] = chpl_comm.get() @@ -379,6 +381,8 @@ def filter_tidy(chpl_env): return gpu == 'amd' elif chpl_env.name == ' CHPL_GPU_ARCH': return gpu == 'nvidia' or gpu == 'amd' + elif chpl_env.name == ' CHPL_GPU_SDK_VERSION': + return gpu != 'none' elif chpl_env.name == ' CHPL_HOST_JEMALLOC': return host_mem == 'jemalloc' elif chpl_env.name == ' CHPL_TARGET_JEMALLOC': From 9b5d8964e620eb47b56d4fa028956d220a80f718 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Wed, 25 Sep 2024 15:14:47 -0500 Subject: [PATCH 02/21] use CHPL_GPU_SDK_VERSION_MAJOR Signed-off-by: Jade Abraham --- runtime/include/gpu/amd/rocm-version.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/runtime/include/gpu/amd/rocm-version.h b/runtime/include/gpu/amd/rocm-version.h index 39a25bc7c0c7..8da037d82da0 100644 --- a/runtime/include/gpu/amd/rocm-version.h +++ b/runtime/include/gpu/amd/rocm-version.h @@ -21,20 +21,6 @@ #ifndef __HIP_PLATFORM_AMD__ #define __HIP_PLATFORM_AMD__ #endif -#include -#include -#if __has_include() // 5.x wants this -#include -#elif __has_include() // 4.x wants this -#include -#elif __has_include() // Deprecated. 5.x used to want this -#include -#endif - -// we should have found the correct header by now. But if not, we set -// ROCM_VERSION_MAJOR to 4 as it is the lowest version we support and has fewer -// runtime features enabled. So, it is a safer choice. -#if !defined(ROCM_VERSION_MAJOR) -#define ROCM_VERSION_MAJOR 4 -#endif +// CHPL_GPU_SDK_VERSION_MAJOR is determined by printchplenv +#define ROCM_VERSION_MAJOR CHPL_GPU_SDK_VERSION_MAJOR From ad12c53753d5915e5b1e0793c836bc44540712fa Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Wed, 25 Sep 2024 16:42:43 -0500 Subject: [PATCH 03/21] cleanup makefile Signed-off-by: Jade Abraham --- runtime/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/Makefile b/runtime/Makefile index 9987fd27d922..4526b28b6520 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -64,7 +64,6 @@ $(CHPL_ENV_HEADER): $(CHPL_MAKE_HOME)/util/printchplenv $(CHPL_MAKE_HOME)/util/c sed 's/^ *//;s/ *$$//' | \ sed 's/[^0-9A-Za-z]/_/g' | \ awk '{ print "#define " toupper($$1) }' >> $(CHPL_ENV_HEADER) - @$(CHPL_MAKE_HOME)/util/printchplenv --only CHPL_GPU_SDK_VERSION --value | \ grep -q none && \ echo "#define CHPL_GPU_SDK_VERSION_MAJOR 0" >> $(CHPL_ENV_HEADER) && \ @@ -76,7 +75,6 @@ $(CHPL_ENV_HEADER): $(CHPL_MAKE_HOME)/util/printchplenv $(CHPL_MAKE_HOME)/util/c awk '{ print "#define CHPL_GPU_SDK_VERSION_MAJOR " $$1; \ print "#define CHPL_GPU_SDK_VERSION_MINOR " $$2; \ print "#define CHPL_GPU_SDK_VERSION_PATCH " $$3 }' >> $(CHPL_ENV_HEADER) - @echo "#endif /* _CHPL_ENV_GEN_H_ */" >> $(CHPL_ENV_HEADER) THIRD_PARTY_PKGS = $(shell $(CHPL_MAKE_PYTHON) $(CHPL_MAKE_HOME)/util/chplenv/third-party-pkgs) From 480f84fc7a7c3e648111841b32c34985802bd78a Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Wed, 25 Sep 2024 16:43:49 -0500 Subject: [PATCH 04/21] add strip to version checking Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index dd3f3286ef86..819b702517b2 100644 
--- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -341,6 +341,7 @@ def _validate_cuda_version_impl(): return True def get_sdk_version(): + version = 'none' if get() == 'amd': chpl_rocm_path = get_sdk_path('amd', sdk_type='include') files_to_try = ['%s/.info/version-hiprt' % chpl_rocm_path, @@ -366,7 +367,7 @@ def get_sdk_version(): match = re.search(r"llvm-amdgpu-([\d\.]+)", my_stdout) if match: rocm_version = match.group(1) - return rocm_version + version = rocm_version elif get() == 'nvidia': chpl_cuda_path = get_sdk_path('nvidia') version_file_json = '%s/version.json' % chpl_cuda_path @@ -390,9 +391,9 @@ def get_sdk_version(): match = re.search(pattern, my_stdout) if match: cuda_version = match.group(1) - return cuda_version - else: - return 'none' + version = cuda_version + version = version.strip() if version is not None else 'none' + return version def _validate_rocm_version_impl(): From fe47d2bcd395d2a92f8620e3afde12e38ef85a12 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Wed, 25 Sep 2024 16:48:26 -0500 Subject: [PATCH 05/21] remove hsa-runtime64 Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 6 ------ util/chplenv/compile_link_args_utils.py | 4 ---- util/chplenv/overrides.py | 1 - util/chplenv/printchplenv.py | 4 +--- 4 files changed, 1 insertion(+), 14 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 819b702517b2..1a937446c23d 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -19,7 +19,6 @@ class gpu_type: def __init__(self, sdk_path_env, sdk_path_env_bitcode, sdk_path_env_include, - sdk_path_env_runtime, compiler, default_arch, llvm_target, @@ -29,7 +28,6 @@ def __init__(self, sdk_path_env, self.sdk_path_env = sdk_path_env self.sdk_path_env_bitcode = sdk_path_env_bitcode self.sdk_path_env_include = sdk_path_env_include - self.sdk_path_env_runtime = sdk_path_env_runtime self.compiler = compiler self.default_arch = default_arch self.llvm_target = llvm_target @@ -54,7 +52,6 @@ def _validate_rocm_llvm_version(gpu: gpu_type): "nvidia": gpu_type(sdk_path_env="CHPL_CUDA_PATH", sdk_path_env_bitcode="CHPL_CUDA_PATH", sdk_path_env_include="CHPL_CUDA_PATH", - sdk_path_env_runtime="CHPL_CUDA_PATH", compiler="nvcc", default_arch="sm_60", llvm_target="NVPTX", @@ -64,7 +61,6 @@ def _validate_rocm_llvm_version(gpu: gpu_type): "amd": gpu_type(sdk_path_env="CHPL_ROCM_PATH", sdk_path_env_bitcode="CHPL_ROCM_BITCODE_PATH", sdk_path_env_include="CHPL_ROCM_INCLUDE_PATH", - sdk_path_env_runtime="CHPL_ROCM_RUNTIME_PATH", compiler="hipcc", default_arch="", llvm_target="AMDGPU", @@ -74,7 +70,6 @@ def _validate_rocm_llvm_version(gpu: gpu_type): "cpu": gpu_type(sdk_path_env="", sdk_path_env_bitcode="", sdk_path_env_include="", - sdk_path_env_runtime="", compiler="", default_arch="", llvm_target="", @@ -174,7 +169,6 @@ def get_sdk_path(for_gpu, sdk_type='bitcode'): sub_env_names = { "bitcode": gpu.sdk_path_env_bitcode, "include": gpu.sdk_path_env_include, - "runtime": gpu.sdk_path_env_runtime } assert sdk_type in sub_env_names diff --git a/util/chplenv/compile_link_args_utils.py b/util/chplenv/compile_link_args_utils.py index f7d4abff846f..2fd5238391f7 100644 --- a/util/chplenv/compile_link_args_utils.py +++ b/util/chplenv/compile_link_args_utils.py @@ -108,15 +108,11 @@ def get_runtime_link_args(runtime_subdir): system.append("-lcuda") elif gpu_type == "amd": paths = [sdk_path] - runtime_path = chpl_gpu.get_sdk_path(gpu_type, sdk_type='runtime') - if runtime_path not in paths: - paths.append(runtime_path) for p in paths: 
lib_path = os.path.join(p, "lib") system.append("-L" + lib_path) system.append("-Wl,-rpath," + lib_path) system.append("-lamdhip64") - system.append("-lhsa-runtime64") # always link with the math library system.append("-lm") diff --git a/util/chplenv/overrides.py b/util/chplenv/overrides.py index 14d58dc01730..352dc8508f43 100755 --- a/util/chplenv/overrides.py +++ b/util/chplenv/overrides.py @@ -39,7 +39,6 @@ 'CHPL_ROCM_PATH', 'CHPL_ROCM_BITCODE_PATH', 'CHPL_ROCM_INCLUDE_PATH', - 'CHPL_ROCM_RUNTIME_PATH' 'CHPL_GPU_ARCH', 'CHPL_COMM', diff --git a/util/chplenv/printchplenv.py b/util/chplenv/printchplenv.py index 1ba9cc2323ef..0ba0c312ddb7 100755 --- a/util/chplenv/printchplenv.py +++ b/util/chplenv/printchplenv.py @@ -108,7 +108,6 @@ ChapelEnv(' CHPL_ROCM_PATH', INTERNAL), ChapelEnv(' CHPL_ROCM_BITCODE_PATH', INTERNAL), ChapelEnv(' CHPL_ROCM_INCLUDE_PATH', INTERNAL), - ChapelEnv(' CHPL_ROCM_RUNTIME_PATH', INTERNAL), ChapelEnv(' CHPL_CUDA_LIBDEVICE_PATH', INTERNAL), ChapelEnv('CHPL_COMM', RUNTIME | LAUNCHER | DEFAULT, 'comm'), ChapelEnv(' CHPL_COMM_SUBSTRATE', RUNTIME | LAUNCHER | DEFAULT), @@ -321,7 +320,6 @@ def compute_internal_values(): ENV_VALS[' CHPL_ROCM_PATH'] = chpl_gpu.get_sdk_path("amd") ENV_VALS[' CHPL_ROCM_BITCODE_PATH'] = chpl_gpu.get_sdk_path("amd", sdk_type="bitcode") ENV_VALS[' CHPL_ROCM_INCLUDE_PATH'] = chpl_gpu.get_sdk_path("amd", sdk_type="include") - ENV_VALS[' CHPL_ROCM_RUNTIME_PATH'] = chpl_gpu.get_sdk_path("amd", sdk_type="runtime") @@ -377,7 +375,7 @@ def filter_tidy(chpl_env): return gpu == 'nvidia' elif chpl_env.name == ' CHPL_ROCM_PATH': return gpu == 'amd' - elif chpl_env.name in (' CHPL_ROCM_BITCODE_PATH', ' CHPL_ROCM_INCLUDE_PATH', ' CHPL_ROCM_RUNTIME_PATH'): + elif chpl_env.name in (' CHPL_ROCM_BITCODE_PATH', ' CHPL_ROCM_INCLUDE_PATH'): return gpu == 'amd' elif chpl_env.name == ' CHPL_GPU_ARCH': return gpu == 'nvidia' or gpu == 'amd' From f319c79f1f0332bd7fdc38bbb56261e1a6075df2 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 7 Oct 2024 10:27:07 -0500 Subject: [PATCH 06/21] refactor compile args for gpus Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 60 +++++++++++++++++++++++++ util/chplenv/compile_link_args_utils.py | 49 +++----------------- util/chplenv/third_party_utils.py | 1 + 3 files changed, 67 insertions(+), 43 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 1a937446c23d..901b01d8fb7c 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -3,6 +3,7 @@ import glob import json import chpl_locale_model +import chpl_platform import chpl_llvm import chpl_compiler import re @@ -230,6 +231,65 @@ def get_gpu_mem_strategy(): return memtype return "array_on_device" +def get_runtime_compile_args(): + if chpl_locale_model.get() != 'gpu': + return [], [] + bundled = [] + system = [] + + gpu_type = get() + sdk_path = get_sdk_path(gpu_type, sdk_type='include') + incl = chpl_home_utils.get_chpl_runtime_incl() + + # this -D is needed since it affects code inside of headers + bundled.append("-DHAS_GPU_LOCALE") + if gpu_type == "cpu": + bundled.append("-DGPU_RUNTIME_CPU") + + # If compiling for GPU locales, add CUDA runtime headers to include path + bundled.append("-I" + os.path.join(incl, "gpu", gpu_type)) + if gpu_type == "nvidia": + system.append("-I" + os.path.join(sdk_path, "include")) + + # workaround an issue with __float128 not being supported by clang in device code + system.append("-D__STRICT_ANSI__=1") + + elif gpu_type == "amd": + # -isystem instead of -I silences warnings from inside 
these includes. + system.append("-isystem" + os.path.join(sdk_path, "include")) + system.append("-isystem" + os.path.join(sdk_path, "hip", "include")) + + return bundled, system + +def get_runtime_link_args(): + if chpl_locale_model.get() != 'gpu': + return [], [] + bundled = [] + system = [] + + gpu_type = get() + sdk_path = get_sdk_path(gpu_type, sdk_type='include') + + if gpu_type == "nvidia": + system.append("-L" + os.path.join(sdk_path, "lib64")) + system.append("-lcudart") + if chpl_platform.is_wsl(): + # WSL needs to link with libcuda that belongs to the driver hosted in Windows + system.append("-L" + os.path.join("/usr", "lib", "wsl", "lib")) + system.append("-lcuda") + elif gpu_type == "amd": + paths = [sdk_path] + for p in paths: + lib_path = os.path.join(p, "lib") + system.append("-L" + lib_path) + system.append("-Wl,-rpath," + lib_path) + system.append("-lamdhip64") + + return bundled, system + + + + def get_cuda_libdevice_path(): if get() == 'nvidia': diff --git a/util/chplenv/compile_link_args_utils.py b/util/chplenv/compile_link_args_utils.py index 2fd5238391f7..ff2353f4b2b6 100644 --- a/util/chplenv/compile_link_args_utils.py +++ b/util/chplenv/compile_link_args_utils.py @@ -50,29 +50,9 @@ def get_runtime_includes_and_defines(): # this is needed since it affects code inside of headers bundled.append("-DCHPL_COMM_DEBUG") - if locale_model == "gpu": - # this -D is needed since it affects code inside of headers - bundled.append("-DHAS_GPU_LOCALE") - if chpl_gpu.get() == "cpu": - bundled.append("-DGPU_RUNTIME_CPU") - memtype = chpl_gpu.get_gpu_mem_strategy() - - # If compiling for GPU locales, add CUDA runtime headers to include path - gpu_type = chpl_gpu.get() - sdk_path = chpl_gpu.get_sdk_path(gpu_type, sdk_type='include') - - bundled.append("-I" + os.path.join(incl, "gpu", chpl_gpu.get())) - if gpu_type == "nvidia": - system.append("-I" + os.path.join(sdk_path, "include")) - - # workaround an issue with __float128 not being supported by clang in device code - system.append("-D__STRICT_ANSI__=1") - - elif gpu_type == "amd": - # -isystem instead of -I silences warnings from inside these includes. 
- system.append("-isystem" + os.path.join(sdk_path, "include")) - system.append("-isystem" + os.path.join(sdk_path, "hip", "include")) - system.append("-isystem" + os.path.join(sdk_path, "hsa", "include")) + gpu_bundled, gpu_system = chpl_gpu.get_runtime_includes_and_defines() + bundled.extend(gpu_bundled) + system.extend(gpu_system) if mem == "jemalloc" and chpl_jemalloc.get('target') == "bundled": # set -DCHPL_JEMALLOC_PREFIX=chpl_je_ @@ -89,30 +69,13 @@ def get_runtime_link_args(runtime_subdir): system = [ ] lib = chpl_home_utils.get_chpl_runtime_lib() - locale_model = chpl_locale_model.get() bundled.append("-L" + os.path.join(lib, runtime_subdir)) bundled.append("-lchpl") - if locale_model == "gpu": - # If compiling for GPU locales, add CUDA to link path, - # and add cuda libraries - gpu_type = chpl_gpu.get() - sdk_path = chpl_gpu.get_sdk_path(gpu_type, sdk_type='include') - if gpu_type == "nvidia": - system.append("-L" + os.path.join(sdk_path, "lib64")) - system.append("-lcudart") - if chpl_platform.is_wsl(): - # WSL needs to link with libcuda that belongs to the driver hosted in Windows - system.append("-L" + os.path.join("/usr", "lib", "wsl", "lib")) - system.append("-lcuda") - elif gpu_type == "amd": - paths = [sdk_path] - for p in paths: - lib_path = os.path.join(p, "lib") - system.append("-L" + lib_path) - system.append("-Wl,-rpath," + lib_path) - system.append("-lamdhip64") + gpu_bundled, gpu_system = chpl_gpu.get_runtime_link_args() + bundled.extend(gpu_bundled) + system.extend(gpu_system) # always link with the math library system.append("-lm") diff --git a/util/chplenv/third_party_utils.py b/util/chplenv/third_party_utils.py index 0f0a2b332b60..05490e9e5809 100644 --- a/util/chplenv/third_party_utils.py +++ b/util/chplenv/third_party_utils.py @@ -471,3 +471,4 @@ def could_not_find_pkgconfig_pkg(pkg, envname): else: install_str = " with `brew install {0}`".format(pkg) if homebrew_utils.homebrew_exists() else "" error("Could not find a suitable {0} installation. 
Please install {0}{1} or set {2}=bundled.".format(pkg, install_str, envname)) + From 3f2477ccbe09c24a8e5ee1ee3c8383a387551111 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 7 Oct 2024 10:27:33 -0500 Subject: [PATCH 07/21] remove CHPL_ROCM_PATH subvars Signed-off-by: Jade Abraham --- compiler/main/driver.cpp | 2 +- util/chplenv/overrides.py | 2 -- util/chplenv/printchplenv.py | 6 ------ 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp index b9224b2fa2f7..17a0d6faf7c4 100644 --- a/compiler/main/driver.cpp +++ b/compiler/main/driver.cpp @@ -1831,7 +1831,7 @@ static void setChapelEnvs() { gGpuSdkPath = envMap["CHPL_CUDA_PATH"]; break; case GpuCodegenType::GPU_CG_AMD_HIP: - gGpuSdkPath = envMap["CHPL_ROCM_BITCODE_PATH"]; + gGpuSdkPath = envMap["CHPL_ROCM_PATH"]; break; case GpuCodegenType::GPU_CG_CPU: gGpuSdkPath = ""; diff --git a/util/chplenv/overrides.py b/util/chplenv/overrides.py index 352dc8508f43..c8a218c10e0c 100755 --- a/util/chplenv/overrides.py +++ b/util/chplenv/overrides.py @@ -37,8 +37,6 @@ 'CHPL_GPU_MEM_STRATEGY', 'CHPL_CUDA_PATH', 'CHPL_ROCM_PATH', - 'CHPL_ROCM_BITCODE_PATH', - 'CHPL_ROCM_INCLUDE_PATH', 'CHPL_GPU_ARCH', 'CHPL_COMM', diff --git a/util/chplenv/printchplenv.py b/util/chplenv/printchplenv.py index 0ba0c312ddb7..d74bc77c3f0e 100755 --- a/util/chplenv/printchplenv.py +++ b/util/chplenv/printchplenv.py @@ -106,8 +106,6 @@ ChapelEnv(' CHPL_GPU_MEM_STRATEGY', RUNTIME , 'gpu_mem' ), ChapelEnv(' CHPL_CUDA_PATH', INTERNAL), ChapelEnv(' CHPL_ROCM_PATH', INTERNAL), - ChapelEnv(' CHPL_ROCM_BITCODE_PATH', INTERNAL), - ChapelEnv(' CHPL_ROCM_INCLUDE_PATH', INTERNAL), ChapelEnv(' CHPL_CUDA_LIBDEVICE_PATH', INTERNAL), ChapelEnv('CHPL_COMM', RUNTIME | LAUNCHER | DEFAULT, 'comm'), ChapelEnv(' CHPL_COMM_SUBSTRATE', RUNTIME | LAUNCHER | DEFAULT), @@ -318,8 +316,6 @@ def compute_internal_values(): ENV_VALS[' CHPL_GPU_ARCH'] = chpl_gpu.get_arch() ENV_VALS[' CHPL_CUDA_PATH'] = chpl_gpu.get_sdk_path("nvidia") ENV_VALS[' CHPL_ROCM_PATH'] = chpl_gpu.get_sdk_path("amd") - ENV_VALS[' CHPL_ROCM_BITCODE_PATH'] = chpl_gpu.get_sdk_path("amd", sdk_type="bitcode") - ENV_VALS[' CHPL_ROCM_INCLUDE_PATH'] = chpl_gpu.get_sdk_path("amd", sdk_type="include") @@ -375,8 +371,6 @@ def filter_tidy(chpl_env): return gpu == 'nvidia' elif chpl_env.name == ' CHPL_ROCM_PATH': return gpu == 'amd' - elif chpl_env.name in (' CHPL_ROCM_BITCODE_PATH', ' CHPL_ROCM_INCLUDE_PATH'): - return gpu == 'amd' elif chpl_env.name == ' CHPL_GPU_ARCH': return gpu == 'nvidia' or gpu == 'amd' elif chpl_env.name == ' CHPL_GPU_SDK_VERSION': From ae2f14b8288e5df4b40d6be26a787759a25d85b3 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 7 Oct 2024 10:27:45 -0500 Subject: [PATCH 08/21] add combine_output Signed-off-by: Jade Abraham --- util/chplenv/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util/chplenv/utils.py b/util/chplenv/utils.py index a9175a4a6b84..d945be0c0125 100644 --- a/util/chplenv/utils.py +++ b/util/chplenv/utils.py @@ -53,16 +53,18 @@ class CommandError(Exception): pass -def try_run_command(command, cmd_input=None): +def try_run_command(command, cmd_input=None, combine_output=False): """Command subprocess wrapper tolerating failure to find or run the cmd. For normal usage the vanilla run_command() may be simpler to use. This should be the only invocation of subprocess in all chplenv scripts. 
This could be replaced by subprocess.check_output, but that is only available after Python 2.7, and we still support 2.6 :(""" + + stderr = subprocess.STDOUT if combine_output else subprocess.PIPE try: process = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stderr=stderr, stdin=subprocess.PIPE) except OSError: return (False, 0, None, None) From 4c2d6f804381128e1afbc6949ab26d0bbae68fe6 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 7 Oct 2024 10:28:16 -0500 Subject: [PATCH 09/21] start refactor Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 99 +++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 901b01d8fb7c..2bb76b5a47c9 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -8,7 +8,8 @@ import chpl_compiler import re import chpl_tasks -from utils import error, warning, memoize, run_command, which, is_ver_in_range +import chpl_home_utils +from utils import error, warning, memoize, run_command, try_run_command, which, is_ver_in_range def _validate_cuda_version(): return _validate_cuda_version_impl() @@ -48,33 +49,73 @@ def _validate_cuda_llvm_version(gpu: gpu_type): def _validate_rocm_llvm_version(gpu: gpu_type): return _validate_rocm_llvm_version_impl(gpu) +@memoize +def _gpu_compiler_version_output(compiler: str, lang: str): + dummy_main = "int main() { return 0; }" + exists, returncode, stdout, _ = try_run_command([compiler, "-v", "-c", "-x", lang, "-", "-o", "/dev/null"], cmd_input=dummy_main, combine_output=True) + if exists and returncode == 0 and stdout: + return stdout + else: + return None + +def _find_cuda_sdk_path(compiler: str): + out = _gpu_compiler_version_output(compiler, "cu") + # #$ TOP= + + # find lib device: #$ NVVMIR_LIBRARY_DIR= + + +# LLVM AMD GPU +def _find_rocm_sdk_path(compiler: str): + out = _gpu_compiler_version_output(compiler, "hip") + # InstalledDir + +# HIP +def _find_rocm_include_path(compiler: str): + # Found HIP installation + pass + +def _find_cuda_version(compiler: str): + # cuda, we can run nvcc --version + # 'Cuda compilation tools, release' + pass + +def _find_rocm_version(compiler: str): + # hip/amd sucks, we have to guess + # one of the following regexs in the compiler output, look in this order + ' roc-VERSION ' + '/rocm-VERSION/' + '/hip-VERSION/' + '/llvm-amdgpu-VERSION/' + '/rocm/VERSION/' + pass GPU_TYPES = { "nvidia": gpu_type(sdk_path_env="CHPL_CUDA_PATH", - sdk_path_env_bitcode="CHPL_CUDA_PATH", - sdk_path_env_include="CHPL_CUDA_PATH", compiler="nvcc", default_arch="sm_60", llvm_target="NVPTX", runtime_impl="cuda", + find_sdk_path=_find_cuda_sdk_path, + find_version=_find_cuda_version, version_validator=_validate_cuda_version, llvm_validator=_validate_cuda_llvm_version), "amd": gpu_type(sdk_path_env="CHPL_ROCM_PATH", - sdk_path_env_bitcode="CHPL_ROCM_BITCODE_PATH", - sdk_path_env_include="CHPL_ROCM_INCLUDE_PATH", compiler="hipcc", default_arch="", llvm_target="AMDGPU", runtime_impl="rocm", + find_sdk_path=_find_rocm_sdk_path, + find_version=_find_rocm_version, version_validator=_validate_rocm_version, llvm_validator=_validate_rocm_llvm_version), "cpu": gpu_type(sdk_path_env="", - sdk_path_env_bitcode="", - sdk_path_env_include="", compiler="", default_arch="", llvm_target="", runtime_impl="cpu", + find_sdk_path=lambda compiler: None, + find_version=lambda compiler: None, version_validator=lambda: None, llvm_validator=lambda: None), } @@ -157,6 +198,21 @@ def 
get_arch(): "for more information.".format(gpu_type)) return 'error' +@memoize +def get_gpu_compiler(): + gpu_type = get() + sdk_path = get_sdk_path(gpu_type) + if sdk_path == 'none': + return 'none' + + name = GPU_TYPES[gpu_type].compiler + bin_dir = os.path.join(sdk_path, 'bin') + full_path = os.path.join(bin_dir, name) + if not os.path.exists(full_path): + _reportMissingGpuReq("Could not find {} in {}".format(name, bin_dir)) + return full_path + + @memoize def get_sdk_path(for_gpu, sdk_type='bitcode'): gpu_type = get() @@ -188,20 +244,21 @@ def get_sdk_path(for_gpu, sdk_type='bitcode'): gpu.compiler]) if exists and returncode == 0: - # Walk up from directories from the one containing the gpu compiler - # (e.g. `nvcc` or `hipcc`) until we find a directory that starts with - # `runtime_impl` (e.g `cuda` or `rocm`) - # TODO: this logic does not seem to work for spack - real_path = os.path.realpath(my_stdout.strip()).strip() - path_parts = real_path.split("/") - chpl_sdk_path = "/" - for part in path_parts: - if len(part) == 0: continue - chpl_sdk_path += part - if not part.startswith(gpu.runtime_impl): - chpl_sdk_path += "/" - else: - break + pass + # # Walk up from directories from the one containing the gpu compiler + # # (e.g. `nvcc` or `hipcc`) until we find a directory that starts with + # # `runtime_impl` (e.g `cuda` or `rocm`) + # # TODO: this logic does not seem to work for spack + # real_path = os.path.realpath(my_stdout.strip()).strip() + # path_parts = real_path.split("/") + # chpl_sdk_path = "/" + # for part in path_parts: + # if len(part) == 0: continue + # chpl_sdk_path += part + # if not part.startswith(gpu.runtime_impl): + # chpl_sdk_path += "/" + # else: + # break # validate the SDK path found if not (os.path.exists(chpl_sdk_path) and os.path.isdir(chpl_sdk_path)): From 639cc496d604341e65bb456391a970291703018e Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Thu, 10 Oct 2024 14:14:24 -0500 Subject: [PATCH 10/21] finish main part of refactor Signed-off-by: Jade Abraham --- compiler/include/driver.h | 2 + compiler/llvm/clangUtil.cpp | 11 +- compiler/main/driver.cpp | 4 + util/chplenv/chpl_gpu.py | 305 +++++++++++++----------- util/chplenv/compile_link_args_utils.py | 2 +- util/chplenv/printchplenv.py | 8 + util/chplenv/utils.py | 5 +- 7 files changed, 182 insertions(+), 155 deletions(-) diff --git a/compiler/include/driver.h b/compiler/include/driver.h index 525cbdb7587b..eb1aeafb225e 100644 --- a/compiler/include/driver.h +++ b/compiler/include/driver.h @@ -153,6 +153,8 @@ extern const char* CHPL_TARGET_BUNDLED_LINK_ARGS; extern const char* CHPL_TARGET_SYSTEM_LINK_ARGS; extern const char* CHPL_CUDA_LIBDEVICE_PATH; +extern const char* CHPL_ROCM_LLVM_PATH; +extern const char* CHPL_ROCM_LIBDEVICE_PATH; extern const char* CHPL_GPU; extern const char* CHPL_GPU_ARCH; diff --git a/compiler/llvm/clangUtil.cpp b/compiler/llvm/clangUtil.cpp index d2730cd13442..2a393021ff89 100644 --- a/compiler/llvm/clangUtil.cpp +++ b/compiler/llvm/clangUtil.cpp @@ -4450,7 +4450,7 @@ static void linkGpuDeviceLibraries() { } else { // See for details // on what these various libraries are. 
- auto libPath = gGpuSdkPath + std::string("/amdgcn/bitcode"); + auto libPath = CHPL_ROCM_AMDGCN_PATH + std::string("/bitcode"); linkBitCodeFile((libPath + "/hip.bc").c_str()); linkBitCodeFile((libPath + "/ocml.bc").c_str()); linkBitCodeFile((libPath + "/ockl.bc").c_str()); @@ -4821,11 +4821,7 @@ static void makeBinaryLLVMForHIP(const std::string& artifactFilename, std::string inputs = "-inputs=/dev/null"; std::string outputs = "-outputs=" + fatbinFilename; #endif - auto sdkString = std::string(gGpuSdkPath); - // check file exists, maybe use alternate path (say if spack installed) - std::string lldBin = pathExists((sdkString + "/llvm/bin/lld").c_str()) ? - sdkString + "/llvm/bin/lld" : - sdkString + "/bin/lld"; + std::string lldBin = CHPL_ROCM_LLVM_PATH + std::string("/bin/lld"); for (auto& gpuArch : gpuArches) { std::string gpuObject = gpuObjFilename + "_" + gpuArch + ".o"; std::string gpuOut = outFilenamePrefix + "_" + gpuArch + ".out"; @@ -5051,7 +5047,8 @@ void makeBinaryLLVM(void) { gpuArgs += " -Wno-unknown-cuda-version"; } else if (getGpuCodegenType() == GpuCodegenType::GPU_CG_AMD_HIP) { - curPath = addToPATH(gGpuSdkPath + std::string("/llvm/bin")); + // use the AMD LLVM path, use separate var + curPath = addToPATH(CHPL_ROCM_LLVM_PATH + std::string("/bin")); } } diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp index 17a0d6faf7c4..d98126bfc94a 100644 --- a/compiler/main/driver.cpp +++ b/compiler/main/driver.cpp @@ -130,6 +130,8 @@ const char* CHPL_TARGET_BUNDLED_LINK_ARGS = NULL; const char* CHPL_TARGET_SYSTEM_LINK_ARGS = NULL; const char* CHPL_CUDA_LIBDEVICE_PATH = NULL; +const char* CHPL_ROCM_LLVM_PATH = NULL; +const char* CHPL_ROCM_AMDGCN_PATH = NULL; const char* CHPL_GPU = NULL; const char* CHPL_GPU_ARCH = NULL; @@ -1824,6 +1826,8 @@ static void setChapelEnvs() { if (usingGpuLocaleModel()) { CHPL_CUDA_LIBDEVICE_PATH = envMap["CHPL_CUDA_LIBDEVICE_PATH"]; + CHPL_ROCM_LLVM_PATH = envMap["CHPL_ROCM_LLVM_PATH"]; + CHPL_ROCM_AMDGCN_PATH = envMap["CHPL_ROCM_AMDGCN_PATH"]; CHPL_GPU= envMap["CHPL_GPU"]; CHPL_GPU_ARCH = envMap["CHPL_GPU_ARCH"]; switch (getGpuCodegenType()) { diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 2bb76b5a47c9..73ab5e7dde7b 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -19,23 +19,25 @@ def _validate_rocm_version(): class gpu_type: def __init__(self, sdk_path_env, - sdk_path_env_bitcode, - sdk_path_env_include, compiler, default_arch, llvm_target, runtime_impl, + find_sdk_path, + find_version, version_validator, - llvm_validator): + llvm_validator, + real_gpu): self.sdk_path_env = sdk_path_env - self.sdk_path_env_bitcode = sdk_path_env_bitcode - self.sdk_path_env_include = sdk_path_env_include self.compiler = compiler self.default_arch = default_arch self.llvm_target = llvm_target self.runtime_impl = runtime_impl + self.find_sdk_path = find_sdk_path + self.find_version = find_version self.version_validator = version_validator self.llvm_validator = llvm_validator + self.real_gpu = real_gpu def validate_sdk_version(self): return self.version_validator() @@ -50,7 +52,7 @@ def _validate_rocm_llvm_version(gpu: gpu_type): return _validate_rocm_llvm_version_impl(gpu) @memoize -def _gpu_compiler_version_output(compiler: str, lang: str): +def gpu_compiler_basic_compile(compiler: str, lang: str): dummy_main = "int main() { return 0; }" exists, returncode, stdout, _ = try_run_command([compiler, "-v", "-c", "-x", lang, "-", "-o", "/dev/null"], cmd_input=dummy_main, combine_output=True) if exists and returncode == 
0 and stdout: @@ -59,36 +61,77 @@ def _gpu_compiler_version_output(compiler: str, lang: str): return None def _find_cuda_sdk_path(compiler: str): - out = _gpu_compiler_version_output(compiler, "cu") - # #$ TOP= + out = gpu_compiler_basic_compile(compiler, "cu") + if not out: + return None + regex = r"^#\$ TOP=(.+)$" + match = re.search(regex, out, re.MULTILINE) + return match.group(1) if match else None - # find lib device: #$ NVVMIR_LIBRARY_DIR= +def find_llvm_amd_bin_path(compiler: str): + out = gpu_compiler_basic_compile(compiler, "hip") + if not out: + return None + regex = r"^InstalledDir: (.+)$" + match = re.search(regex, out, re.MULTILINE) + return match.group(1) if match else None -# LLVM AMD GPU -def _find_rocm_sdk_path(compiler: str): - out = _gpu_compiler_version_output(compiler, "hip") - # InstalledDir +def find_amdgcn_path(compiler: str): + out = gpu_compiler_basic_compile(compiler, "hip") + if not out: + return None + # find the builtin bitcode path + # this will likely appear many times, we just take the first occurrence + regex = r"-mlink-builtin-bitcode\s*(/.+?amdgcn/bitcode)" + match = re.search(regex, out) + if not match: + return None + full_path = match.group(1) + # strip the 'bitcode' part (and trailing '/') + return os.path.dirname(full_path) -# HIP -def _find_rocm_include_path(compiler: str): - # Found HIP installation - pass +def _find_hip_sdk_path(compiler: str): + out = gpu_compiler_basic_compile(compiler, "hip") + if not out: + return None + regex = r"^Found HIP installation: (.+)," + match = re.search(regex, out, re.MULTILINE) + return match.group(1) if match else None def _find_cuda_version(compiler: str): - # cuda, we can run nvcc --version + # we can run 'nvcc --version' # 'Cuda compilation tools, release' - pass + regex = r"Cuda compilation tools, release ([\d\.]+)" + + exists, returncode, out, _ = utils.try_run_command([compiler, "--version"]) + if not (exists and returncode == 0): + return None + + match = re.search(regex, out) + return match.group(1) if match else None def _find_rocm_version(compiler: str): - # hip/amd sucks, we have to guess - # one of the following regexs in the compiler output, look in this order - ' roc-VERSION ' - '/rocm-VERSION/' - '/hip-VERSION/' - '/llvm-amdgpu-VERSION/' - '/rocm/VERSION/' - pass + # hip/amd has less uniform version info, we have to guess + # use one of the following regexes in the compiler output in this order + sep = os.path.sep + regexes = [ + r"\broc-([\d\.]+)\b", + sep+r"rocm-([\d\.]+)"+sep, + sep+r"hip-([\d\.]+)", + sep+r"llvm-amdgpu-([\d\.]+)", + sep+r"rocm"+sep+r"([\d\.]+)"+sep, + ] + + out = gpu_compiler_basic_compile(compiler, "hip") + if not out: + return None + + for regex in regexes: + match = re.search(regex, out) + if match: + return match.group(1) + return None GPU_TYPES = { "nvidia": gpu_type(sdk_path_env="CHPL_CUDA_PATH", @@ -99,16 +142,18 @@ def _find_rocm_version(compiler: str): find_sdk_path=_find_cuda_sdk_path, find_version=_find_cuda_version, version_validator=_validate_cuda_version, - llvm_validator=_validate_cuda_llvm_version), + llvm_validator=_validate_cuda_llvm_version, + real_gpu=True), "amd": gpu_type(sdk_path_env="CHPL_ROCM_PATH", compiler="hipcc", default_arch="", llvm_target="AMDGPU", runtime_impl="rocm", - find_sdk_path=_find_rocm_sdk_path, + find_sdk_path=_find_hip_sdk_path, find_version=_find_rocm_version, version_validator=_validate_rocm_version, - llvm_validator=_validate_rocm_llvm_version), + llvm_validator=_validate_rocm_llvm_version, + real_gpu=True), "cpu": 
gpu_type(sdk_path_env="", compiler="", default_arch="", @@ -117,7 +162,18 @@ def _find_rocm_version(compiler: str): find_sdk_path=lambda compiler: None, find_version=lambda compiler: None, version_validator=lambda: None, - llvm_validator=lambda: None), + llvm_validator=lambda: None, + real_gpu=False), + "none": gpu_type(sdk_path_env="", + compiler="", + default_arch="", + llvm_target="", + runtime_impl="", + find_sdk_path=lambda compiler: None, + find_version=lambda compiler: None, + version_validator=lambda: None, + llvm_validator=lambda: None, + real_gpu=False) } @@ -150,7 +206,8 @@ def get_llvm_override(): if get() == 'amd': major_version = get_sdk_version().split('.')[0] if major_version == '5': - return '{}/llvm/bin/llvm-config'.format(get_sdk_path('amd')) + llvm_path = find_llvm_amd_bin_path(get_gpu_compiler()) + return '{}/llvm-config'.format(llvm_path) pass return 'none' @@ -214,66 +271,38 @@ def get_gpu_compiler(): @memoize -def get_sdk_path(for_gpu, sdk_type='bitcode'): - gpu_type = get() - +def get_sdk_path(for_gpu): # No SDK path if GPU is not being used. - if gpu_type in ('cpu', 'none'): + if not GPU_TYPES[get()].real_gpu: return 'none' - - # Check vendor-specific environment variable for SDK path gpu = GPU_TYPES[for_gpu] - sub_env_names = { - "bitcode": gpu.sdk_path_env_bitcode, - "include": gpu.sdk_path_env_include, - } - assert sdk_type in sub_env_names - - # get the sub env if it exists - chpl_sdk_path = os.environ.get(sub_env_names[sdk_type]) - if chpl_sdk_path: - return chpl_sdk_path - - # otherwise use the basic one + + def validate_path(p): + # TODO: for now, just do a simple validation that checks if the path exists + return (os.path.exists(p) and os.path.isdir(p)) + + # use user specify if given chpl_sdk_path = os.environ.get(gpu.sdk_path_env) if chpl_sdk_path: + if for_gpu == get() and not validate_path(chpl_sdk_path): + _reportMissingGpuReq( + "CHPL_GPU={} specified but SDK path '{}' does not exist." + .format(for_gpu, chpl_sdk_path)) + return 'error' return chpl_sdk_path - # try to find the SDK by running `which` on a vendor-specific program. - exists, returncode, my_stdout, my_stderr = utils.try_run_command(["which", - gpu.compiler]) - - if exists and returncode == 0: - pass - # # Walk up from directories from the one containing the gpu compiler - # # (e.g. `nvcc` or `hipcc`) until we find a directory that starts with - # # `runtime_impl` (e.g `cuda` or `rocm`) - # # TODO: this logic does not seem to work for spack - # real_path = os.path.realpath(my_stdout.strip()).strip() - # path_parts = real_path.split("/") - # chpl_sdk_path = "/" - # for part in path_parts: - # if len(part) == 0: continue - # chpl_sdk_path += part - # if not part.startswith(gpu.runtime_impl): - # chpl_sdk_path += "/" - # else: - # break - - # validate the SDK path found - if not (os.path.exists(chpl_sdk_path) and os.path.isdir(chpl_sdk_path)): + # find the sdk path from the compiler based on the one in PATH + sdk_path = gpu.find_sdk_path(gpu.compiler) + if sdk_path: + if not validate_path(sdk_path): _reportMissingGpuReq( - "Can't infer {} toolkit from '{}'. Try setting {}." - .format(get(), real_path, gpu.sdk_path_env) - ) + "Can't find {} SDK path from '{}'. Try setting {}." + .format(for_gpu, gpu.compiler, gpu.sdk_path_env)) return 'error' - - return chpl_sdk_path - elif gpu_type == for_gpu: - _reportMissingGpuReq(("Can't find {} toolkit. 
Try setting {} to the " + - "{} installation path.").format(gpu.runtime_impl, - gpu.sdk_path_env, - gpu.runtime_impl)) + return sdk_path + elif for_gpu == get(): + _reportMissingGpuReq("Can't infer {} toolkit from '{}'. Try setting {}." + .format(gpu.runtime_impl, gpu.compiler, gpu.sdk_path_env)) return 'error' else: return '' @@ -295,7 +324,7 @@ def get_runtime_compile_args(): system = [] gpu_type = get() - sdk_path = get_sdk_path(gpu_type, sdk_type='include') + sdk_path = get_sdk_path(gpu_type) incl = chpl_home_utils.get_chpl_runtime_incl() # this -D is needed since it affects code inside of headers @@ -325,7 +354,7 @@ def get_runtime_link_args(): system = [] gpu_type = get() - sdk_path = get_sdk_path(gpu_type, sdk_type='include') + sdk_path = get_sdk_path(gpu_type) if gpu_type == "nvidia": system.append("-L" + os.path.join(sdk_path, "lib64")) @@ -344,30 +373,60 @@ def get_runtime_link_args(): return bundled, system - - - - def get_cuda_libdevice_path(): if get() == 'nvidia': + # TODO: # find lib device: #$ NVVMIR_LIBRARY_DIR= # TODO this only makes sense when we are generating for nvidia chpl_cuda_path = get_sdk_path('nvidia') - + print(chpl_cuda_path) + compiler = get_gpu_compiler() + out = gpu_compiler_basic_compile(compiler, "cu") + if not out: + _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is " "set such that CHPL_CUDA_PATH points to the CUDA installation.") + return "error" + regex = r"^#\$ NVVMIR_LIBRARY_DIR=(.+)$" + match = re.search(regex, out, re.MULTILINE) + if not match: + _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is " + "set such that CHPL_CUDA_PATH points to the CUDA installation.") + return 'error' + libdevice_path = match.group(1) # there can be multiple libdevices for multiple compute architectures. Not # sure how realistic that is, nor I see multiple instances in the systems I # have access to. They are always named `libdevice.10.bc`, but I just want # to be sure here. - path_part = "/nvvm/libdevice/libdevice*.bc" - libdevices = glob.glob(chpl_cuda_path+path_part) + libdevices = glob.glob(os.path.join(libdevice_path, "libdevice.*.bc")) if len(libdevices) == 0: _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is " - "set such that CHPL_CUDA_PATH{} exists.".format(path_part)) + "set such that CHPL_CUDA_PATH{} exists.") return 'error' else: return libdevices[0] return "none" +def get_rocm_llvm_path(): + if get() == 'amd': + compiler = get_gpu_compiler() + llvm_path = find_llvm_amd_bin_path(compiler) + if not llvm_path: + _reportMissingGpuReq("Could not find llvm-amd in {}".format(compiler)) + return 'error' + # strip bin path component (and trailing /) + return os.path.dirname(llvm_path) + return 'none' + +def get_rocm_amdgcn_path(): + if get() == 'amd': + compiler = get_gpu_compiler() + amdgcn_path = find_amdgcn_path(compiler) + if not amdgcn_path: + _reportMissingGpuReq("Could not find amdgcn in {}".format(compiler)) + return 'error' + return amdgcn_path + return 'none' + + def validateLlvmBuiltForTgt(expectedTgt): # If we're using the bundled LLVM, llvm-config may not have been built # before we call chplenv. 
It seems safe to assume the bundled LLVM has been @@ -452,57 +511,13 @@ def _validate_cuda_version_impl(): return True def get_sdk_version(): - version = 'none' - if get() == 'amd': - chpl_rocm_path = get_sdk_path('amd', sdk_type='include') - files_to_try = ['%s/.info/version-hiprt' % chpl_rocm_path, - '%s/.info/version-libs' % chpl_rocm_path] - - version_filename = None - for fname in files_to_try: - if os.path.exists(fname): - version_filename = fname - break - - rocm_version = None - if version_filename is not None: - rocm_version = open(version_filename).read() - else: - exists, returncode, my_stdout, my_stderr = utils.try_run_command( - ["hipcc", "--version"]) - if exists and returncode == 0: - match = re.search(r"rocm?-([\d\.]+)", my_stdout) - if match: - rocm_version = match.group(1) - else: - match = re.search(r"llvm-amdgpu-([\d\.]+)", my_stdout) - if match: - rocm_version = match.group(1) - version = rocm_version - elif get() == 'nvidia': - chpl_cuda_path = get_sdk_path('nvidia') - version_file_json = '%s/version.json' % chpl_cuda_path - version_file_txt = '%s/version.txt' % chpl_cuda_path - cuda_version = None - if os.path.exists(version_file_json): - f = open(version_file_json) - version_json = json.load(f) - f.close() - cuda_version = version_json["cuda"]["version"] - elif os.path.exists(version_file_txt): - txt = open(version_file_txt).read() - match = re.search(r'\d+\.\d+\.\d+', txt) - if match: - cuda_version = match.group() - if cuda_version is None: - exists, returncode, my_stdout, my_stderr = utils.try_run_command( - ["nvcc", "--version"]) - if exists and returncode == 0: - pattern = r"Cuda compilation tools, release ([\d\.]+)" - match = re.search(pattern, my_stdout) - if match: - cuda_version = match.group(1) - version = cuda_version + + gpu = GPU_TYPES[get()] + if not gpu.real_gpu: + return 'none' + version = gpu.find_version(get_gpu_compiler()) + # TODO add validation for if the compiler matches what the user set CHPL_GPU_SDK_VERSION? 
+
     version = version.strip() if version is not None else 'none'
     return version
@@ -517,7 +532,7 @@ def _validate_rocm_version_impl():
     rocm_version = get_sdk_version()
 
-    if rocm_version is None:
+    if rocm_version == 'none':
         _reportMissingGpuReq("Unable to determine ROCm version.")
         return False
 
diff --git a/util/chplenv/compile_link_args_utils.py b/util/chplenv/compile_link_args_utils.py
index ff2353f4b2b6..69794c07e186 100644
--- a/util/chplenv/compile_link_args_utils.py
+++ b/util/chplenv/compile_link_args_utils.py
@@ -50,7 +50,7 @@ def get_runtime_includes_and_defines():
         # this is needed since it affects code inside of headers
         bundled.append("-DCHPL_COMM_DEBUG")
 
-    gpu_bundled, gpu_system = chpl_gpu.get_runtime_includes_and_defines()
+    gpu_bundled, gpu_system = chpl_gpu.get_runtime_compile_args()
     bundled.extend(gpu_bundled)
     system.extend(gpu_system)
diff --git a/util/chplenv/printchplenv.py b/util/chplenv/printchplenv.py
index d74bc77c3f0e..737be8c5d9bb 100755
--- a/util/chplenv/printchplenv.py
+++ b/util/chplenv/printchplenv.py
@@ -107,6 +107,8 @@
     ChapelEnv(' CHPL_CUDA_PATH', INTERNAL),
     ChapelEnv(' CHPL_ROCM_PATH', INTERNAL),
     ChapelEnv(' CHPL_CUDA_LIBDEVICE_PATH', INTERNAL),
+    ChapelEnv(' CHPL_ROCM_LLVM_PATH', INTERNAL),
+    ChapelEnv(' CHPL_ROCM_AMDGCN_PATH', INTERNAL),
     ChapelEnv('CHPL_COMM', RUNTIME | LAUNCHER | DEFAULT, 'comm'),
     ChapelEnv(' CHPL_COMM_SUBSTRATE', RUNTIME | LAUNCHER | DEFAULT),
     ChapelEnv(' CHPL_GASNET_SEGMENT', RUNTIME | LAUNCHER | DEFAULT),
@@ -205,6 +207,8 @@ def compute_all_values():
     ENV_VALS[' CHPL_GPU'] = chpl_gpu.get()
     ENV_VALS[' CHPL_GPU_SDK_VERSION'] = chpl_gpu.get_sdk_version()
     ENV_VALS[' CHPL_CUDA_LIBDEVICE_PATH'] = chpl_gpu.get_cuda_libdevice_path()
+    ENV_VALS[' CHPL_ROCM_LLVM_PATH'] = chpl_gpu.get_rocm_llvm_path()
+    ENV_VALS[' CHPL_ROCM_AMDGCN_PATH'] = chpl_gpu.get_rocm_amdgcn_path()
     ENV_VALS[' CHPL_GPU_MEM_STRATEGY'] = chpl_gpu.get_gpu_mem_strategy()
     ENV_VALS['CHPL_COMM'] = chpl_comm.get()
     ENV_VALS[' CHPL_COMM_SUBSTRATE'] = chpl_comm_substrate.get()
@@ -369,6 +373,10 @@ def filter_tidy(chpl_env):
         return gpu == 'nvidia'
     elif chpl_env.name == ' CHPL_CUDA_LIBDEVICE_PATH':
         return gpu == 'nvidia'
+    elif chpl_env.name == ' CHPL_ROCM_LLVM_PATH':
+        return gpu == 'amd'
+    elif chpl_env.name == ' CHPL_ROCM_AMDGCN_PATH':
+        return gpu == 'amd'
     elif chpl_env.name == ' CHPL_ROCM_PATH':
         return gpu == 'amd'
     elif chpl_env.name == ' CHPL_GPU_ARCH':
diff --git a/util/chplenv/utils.py b/util/chplenv/utils.py
index d945be0c0125..2ade628227b9 100644
--- a/util/chplenv/utils.py
+++ b/util/chplenv/utils.py
@@ -70,8 +70,9 @@ def try_run_command(command, cmd_input=None, combine_output=False):
         return (False, 0, None, None)
     byte_cmd_input = str.encode(cmd_input, "utf-8") if cmd_input else None
     output = process.communicate(input=byte_cmd_input)
-    return (True, process.returncode, output[0].decode("utf-8"),
-            output[1].decode("utf-8"))
+    my_stdout = output[0].decode("utf-8")
+    my_stderr = output[1].decode("utf-8") if output[1] is not None else None
+    return (True, process.returncode, my_stdout, my_stderr)

From 99772abe3df245239812419df628841ae108ec85 Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Thu, 10 Oct 2024 14:50:18 -0500
Subject: [PATCH 11/21] fix var name

Signed-off-by: Jade Abraham

---
 compiler/include/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/include/driver.h b/compiler/include/driver.h
index eb1aeafb225e..31233d9b6bca 100644
--- a/compiler/include/driver.h
+++ 
b/compiler/include/driver.h @@ -154,7 +154,7 @@ extern const char* CHPL_TARGET_SYSTEM_LINK_ARGS; extern const char* CHPL_CUDA_LIBDEVICE_PATH; extern const char* CHPL_ROCM_LLVM_PATH; -extern const char* CHPL_ROCM_LIBDEVICE_PATH; +extern const char* CHPL_ROCM_AMDGCN_PATH; extern const char* CHPL_GPU; extern const char* CHPL_GPU_ARCH; From b9b1483b073b6929519aa91198dea2e23a314560 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Thu, 10 Oct 2024 15:09:54 -0500 Subject: [PATCH 12/21] cleanups Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 73ab5e7dde7b..b48096a1908b 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -1,7 +1,5 @@ -import utils import os import glob -import json import chpl_locale_model import chpl_platform import chpl_llvm @@ -9,7 +7,7 @@ import re import chpl_tasks import chpl_home_utils -from utils import error, warning, memoize, run_command, try_run_command, which, is_ver_in_range +from utils import error, warning, memoize, try_run_command, which, is_ver_in_range def _validate_cuda_version(): return _validate_cuda_version_impl() @@ -104,7 +102,7 @@ def _find_cuda_version(compiler: str): # 'Cuda compilation tools, release' regex = r"Cuda compilation tools, release ([\d\.]+)" - exists, returncode, out, _ = utils.try_run_command([compiler, "--version"]) + exists, returncode, out, _ = try_run_command([compiler, "--version"]) if not (exists and returncode == 0): return None @@ -281,7 +279,7 @@ def validate_path(p): # TODO: for now, just do a simple validation that checks if the path exists return (os.path.exists(p) and os.path.isdir(p)) - # use user specify if given + # use user specified if given chpl_sdk_path = os.environ.get(gpu.sdk_path_env) if chpl_sdk_path: if for_gpu == get() and not validate_path(chpl_sdk_path): @@ -375,7 +373,6 @@ def get_runtime_link_args(): def get_cuda_libdevice_path(): if get() == 'nvidia': - # TODO: # find lib device: #$ NVVMIR_LIBRARY_DIR= # TODO this only makes sense when we are generating for nvidia chpl_cuda_path = get_sdk_path('nvidia') print(chpl_cuda_path) @@ -435,7 +432,7 @@ def validateLlvmBuiltForTgt(expectedTgt): if chpl_llvm.get() == 'bundled': return True - exists, returncode, my_stdout, my_stderr = utils.try_run_command( + exists, returncode, my_stdout, _ = try_run_command( [chpl_llvm.get_llvm_config(), "--targets-built"]) if not exists or returncode != 0: @@ -511,13 +508,11 @@ def _validate_cuda_version_impl(): return True def get_sdk_version(): - gpu = GPU_TYPES[get()] if not gpu.real_gpu: return 'none' version = gpu.find_version(get_gpu_compiler()) - # TODO add validation for if the compiler matches what the user set CHPL_GPU_SDK_VERSION? - + # TODO: add validation for if the compiler matches what the user set CHPL_GPU_SDK_VERSION? version = version.strip() if version is not None else 'none' return version @@ -568,10 +563,4 @@ def validate(chplLocaleModel): gpu.validate_llvm() - for depr_env in ("CHPL_GPU_CODEGEN", "CHPL_GPU_RUNTIME"): - if os.environ.get(depr_env): - warning(depr_env + " is deprecated and now ignored. 
Please use " + - "'CHPL_GPU=[nvidia|amd|cpu]' to choose a GPU target " + - "explicitly.") - return True From a999c0e46fb653c4992df61d919aa7a5eb7b900f Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Thu, 10 Oct 2024 16:11:15 -0500 Subject: [PATCH 13/21] fix chplenv test Signed-off-by: Jade Abraham --- modules/standard/ChplConfig.chpl | 1 - test/compflags/albrecht/chplenv/chplenv.chpl | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/standard/ChplConfig.chpl b/modules/standard/ChplConfig.chpl index cda29ca67483..1e21654bc79e 100644 --- a/modules/standard/ChplConfig.chpl +++ b/modules/standard/ChplConfig.chpl @@ -194,7 +194,6 @@ module ChplConfig { param CHPL_GPU_SDK_VERSION:string; CHPL_GPU_SDK_VERSION = __primitive("get compiler variable", "CHPL_GPU_SDK_VERSION"); - @chpldoc.nodoc @unstable("'ChplConfig.CHPL_LIB_PIC' is unstable and may be replaced with a different way to access this information in the future") param CHPL_LIB_PIC: string; diff --git a/test/compflags/albrecht/chplenv/chplenv.chpl b/test/compflags/albrecht/chplenv/chplenv.chpl index 1af9ae1a0b2c..358bd8645122 100644 --- a/test/compflags/albrecht/chplenv/chplenv.chpl +++ b/test/compflags/albrecht/chplenv/chplenv.chpl @@ -8,6 +8,7 @@ writeln("CHPL_TARGET_ARCH=",CHPL_TARGET_ARCH); writeln("CHPL_TARGET_CPU=",CHPL_TARGET_CPU); writeln("CHPL_LOCALE_MODEL=",CHPL_LOCALE_MODEL); writeln("CHPL_GPU=", CHPL_GPU); +writeln("CHPL_GPU_SDK_VERSION=", CHPL_GPU_SDK_VERSION); writeln("CHPL_GPU_MEM_STRATEGY=", CHPL_GPU_MEM_STRATEGY); writeln("CHPL_COMM=",CHPL_COMM); writeln("CHPL_COMM_SUBSTRATE=",CHPL_COMM_SUBSTRATE); From 4af3cf8494db7dc039092e472f629f3b54ba0329 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 14 Oct 2024 09:30:01 -0500 Subject: [PATCH 14/21] remove extra code Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index b48096a1908b..5da3f6bc7c0c 100644 --- a/util/chplenv/chpl_gpu.py +++ b/util/chplenv/chpl_gpu.py @@ -374,8 +374,6 @@ def get_runtime_link_args(): def get_cuda_libdevice_path(): if get() == 'nvidia': # TODO this only makes sense when we are generating for nvidia - chpl_cuda_path = get_sdk_path('nvidia') - print(chpl_cuda_path) compiler = get_gpu_compiler() out = gpu_compiler_basic_compile(compiler, "cu") if not out: From 50c283fa19acb701ed7a3d496555cbf2913e1aa4 Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 14 Oct 2024 09:30:16 -0500 Subject: [PATCH 15/21] fix finding gcc prefix when command fails Signed-off-by: Jade Abraham --- util/chplenv/chpl_llvm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chplenv/chpl_llvm.py b/util/chplenv/chpl_llvm.py index b2e6735b3b1b..2d710057244f 100755 --- a/util/chplenv/chpl_llvm.py +++ b/util/chplenv/chpl_llvm.py @@ -676,7 +676,7 @@ def get_gcc_prefix_dir(): else: # Try to figure out the GCC prefix by running gcc out, err = run_command(['gcc', '-v'], stdout=True, stderr=True) - out = out + err + out = (out or '') + (err or '') # look for the --prefix= specified when GCC was configured words = out.split() From 9b0973eca266456b30fb654cf422c753d7e32fbc Mon Sep 17 00:00:00 2001 From: Jade Abraham Date: Mon, 14 Oct 2024 09:34:50 -0500 Subject: [PATCH 16/21] cleanup finding libdevice Signed-off-by: Jade Abraham --- util/chplenv/chpl_gpu.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py index 
5da3f6bc7c0c..a407beb6e30e 100644
--- a/util/chplenv/chpl_gpu.py
+++ b/util/chplenv/chpl_gpu.py
@@ -66,6 +66,22 @@ def _find_cuda_sdk_path(compiler: str):
     match = re.search(regex, out, re.MULTILINE)
     return match.group(1) if match else None
 
+def find_cuda_libdevice_path(compiler: str):
+    out = gpu_compiler_basic_compile(compiler, "cu")
+    if not out:
+        return None
+    regex = r"^#\$ NVVMIR_LIBRARY_DIR=(.+)$"
+    match = re.search(regex, out, re.MULTILINE)
+    if not match:
+        return None
+    libdevice_path = match.group(1)
+    # there can be multiple libdevices for multiple compute architectures. Not
+    # sure how realistic that is, nor I see multiple instances in the systems I
+    # have access to. They are always named `libdevice.10.bc`, but I just want
+    # to be sure here.
+    libdevices = glob.glob(os.path.join(libdevice_path, "libdevice.*.bc"))
+    return libdevices[0] if len(libdevices) > 0 else None
+
 def find_llvm_amd_bin_path(compiler: str):
     out = gpu_compiler_basic_compile(compiler, "hip")
 
@@ -375,29 +391,11 @@ def get_cuda_libdevice_path():
     if get() == 'nvidia':
         # TODO this only makes sense when we are generating for nvidia
         compiler = get_gpu_compiler()
-        out = gpu_compiler_basic_compile(compiler, "cu")
-        if not out:
+        libdevice = find_cuda_libdevice_path(compiler)
+        if not libdevice:
             _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is " "set such that CHPL_CUDA_PATH points to the CUDA installation.")
-            return "error"
-        regex = r"^#\$ NVVMIR_LIBRARY_DIR=(.+)$"
-        match = re.search(regex, out, re.MULTILINE)
-        if not match:
-            _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is "
-                                 "set such that CHPL_CUDA_PATH points to the CUDA installation.")
-            return 'error'
-        libdevice_path = match.group(1)
-        # there can be multiple libdevices for multiple compute architectures. Not
-        # sure how realistic that is, nor I see multiple instances in the systems I
-        # have access to. They are always named `libdevice.10.bc`, but I just want
-        # to be sure here.
-        libdevices = glob.glob(os.path.join(libdevice_path, "libdevice.*.bc"))
-        if len(libdevices) == 0:
-            _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is "
-                                 "set such that CHPL_CUDA_PATH{} exists.")
             return 'error'
-        else:
-            return libdevices[0]
-
+        return libdevice
     return "none"
 
 def get_rocm_llvm_path():

From b8299dea16e33cc7951fc1e8f34041a8805e2f76 Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Mon, 14 Oct 2024 09:50:52 -0500
Subject: [PATCH 17/21] resolve TODO, as get() only returns nvidia if
 LOCALE_MODEL is flat

Signed-off-by: Jade Abraham
---
 util/chplenv/chpl_gpu.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py
index a407beb6e30e..a750b818b115 100644
--- a/util/chplenv/chpl_gpu.py
+++ b/util/chplenv/chpl_gpu.py
@@ -389,7 +389,6 @@ def get_runtime_link_args():
 
 def get_cuda_libdevice_path():
     if get() == 'nvidia':
-        # TODO this only makes sense when we are generating for nvidia
         compiler = get_gpu_compiler()
         libdevice = find_cuda_libdevice_path(compiler)
         if not libdevice:

From 99b2bb952a7db0524b21f64268f260f92f2dc40d Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Mon, 14 Oct 2024 10:28:09 -0500
Subject: [PATCH 18/21] cleanup errors

Signed-off-by: Jade Abraham
---
 util/chplenv/chpl_gpu.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py
index a750b818b115..2730438a935c 100644
--- a/util/chplenv/chpl_gpu.py
+++ b/util/chplenv/chpl_gpu.py
@@ -280,7 +280,7 @@ def get_gpu_compiler():
     bin_dir = os.path.join(sdk_path, 'bin')
     full_path = os.path.join(bin_dir, name)
     if not os.path.exists(full_path):
-        _reportMissingGpuReq("Could not find {} in {}".format(name, bin_dir))
+        _reportMissingGpuReq("Can't find {} in '{}'".format(name, bin_dir))
     return full_path
 
 
@@ -300,8 +300,8 @@ def validate_path(p):
     if chpl_sdk_path:
         if for_gpu == get() and not validate_path(chpl_sdk_path):
             _reportMissingGpuReq(
-                "CHPL_GPU={} specified but SDK path '{}' does not exist."
-                .format(for_gpu, chpl_sdk_path))
+                "{}='{}' does not exist. Make sure the toolkit path is correct."
+                .format(gpu.sdk_path_env, chpl_sdk_path))
             return 'error'
         return chpl_sdk_path
 
@@ -310,8 +310,8 @@ def validate_path(p):
     if sdk_path:
         if not validate_path(sdk_path):
             _reportMissingGpuReq(
-                "Can't find {} SDK path from '{}'. Try setting {}."
-                .format(for_gpu, gpu.compiler, gpu.sdk_path_env))
+                "Inferred {} toolkit is not valid. Try setting {}."
+                .format(gpu.runtime_impl, gpu.sdk_path_env))
             return 'error'
         return sdk_path
     elif for_gpu == get():
@@ -319,7 +319,12 @@ def validate_path(p):
         .format(gpu.runtime_impl, gpu.compiler, gpu.sdk_path_env))
         return 'error'
     else:
-        return ''
+        return 'none'
+
+@memoize
+def is_sdk_path_user_specified(for_gpu):
+    return os.environ.get(GPU_TYPES[for_gpu].sdk_path_env) is not None
+
 
 def get_gpu_mem_strategy():
     memtype = os.environ.get("CHPL_GPU_MEM_STRATEGY")
@@ -392,7 +397,7 @@ def get_cuda_libdevice_path():
         compiler = get_gpu_compiler()
         libdevice = find_cuda_libdevice_path(compiler)
         if not libdevice:
-            _reportMissingGpuReq("Can't find libdevice. Please make sure your CHPL_CUDA_PATH is " "set such that CHPL_CUDA_PATH points to the CUDA installation.")
+            _reportMissingGpuReq("Can't determine libdevice path from {}".format(compiler))
             return 'error'
         return libdevice
     return "none"
@@ -402,7 +407,7 @@ def get_rocm_llvm_path():
         compiler = get_gpu_compiler()
         llvm_path = find_llvm_amd_bin_path(compiler)
         if not llvm_path:
-            _reportMissingGpuReq("Could not find llvm-amd in {}".format(compiler))
+            _reportMissingGpuReq("Can't determine AMD LLVM path from {}".format(compiler))
             return 'error'
         # strip bin path component (and trailing /)
         return os.path.dirname(llvm_path)
@@ -413,7 +418,7 @@ def get_rocm_amdgcn_path():
         compiler = get_gpu_compiler()
         amdgcn_path = find_amdgcn_path(compiler)
         if not amdgcn_path:
-            _reportMissingGpuReq("Could not find amdgcn in {}".format(compiler))
+            _reportMissingGpuReq("Can't determine amdgcn path from {}".format(compiler))
             return 'error'
         return amdgcn_path
     return 'none'

From 83ded41646da103d41347c8a1587c8e9b6df5a93 Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Mon, 14 Oct 2024 10:54:38 -0500
Subject: [PATCH 19/21] fix printchplbuilds parsing

Signed-off-by: Jade Abraham
---
 util/chplenv/printchplbuilds.py | 1 +
 util/chplenv/printchplenv.py    | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/util/chplenv/printchplbuilds.py b/util/chplenv/printchplbuilds.py
index c51f24e8d7f2..fb400bf0a4f0 100755
--- a/util/chplenv/printchplbuilds.py
+++ b/util/chplenv/printchplbuilds.py
@@ -45,6 +45,7 @@ class State(Enum):
     'cpu': 'CHPL_TARGET_CPU',
     'loc': 'CHPL_LOCALE_MODEL',
     'gpu': 'CHPL_GPU',
+    'gpu_vers': 'CHPL_GPU_SDK_VERSION',
     'gpu_mem': 'CHPL_GPU_MEM_STRATEGY',
     'comm': 'CHPL_COMM',
     'tasks': 'CHPL_TASKS',
diff --git a/util/chplenv/printchplenv.py b/util/chplenv/printchplenv.py
index 737be8c5d9bb..757a2b479518 100755
--- a/util/chplenv/printchplenv.py
+++ b/util/chplenv/printchplenv.py
@@ -101,9 +101,9 @@
     ChapelEnv('CHPL_TARGET_BACKEND_CPU', INTERNAL),
     ChapelEnv('CHPL_LOCALE_MODEL', RUNTIME | LAUNCHER | DEFAULT, 'loc'),
     ChapelEnv(' CHPL_GPU', RUNTIME | DEFAULT, 'gpu'),
-    ChapelEnv(' CHPL_GPU_SDK_VERSION', RUNTIME),
+    ChapelEnv(' CHPL_GPU_SDK_VERSION', RUNTIME, 'gpu_vers'),
     ChapelEnv(' CHPL_GPU_ARCH', INTERNAL),
-    ChapelEnv(' CHPL_GPU_MEM_STRATEGY', RUNTIME , 'gpu_mem' ),
+    ChapelEnv(' CHPL_GPU_MEM_STRATEGY', RUNTIME , 'gpu_mem'),
     ChapelEnv(' CHPL_CUDA_PATH', INTERNAL),
     ChapelEnv(' CHPL_ROCM_PATH', INTERNAL),
     ChapelEnv(' CHPL_CUDA_LIBDEVICE_PATH', INTERNAL),

From 86b7aa1a7485be7737ff495c513cb9820ded45b8 Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Mon, 14 Oct 2024 12:05:54 -0500
Subject: [PATCH 20/21] make rocm llvm path more robust

Signed-off-by: Jade Abraham
---
 runtime/src/gpu/amd/Makefile.share | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/src/gpu/amd/Makefile.share b/runtime/src/gpu/amd/Makefile.share
index 035e7d135eab..0f7cf7b363ab 100644
--- a/runtime/src/gpu/amd/Makefile.share
+++ b/runtime/src/gpu/amd/Makefile.share
@@ -28,4 +28,4 @@ RUNTIME_CFLAGS += -Wno-strict-prototypes
 
 $(RUNTIME_OBJ_DIR)/gpu-amd-cub.o: gpu-amd-cub.cc \
 	$(RUNTIME_OBJ_DIR_STAMP)
-	PATH=$(PATH):$(CHPL_MAKE_ROCM_PATH)/llvm/bin $(CXX) -c -std=c++17 $(RUNTIME_CXXFLAGS) $(RUNTIME_INCLS) -o $@ $<
+	PATH=$(PATH):$(CHPL_MAKE_ROCM_LLVM_PATH)/bin $(CXX) -c -std=c++17 $(RUNTIME_CXXFLAGS) $(RUNTIME_INCLS) -o $@ $<

From 67f1c887687dcccd2802b82dea3e49b4a50fe16e Mon Sep 17 00:00:00 2001
From: Jade Abraham
Date: Mon, 14 Oct 2024 13:17:23 -0500
Subject: [PATCH 21/21] replace rocm-version.h with `-DROCM_VERSION_MAJOR=...`

Signed-off-by: Jade Abraham
---
 runtime/Makefile                              | 11 --------
 runtime/include/gpu/amd/chpl-gpu-dev-reduce.h |  1 -
 runtime/include/gpu/amd/rocm-version.h        | 26 -------------------
 runtime/src/gpu/amd/gpu-amd-cub.cc            |  1 -
 runtime/src/gpu/amd/gpu-amd.c                 |  1 -
 util/chplenv/chpl_gpu.py                      |  3 +++
 6 files changed, 3 insertions(+), 40 deletions(-)
 delete mode 100644 runtime/include/gpu/amd/rocm-version.h

diff --git a/runtime/Makefile b/runtime/Makefile
index 4526b28b6520..9d422e075ced 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -64,17 +64,6 @@ $(CHPL_ENV_HEADER): $(CHPL_MAKE_HOME)/util/printchplenv $(CHPL_MAKE_HOME)/util/c
 		sed 's/^ *//;s/ *$$//' | \
 		sed 's/[^0-9A-Za-z]/_/g' | \
 		awk '{ print "#define " toupper($$1) }' >> $(CHPL_ENV_HEADER)
-	@$(CHPL_MAKE_HOME)/util/printchplenv --only CHPL_GPU_SDK_VERSION --value | \
-		grep -q none && \
-		echo "#define CHPL_GPU_SDK_VERSION_MAJOR 0" >> $(CHPL_ENV_HEADER) && \
-		echo "#define CHPL_GPU_SDK_VERSION_MINOR 0" >> $(CHPL_ENV_HEADER) && \
-		echo "#define CHPL_GPU_SDK_VERSION_PATCH 0" >> $(CHPL_ENV_HEADER) \
-		|| \
-		$(CHPL_MAKE_HOME)/util/printchplenv --only CHPL_GPU_SDK_VERSION --value | \
-		sed 's/\./ /g' | \
-		awk '{ print "#define CHPL_GPU_SDK_VERSION_MAJOR " $$1; \
-		       print "#define CHPL_GPU_SDK_VERSION_MINOR " $$2; \
-		       print "#define CHPL_GPU_SDK_VERSION_PATCH " $$3 }' >> $(CHPL_ENV_HEADER)
 	@echo "#endif /* _CHPL_ENV_GEN_H_ */" >> $(CHPL_ENV_HEADER)
 
 THIRD_PARTY_PKGS = $(shell $(CHPL_MAKE_PYTHON) $(CHPL_MAKE_HOME)/util/chplenv/third-party-pkgs)
diff --git a/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h b/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h
index 83d0b166414a..ca1aef0759da 100644
--- a/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h
+++ b/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h
@@ -25,7 +25,6 @@
 #include "chpltypes.h"
 #include
 #include
-#include "gpu/amd/rocm-version.h"
 #include "gpu/amd/rocm-utils.h"
 
 #if ROCM_VERSION_MAJOR >= 5
diff --git a/runtime/include/gpu/amd/rocm-version.h b/runtime/include/gpu/amd/rocm-version.h
deleted file mode 100644
index 8da037d82da0..000000000000
--- a/runtime/include/gpu/amd/rocm-version.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2020-2024 Hewlett Packard Enterprise Development LP
- * Copyright 2004-2019 Cray Inc.
- * Other additional copyright holders may be indicated within.
- *
- * The entirety of this work is licensed under the Apache License,
- * Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License.
- *
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __HIP_PLATFORM_AMD__
-#define __HIP_PLATFORM_AMD__
-#endif
-
-// CHPL_GPU_SDK_VERSION_MAJOR is determined by printchplenv
-#define ROCM_VERSION_MAJOR CHPL_GPU_SDK_VERSION_MAJOR
diff --git a/runtime/src/gpu/amd/gpu-amd-cub.cc b/runtime/src/gpu/amd/gpu-amd-cub.cc
index 35adda537201..9b0f437a9e57 100644
--- a/runtime/src/gpu/amd/gpu-amd-cub.cc
+++ b/runtime/src/gpu/amd/gpu-amd-cub.cc
@@ -21,7 +21,6 @@
 
 #include
 
-#include "gpu/amd/rocm-version.h"
 #if ROCM_VERSION_MAJOR >= 5
 
 // if we include this all the time, we get unused function errors
diff --git a/runtime/src/gpu/amd/gpu-amd.c b/runtime/src/gpu/amd/gpu-amd.c
index 25f48d109223..49d9cf5445a0 100644
--- a/runtime/src/gpu/amd/gpu-amd.c
+++ b/runtime/src/gpu/amd/gpu-amd.c
@@ -30,7 +30,6 @@
 #include "chpl-env-gen.h"
 #include "chpl-linefile-support.h"
 #include "gpu/amd/rocm-utils.h"
-#include "gpu/amd/rocm-version.h"
 #include "chpl-topo.h"
 
 #include
diff --git a/util/chplenv/chpl_gpu.py b/util/chplenv/chpl_gpu.py
index 2730438a935c..17c4ab2719fb 100644
--- a/util/chplenv/chpl_gpu.py
+++ b/util/chplenv/chpl_gpu.py
@@ -363,6 +363,9 @@ def get_runtime_compile_args():
         # -isystem instead of -I silences warnings from inside these includes.
         system.append("-isystem" + os.path.join(sdk_path, "include"))
         system.append("-isystem" + os.path.join(sdk_path, "hip", "include"))
+
+        major_version = get_sdk_version().split('.')[0]
+        bundled.append("-DROCM_VERSION_MAJOR=" + major_version)
 
     return bundled, system