Rework configuration of CHPL_GPU to improve clarity (chapel-lang#26072

) Reworks many of our configuration settings around `CHPL_GPU` to improve clarity and the user experience when setting up Chapel for GPUs. Major highlights: - For the most part, if the target gpu compiler is in `PATH`, our scripts will "do the right thing" - Exposes the current CUDA/ROCm version as `CHPL_GPU_SDK_VERSION` - The macro `ROCM_VERSION_MAJOR` is now based on this value - Switches to inferring information like versions and paths to be based on invocations of the target gpu compiler Implementation changes: - Most information is now determined by looking at the output of a test compile of the target gpu compiler, e.g.: `echo "int main() { return 0; }" | nvcc -v -c -x cu - -o /dev/null 2>&1` - Reworked what `CHPL_ROCM_PATH` means (and removed the sub-variables for this, which were confusing). This now always points to the directory such that `CHPL_ROCM_PATH/bin/hipcc` exists. This was the previous definition, but this PR enforces that better and improves our logic to infer it - Added `CHPL_ROCM_AMDGCN_PATH` and `CHPL_ROCM_LLVM_PATH`, both of which are based on information gained from actually invoking `hipcc`. 
This removes many heuristics we had for finding this - Moved compile/link argument detection into `chpl_gpu.py` - The Chapel compiler should now do a much better job of working with spack installs of the target gpu compiler Fixes: - Resolves chapel-lang#23791 - Resolves chapel-lang#25952 - Resolves chapel-lang#22057 - Resolves chapel-lang#23542 - Resolves [Expose cuda/rocm versions](Cray/chapel-private#5508) Testing - ROCm 4 - [x] validated that printchplenv output is correct, with/without `CHPL_ROCM_PATH` - [x] validated proper errors occur when the path is wrong or the target gpu compiler is "broken" - [x] `make check` - ROCm 5 - [x] validated that printchplenv output is correct, with/without `CHPL_ROCM_PATH` - [x] validated proper errors occur when the path is wrong or the target gpu compiler is "broken" - [x] `make check` - ROCm 6 - [x] validated that printchplenv output is correct, with/without `CHPL_ROCM_PATH` - [x] validated proper errors occur when the path is wrong or the target gpu compiler is "broken" - [x] `start_test test/gpu/native` - [x] `make check` with spack installed ROCm 6 - CUDA 11 - [x] validated that printchplenv output is correct, with/without `CHPL_CUDA_PATH` - [x] validated proper errors occur when the path is wrong or the target gpu compiler is "broken" - [x] `make check` - CUDA 12 - [x] validated that printchplenv output is correct, with/without `CHPL_CUDA_PATH` - [x] validated proper errors occur when the path is wrong or the target gpu compiler is "broken" - [x] `start_test test/gpu/native` - Default config (no GPU) - [x] validated that printchplenv output is correct - [x] paratest with/without comm - CHPL_GPU=cpu - [x] validated that printchplenv output is correct - [x] `start_test test/gpu/native` <details> <summary>command to validate paths</summary> ```bash printchplenv --only CHPL_GPU,CHPL_GPU_SDK_VERSION,CHPL_CUDA_PATH,CHPL_CUDA_LIBDEVICE_PATH,CHPL_ROCM_PATH,CHPL_ROCM_LLVM_PATH,CHPL_ROCM_AMDGCN_PATH ``` </details> [Reviewed by 
@arezaii]
arezaii · Oct 16, 2024 · d71e4a8 · d71e4a8
2 parents c595c71 + 67f1c88
commit d71e4a8
Show file tree

Hide file tree

Showing 18 changed files with 314 additions and 263 deletions.
diff --git a/compiler/include/driver.h b/compiler/include/driver.h
@@ -153,6 +153,8 @@ extern const char* CHPL_TARGET_BUNDLED_LINK_ARGS;
 extern const char* CHPL_TARGET_SYSTEM_LINK_ARGS;
 
 extern const char* CHPL_CUDA_LIBDEVICE_PATH;
+extern const char* CHPL_ROCM_LLVM_PATH;
+extern const char* CHPL_ROCM_AMDGCN_PATH;
 extern const char* CHPL_GPU;
 extern const char* CHPL_GPU_ARCH;
 

diff --git a/compiler/llvm/clangUtil.cpp b/compiler/llvm/clangUtil.cpp
@@ -4450,7 +4450,7 @@ static void linkGpuDeviceLibraries() {
   } else {
     // See <https://github.com/RadeonOpenCompute/ROCm-Device-Libs> for details
     // on what these various libraries are.
-    auto libPath = gGpuSdkPath + std::string("/amdgcn/bitcode");
+    auto libPath = CHPL_ROCM_AMDGCN_PATH + std::string("/bitcode");
     linkBitCodeFile((libPath + "/hip.bc").c_str());
     linkBitCodeFile((libPath + "/ocml.bc").c_str());
     linkBitCodeFile((libPath + "/ockl.bc").c_str());
@@ -4821,11 +4821,7 @@ static void makeBinaryLLVMForHIP(const std::string& artifactFilename,
   std::string inputs = "-inputs=/dev/null";
   std::string outputs = "-outputs=" + fatbinFilename;
 #endif
-  auto sdkString = std::string(gGpuSdkPath);
-  // check file exists, maybe use alternate path (say if spack installed)
-  std::string lldBin = pathExists((sdkString + "/llvm/bin/lld").c_str()) ?
-                       sdkString + "/llvm/bin/lld"  :
-                       sdkString + "/bin/lld";
+  std::string lldBin = CHPL_ROCM_LLVM_PATH + std::string("/bin/lld");
   for (auto& gpuArch : gpuArches) {
     std::string gpuObject = gpuObjFilename + "_" + gpuArch + ".o";
     std::string gpuOut = outFilenamePrefix + "_" + gpuArch + ".out";
@@ -5051,7 +5047,8 @@ void makeBinaryLLVM(void) {
         gpuArgs += " -Wno-unknown-cuda-version";
       }
       else if (getGpuCodegenType() == GpuCodegenType::GPU_CG_AMD_HIP) {
-        curPath = addToPATH(gGpuSdkPath + std::string("/llvm/bin"));
+        // use the AMD LLVM path, use separate var
+        curPath = addToPATH(CHPL_ROCM_LLVM_PATH + std::string("/bin"));
       }
     }
 

diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp
@@ -130,6 +130,8 @@ const char* CHPL_TARGET_BUNDLED_LINK_ARGS = NULL;
 const char* CHPL_TARGET_SYSTEM_LINK_ARGS = NULL;
 
 const char* CHPL_CUDA_LIBDEVICE_PATH = NULL;
+const char* CHPL_ROCM_LLVM_PATH = NULL;
+const char* CHPL_ROCM_AMDGCN_PATH = NULL;
 const char* CHPL_GPU = NULL;
 const char* CHPL_GPU_ARCH = NULL;
 
@@ -1824,14 +1826,16 @@ static void setChapelEnvs() {
 
   if (usingGpuLocaleModel()) {
     CHPL_CUDA_LIBDEVICE_PATH = envMap["CHPL_CUDA_LIBDEVICE_PATH"];
+    CHPL_ROCM_LLVM_PATH = envMap["CHPL_ROCM_LLVM_PATH"];
+    CHPL_ROCM_AMDGCN_PATH = envMap["CHPL_ROCM_AMDGCN_PATH"];
     CHPL_GPU= envMap["CHPL_GPU"];
     CHPL_GPU_ARCH = envMap["CHPL_GPU_ARCH"];
     switch (getGpuCodegenType()) {
       case GpuCodegenType::GPU_CG_NVIDIA_CUDA:
         gGpuSdkPath = envMap["CHPL_CUDA_PATH"];
         break;
       case GpuCodegenType::GPU_CG_AMD_HIP:
-        gGpuSdkPath = envMap["CHPL_ROCM_BITCODE_PATH"];
+        gGpuSdkPath = envMap["CHPL_ROCM_PATH"];
         break;
       case GpuCodegenType::GPU_CG_CPU:
         gGpuSdkPath = "";

diff --git a/modules/standard/ChplConfig.chpl b/modules/standard/ChplConfig.chpl
@@ -189,6 +189,11 @@ module ChplConfig {
   param CHPL_GPU:string;
   CHPL_GPU = __primitive("get compiler variable", "CHPL_GPU");
 
+  @chpldoc.nodoc
+  @unstable("'ChplConfig.CHPL_GPU_SDK_VERSION' is unstable and may be replaced with a different way to access this information in the future")
+  param CHPL_GPU_SDK_VERSION:string;
+  CHPL_GPU_SDK_VERSION = __primitive("get compiler variable", "CHPL_GPU_SDK_VERSION");
+
   @chpldoc.nodoc
   @unstable("'ChplConfig.CHPL_LIB_PIC' is unstable and may be replaced with a different way to access this information in the future")
   param CHPL_LIB_PIC: string;

diff --git a/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h b/runtime/include/gpu/amd/chpl-gpu-dev-reduce.h
@@ -25,7 +25,6 @@
 #include "chpltypes.h"
 #include <hip/hip_common.h>
 #include <hip/hip_runtime.h>
-#include "gpu/amd/rocm-version.h"
 #include "gpu/amd/rocm-utils.h"
 
 #if ROCM_VERSION_MAJOR >= 5

diff --git a/runtime/include/gpu/amd/rocm-version.h b/runtime/include/gpu/amd/rocm-version.h
diff --git a/runtime/src/gpu/amd/Makefile.share b/runtime/src/gpu/amd/Makefile.share
@@ -28,4 +28,4 @@ RUNTIME_CFLAGS += -Wno-strict-prototypes
 
 $(RUNTIME_OBJ_DIR)/gpu-amd-cub.o: gpu-amd-cub.cc \
                                          $(RUNTIME_OBJ_DIR_STAMP)
-	PATH=$(PATH):$(CHPL_MAKE_ROCM_PATH)/llvm/bin $(CXX) -c -std=c++17 $(RUNTIME_CXXFLAGS) $(RUNTIME_INCLS) -o $@ $<
+	PATH=$(PATH):$(CHPL_MAKE_ROCM_LLVM_PATH)/bin $(CXX) -c -std=c++17 $(RUNTIME_CXXFLAGS) $(RUNTIME_INCLS) -o $@ $<
diff --git a/runtime/src/gpu/amd/gpu-amd-cub.cc b/runtime/src/gpu/amd/gpu-amd-cub.cc
@@ -21,7 +21,6 @@
 
 
 #include <hip/hip_common.h>
-#include "gpu/amd/rocm-version.h"
 
 #if ROCM_VERSION_MAJOR >= 5
 // if we include this all the time, we get unused function errors

diff --git a/runtime/src/gpu/amd/gpu-amd.c b/runtime/src/gpu/amd/gpu-amd.c
@@ -30,7 +30,6 @@
 #include "chpl-env-gen.h"
 #include "chpl-linefile-support.h"
 #include "gpu/amd/rocm-utils.h"
-#include "gpu/amd/rocm-version.h"
 #include "chpl-topo.h"
 
 #include <assert.h>

diff --git a/test/compflags/albrecht/chplenv/chplenv.chpl b/test/compflags/albrecht/chplenv/chplenv.chpl
@@ -8,6 +8,7 @@ writeln("CHPL_TARGET_ARCH=",CHPL_TARGET_ARCH);
 writeln("CHPL_TARGET_CPU=",CHPL_TARGET_CPU);
 writeln("CHPL_LOCALE_MODEL=",CHPL_LOCALE_MODEL);
 writeln("CHPL_GPU=", CHPL_GPU);
+writeln("CHPL_GPU_SDK_VERSION=", CHPL_GPU_SDK_VERSION);
 writeln("CHPL_GPU_MEM_STRATEGY=", CHPL_GPU_MEM_STRATEGY);
 writeln("CHPL_COMM=",CHPL_COMM);
 writeln("CHPL_COMM_SUBSTRATE=",CHPL_COMM_SUBSTRATE);