Skip to content

Commit

Permalink
Add new configs for gpu on hpe cray ex (chapel-lang#26060)
Browse files Browse the repository at this point in the history
This adds the following new configuration scripts for testing GPUs on
HPE Cray EX systems:
- `test-gpu-ex-cpu` (analogous to `test-gpu-cpu` for Cray CS)
- `test-gpu-ex-cuda-12.interop` (analogous to `test-gpu-cuda.interop`
for Cray CS)
- `test-gpu-ex-cuda-12.specialization` (analogous to
`test-gpu-cuda.specialization` for Cray CS)
- `test-perf.gpu-ex-cuda-12.um` (analogous to `test-perf.gpu-cuda.um`
for Cray CS)

[Reviewed by @riftEmber and @e-kayrakli, thanks!]
  • Loading branch information
ShreyasKhandekar authored Oct 9, 2024
2 parents 75028c6 + c49231d commit 9547ee1
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 1 deletion.
14 changes: 14 additions & 0 deletions util/cron/test-gpu-ex-cpu.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# GPU native testing on a Cray EX (using none for CHPL_COMM)
#
# Exports the CHPL_GPU=cpu configuration and then hands off to the `nightly`
# driver located in the same directory as this script.

# Absolute directory holding this script. The shared configuration fragments
# and the `nightly` driver are resolved relative to it. Every expansion is
# quoted so a checkout path containing whitespace or glob characters still
# resolves, and `&&` keeps `pwd` from reporting the caller's directory when
# the `cd` fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

export CHPL_GPU=cpu
export CHPL_COMM=none
export CHPL_GPU_NO_CPU_MODE_WARNING=y

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cpu"

# nightly_args is deliberately left unquoted: it is a whitespace-separated
# option list that has to be split into individual words.
# NOTE(review): nightly_args is not assigned in this script; presumably it is
# provided by the sourced files or the environment -- confirm.
# shellcheck disable=SC2086
"$CWD/nightly" -cron ${nightly_args}
27 changes: 27 additions & 0 deletions util/cron/test-gpu-ex-cuda-12.interop.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# GPU interop testing on a Cray EX
#
# Limits the nightly run to the tests under gpu/interop/ and selects a CUDA
# toolchain for them. This script does not set CHPL_COMM itself.

# Absolute directory holding this script. The shared configuration fragments
# and the `nightly` driver are resolved relative to it. Every expansion is
# quoted so a checkout path containing whitespace or glob characters still
# resolves, and `&&` keeps `pwd` from reporting the caller's directory when
# the `cd` fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$CWD/common.bash"
source "$CWD/common-hpe-cray-ex.bash"


# We need 12.4 for the stream test because the CUDA driver on pinoak only
# supports PTX for 12.4. Until the driver is updated, we need to stick with
# 12.4 instead of 12.5.
module load cuda/12.4 # default is CUDA 12.5

# We need cublas for the cublas interop test, but pinoak doesn't have the
# cublas library for the 12.4 toolkit loaded above, so we use the cublas from
# 12.5 (which is compatible across minor versions).
# This can be removed once we use CUDA 12.5.
#
# The ${VAR:+:...} form appends the previous value (and its separating colon)
# only when CHPL_LIB_PATH is already non-empty, so no dangling ':' (an empty
# path entry) is left behind when it was unset.
export CHPL_LIB_PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/24.7/math_libs/lib64${CHPL_LIB_PATH:+:${CHPL_LIB_PATH}}"

export CHPL_LLVM=bundled # Using bundled LLVM since that's safer
export CHPL_TEST_GPU=true
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_NIGHTLY_TEST_DIRS="gpu/interop/"

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cuda-12.interop"

# nightly_args is deliberately left unquoted: it is a whitespace-separated
# option list that has to be split into individual words.
# NOTE(review): nightly_args is not assigned in this script; presumably it is
# provided by the sourced files or the environment -- confirm.
# shellcheck disable=SC2086
"$CWD/nightly" -cron ${nightly_args}
21 changes: 21 additions & 0 deletions util/cron/test-gpu-ex-cuda-12.specialization.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
#
# GPU native testing on a Cray EX (using none for CHPL_COMM)
#
# Same NVIDIA GPU setup as the other gpu-ex-cuda-12 configurations, with
# CHPL_GPU_SPECIALIZATION turned on.

# Absolute directory holding this script. The shared configuration fragments
# and the `nightly` driver are resolved relative to it. Every expansion is
# quoted so a checkout path containing whitespace or glob characters still
# resolves, and `&&` keeps `pwd` from reporting the caller's directory when
# the `cd` fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

module load cudatoolkit # default is CUDA 12

export CHPL_LLVM=bundled # Using bundled LLVM since that's safer
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_TEST_GPU=true
export CHPL_GPU=nvidia # amd is also detected automatically

# The setting that distinguishes this configuration from its siblings.
export CHPL_GPU_SPECIALIZATION=y

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cuda-12.specialization"

# nightly_args is deliberately left unquoted: it is a whitespace-separated
# option list that has to be split into individual words.
# NOTE(review): nightly_args is not assigned in this script; presumably it is
# provided by the sourced files or the environment -- confirm.
# shellcheck disable=SC2086
"$CWD/nightly" -cron ${nightly_args}
2 changes: 1 addition & 1 deletion util/cron/test-perf.gpu-ex-cuda-12.bash
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ source $CWD/common-hpe-cray-ex.bash

module load cudatoolkit # default is CUDA 12

export CHPL_LLVM=bundled # CUDA 12 is only supported with bundled LLVM
export CHPL_LLVM=bundled # Using bundled LLVM since that's safer
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
Expand Down
30 changes: 30 additions & 0 deletions util/cron/test-perf.gpu-ex-cuda-12.um.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Run GPU performance testing on a Cray EX
#
# Unified-memory variant: exports CHPL_GPU_MEM_STRATEGY=unified_memory and
# tags the performance results with the short name "um".

# Absolute directory holding this script. ${BASH_SOURCE[0]} is used rather
# than $0 so the path is still this file's own when the script is sourced,
# matching the other gpu-ex configuration scripts. Every expansion is quoted
# so a checkout path containing whitespace or glob characters still resolves,
# and `&&` keeps `pwd` from reporting the caller's directory when the `cd`
# fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

module load cudatoolkit # default is CUDA 12

export CHPL_LLVM=bundled # Using bundled LLVM since that's safer
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_GPU=nvidia # amd is detected automatically
export CHPL_GPU_MEM_STRATEGY=unified_memory

export CHPL_NIGHTLY_TEST_CONFIG_NAME="perf.gpu-ex-cuda-12.um"

export CHPL_TEST_PERF_CONFIG_NAME="1-node-a100" # pinoak has ampere GPUs
source "$CWD/common-native-gpu-perf.bash"
# make sure this comes after setting SUBDIR (set by native-gpu-perf) and
# CONFIG_NAME
source "$CWD/common-perf.bash"

# Extend the option list for the driver: label this series "um", graph it
# next to the "default" series, and give it its own sync-dir suffix.
SHORT_NAME=um
nightly_args="${nightly_args} -performance-description $SHORT_NAME -performance-configs default:v,$SHORT_NAME:v -sync-dir-suffix $SHORT_NAME"
nightly_args="${nightly_args} -startdate 10/10/24"

# nightly_args is deliberately left unquoted: it is a whitespace-separated
# option list that has to be split into individual words.
# shellcheck disable=SC2086
"$CWD/nightly" -cron ${nightly_args}

0 comments on commit 9547ee1

Please sign in to comment.