Skip to content

Commit

Permalink
Add new configs for gpu on hpe cray ex
Browse files Browse the repository at this point in the history
This adds the following new configuration scripts
for testing GPUs on HPE Cray EX systems:
- `test-gpu-ex-cpu` (analogous to `test-gpu-cpu` for Cray CS)
- `test-gpu-ex-cuda-12.interop` (analogous to
  `test-gpu-cuda.interop` for Cray CS)
- `test-gpu-ex-cuda-12.specialization` (analogous to
  `test-gpu-cuda.specialization` for Cray CS)
- `test-perf.gpu-ex-cuda-12.um` (analogous to
  `test-perf.gpu-cuda.um` for Cray CS)

Signed-off-by: Shreyas Khandekar <[email protected]>
  • Loading branch information
ShreyasKhandekar committed Oct 8, 2024
1 parent 2605b1f commit f008983
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 0 deletions.
14 changes: 14 additions & 0 deletions util/cron/test-gpu-ex-cpu.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# GPU native testing on a Cray EX with CHPL_GPU=cpu
# (using none for CHPL_COMM)

# Absolute path of the directory holding this script; the shared helper
# scripts and the `nightly` driver are looked up next to it. Everything is
# quoted so a checkout path containing whitespace still works, and we bail
# out instead of silently continuing from the wrong directory if cd fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

export CHPL_GPU=cpu
export CHPL_COMM=none
# NOTE(review): by its name this silences the CPU-mode warning -- confirm
# against the Chapel GPU documentation.
export CHPL_GPU_NO_CPU_MODE_WARNING=y

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cpu"

# nightly_args is not assigned in this script (presumably it comes from the
# sourced common scripts). It is intentionally left unquoted so that it
# splits into separate command-line options.
"$CWD/nightly" -cron ${nightly_args}
1 change: 1 addition & 0 deletions util/cron/test-gpu-ex-cuda-12.bash
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export CHPL_LLVM=bundled # CUDA 12 is only supported with bundled LLVM
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_TEST_GPU=true
export CHPL_GPU=nvidia # amd is also detected automatically

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cuda-12"
Expand Down
27 changes: 27 additions & 0 deletions util/cron/test-gpu-ex-cuda-12.interop.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# GPU interop testing on a Cray EX with CUDA 12: runs only the tests under
# gpu/interop/ (see CHPL_NIGHTLY_TEST_DIRS below).

# Absolute path of the directory holding this script; the shared helper
# scripts and the `nightly` driver are looked up next to it. Everything is
# quoted so a checkout path containing whitespace still works, and we bail
# out instead of silently continuing from the wrong directory if cd fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
source "$CWD/common.bash"
source "$CWD/common-hpe-cray-ex.bash"


# We need 12.4 for the stream test because the CUDA driver on pinoak only
# supports PTX for 12.4. Until the driver is updated, we need to stick with
# 12.4 instead of 12.5.
module load cuda/12.4 # pinned to 12.4; see the note above

# We need cublas for the cublas interop test. Because we are using 12.4
# above, and pinoak doesn't have the cublas library for 12.4, we use the
# cublas from 12.5 instead (which is compatible across minor versions).
# This can be removed once we use CUDA 12.5.
export CHPL_LIB_PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/24.7/math_libs/lib64"

export CHPL_LLVM=bundled # CUDA 12 is only supported with bundled LLVM
export CHPL_TEST_GPU=true
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_NIGHTLY_TEST_DIRS="gpu/interop/"

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cuda-12.interop"

# nightly_args is not assigned in this script (presumably it comes from the
# sourced common scripts). It is intentionally left unquoted so that it
# splits into separate command-line options.
"$CWD/nightly" -cron ${nightly_args}
21 changes: 21 additions & 0 deletions util/cron/test-gpu-ex-cuda-12.specialization.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
#
# GPU native testing on a Cray EX with CHPL_GPU_SPECIALIZATION enabled
# (using none for CHPL_COMM)

# Absolute path of the directory holding this script; the shared helper
# scripts and the `nightly` driver are looked up next to it. Everything is
# quoted so a checkout path containing whitespace still works, and we bail
# out instead of silently continuing from the wrong directory if cd fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

module load cudatoolkit # default is CUDA 12

export CHPL_LLVM=bundled # CUDA 12 is only supported with bundled LLVM
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_TEST_GPU=true
export CHPL_GPU=nvidia # amd is also detected automatically

# The one setting that distinguishes this configuration.
export CHPL_GPU_SPECIALIZATION=y

export CHPL_NIGHTLY_TEST_CONFIG_NAME="gpu-ex-cuda-12.specialization"

# nightly_args is not assigned in this script (presumably it comes from the
# sourced common scripts). It is intentionally left unquoted so that it
# splits into separate command-line options.
"$CWD/nightly" -cron ${nightly_args}
30 changes: 30 additions & 0 deletions util/cron/test-perf.gpu-ex-cuda-12.um.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Run GPU performance testing on a Cray EX with CUDA 12, using the
# unified_memory GPU memory strategy

# Absolute path of the directory holding this script; the shared helper
# scripts and the `nightly` driver are looked up next to it.
# ${BASH_SOURCE[0]} is used (rather than $0) to match the other gpu-ex
# scripts. Everything is quoted so a checkout path containing whitespace
# still works, and we bail out instead of silently continuing from the
# wrong directory if cd fails.
CWD=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
source "$CWD/common-native-gpu.bash"
source "$CWD/common-hpe-cray-ex.bash"

module load cudatoolkit # default is CUDA 12

export CHPL_LLVM=bundled # CUDA 12 is only supported with bundled LLVM
export CHPL_COMM=none
export CHPL_LOCALE_MODEL=gpu
export CHPL_LAUNCHER_PARTITION=allgriz
export CHPL_GPU=nvidia # amd is also detected automatically
export CHPL_GPU_MEM_STRATEGY=unified_memory

export CHPL_NIGHTLY_TEST_CONFIG_NAME="perf.gpu-ex-cuda-12.um"

export CHPL_TEST_PERF_CONFIG_NAME="1-node-a100" # pinoak has ampere GPUs
source "$CWD/common-native-gpu-perf.bash"
# make sure this comes after setting SUBDIR (set by native-gpu-perf) and
# CONFIG_NAME
source "$CWD/common-perf.bash"

# Short tag for this configuration; it is used as the performance
# description, as an extra performance config, and as the sync-dir suffix.
SHORT_NAME=um
nightly_args="${nightly_args} -performance-description $SHORT_NAME -performance-configs default:v,$SHORT_NAME:v -sync-dir-suffix $SHORT_NAME"
nightly_args="${nightly_args} -startdate 10/10/24"

# nightly_args is intentionally left unquoted so that it splits into
# separate command-line options.
"$CWD/nightly" -cron ${nightly_args}

0 comments on commit f008983

Please sign in to comment.