Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introducing 'mix' versions #65

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ jobs:
io_library_flag: ''
build_flags: '--with-gpu --with-loki --with-cuda'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
build_flags: '--with-mix --cloudsc-gpu-offload ACC --cloudsc-gpu-lang CUDA'
ctest_exclude_pattern: '-mix' # GPU variants don't work on CPU runners
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
Expand Down Expand Up @@ -104,6 +109,11 @@ jobs:
io_library_flag: ''
build_flags: '--with-gpu --with-loki --with-cuda'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
build_flags: '--with-mix --cloudsc-gpu-offload ACC --cloudsc-gpu-lang CUDA'
ctest_exclude_pattern: '-mix' # GPU variants don't work on CPU runners
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ Balthasar Reuter ([email protected])
using CUDA-Fortran (CUF). To enable this variant,
a suitable CUDA installation is required and the `--with-cuda` flag
needs to be passed at the build stage.
- **dwarf-cloudsc-mix**: GPU-enabled SCC-K-CACHING with data
offload via *OpenMP*/*OpenACC* (specify via
`--cloudsc-gpu-offload=OMP|ACC`, default: `ACC`) and a low-level C-style kernel
implementation via *CUDA*/*HIP*/*SYCL* (specify via
`--cloudsc-gpu-lang=CUDA|HIP|SYCL`, default: `CUDA`). The implementation
consists of a Fortran driver, C-binding/interface and low-level C-style kernel
implementation. To enable this variant, use `--with-mix`.
- **CUDA C prototypes**: To enable these variants, a suitable
CUDA installation is required and the `--with-cuda` flag needs
to be passed at the build stage.
Expand Down
51 changes: 51 additions & 0 deletions arch/ecmwf/hpc2020/nvhpc/23.7/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store tracing and disable (module is *way* too verbose)
# ${-//[^x]/} removes every character except "x" from the current shell option
# flags ($-), so tracing_ is "x" if `set -x` was active and empty otherwise.
# stderr of the group is discarded so that this bookkeeping is not traced itself.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load the environment module named by $1, echoing the command first.
# The echo mimics a trace line ("+ ..."), since shell tracing is switched off
# while this file manipulates modules.
# The argument is quoted so it is passed to `module` as exactly one word.
module_load() {
  echo "+ module load $1"
  module load "$1"
}
# Unload the environment module named by $1, echoing the command first.
# The echo mimics a trace line ("+ ..."), since shell tracing is switched off
# while this file manipulates modules.
# The argument is quoted so it is passed to `module` as exactly one word.
module_unload() {
  echo "+ module unload $1"
  module unload "$1"
}

# Unload all modules to be certain
# (compiler, MPI flavours and tool modules that may already be loaded in the
# calling shell are removed before the pinned versions are loaded below)
module_unload nvidia
module_unload intel-mpi
module_unload openmpi
module_unload hpcx-openmpi
module_unload boost
module_unload hdf5
module_unload cmake
module_unload python3
module_unload java

# Load modules
# NVIDIA programming environment with the NVHPC 23.7 compilers, followed by
# pinned versions of MPI (HPC-X OpenMPI), HDF5, CMake, Python 3 and Java
module_load prgenv/nvidia
module_load nvidia/23.7
module_load hpcx-openmpi/2.17.1
module_load hdf5/1.14.3
module_load cmake/3.25.2
module_load python3/3.10.10-01
module_load java/11.0.6

# Increase stack size to maximum (soft limit only)
ulimit -S -s unlimited

# Restore tracing to stored setting: re-enable `set -x` only if it was active
# when this file was sourced. No unconditional `set -x` here; the stored
# setting decides.
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Toolchain file for ecbuild, given as a relative path
# NOTE(review): presumably resolved relative to this arch directory by the
# build scripts - confirm against the caller
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
1 change: 1 addition & 0 deletions arch/ecmwf/hpc2020/nvhpc/23.7/toolchain.cmake
13 changes: 13 additions & 0 deletions bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,19 @@ options :
ENABLE_CLOUDSC_GPU_OMP_SCC_STACK=ON
ENABLE_CLOUDSC_GPU_OMP_SCC_K_CACHING=ON

- with-mix:
help: Enable GPU kernel variant based on OpenMP/OpenACC and HIP/CUDA/SYCL
cmake: >
ENABLE_CLOUDSC_MIX=ON

- cloudsc-gpu-offload :
help : "Data offload model for GPU variants. Available options: OMP, ACC"
cmake : CLOUDSC_GPU_OFFLOAD={{value}}

- cloudsc-gpu-lang :
help : "Kernel language for low-level GPU kernel implementations. Available options: CUDA, HIP, SYCL"
cmake : CLOUDSC_GPU_LANG={{value}}

- with-cuda :
help : Enable GPU kernel variants based on CUDA and CUDA-Fortran
cmake : >
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_subdirectory(cloudsc_fortran_atlas)
add_subdirectory(cloudsc_pyiface)
add_subdirectory(cloudsc_python)
add_subdirectory(cloudsc_c)
add_subdirectory(cloudsc_mix)
add_subdirectory(cloudsc_cuda)
add_subdirectory(cloudsc_hip)
add_subdirectory(cloudsc_sycl)
Expand Down
137 changes: 137 additions & 0 deletions src/cloudsc_mix/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Define this dwarf variant as an ECBuild feature.
# The feature is off by default and can only be enabled when Serialbox or HDF5
# has been found; the resulting HAVE_CLOUDSC_MIX guards the rest of this file.
ecbuild_add_option( FEATURE CLOUDSC_MIX
    DESCRIPTION "Build the mix version CLOUDSC (Fortran driver + OpenMP/OpenACC + HIP/CUDA/SYCL)"
    DEFAULT OFF
    CONDITION Serialbox_FOUND OR HDF5_FOUND
)

if( HAVE_CLOUDSC_MIX )

# Define integer IDs corresponding to every language choice.
# These IDs, together with the selected GPU_LANG / GPU_OFFLOAD, are handed to
# the library target as compile definitions.
## kernel language
set(CUDA_LANG "1")
set(HIP_LANG "2")
set(SYCL_LANG "3")
## offload model
set(ACC_OFFLOAD "1")
set(OMP_OFFLOAD "2")

# Select offload model (default: ACC)
if( NOT DEFINED CLOUDSC_GPU_OFFLOAD )
    set(CLOUDSC_GPU_OFFLOAD "ACC")
endif()
# The pattern is anchored so that only the exact names are accepted; an
# unanchored match would let e.g. "ACCX" through and leave GPU_OFFLOAD empty.
# The value is quoted so that an empty string reaches the error message below
# instead of producing a malformed if() expression.
if( NOT "${CLOUDSC_GPU_OFFLOAD}" MATCHES "^(ACC|OMP)$" )
    message(FATAL_ERROR "CLOUDSC_GPU_OFFLOAD: '${CLOUDSC_GPU_OFFLOAD}' is not a valid option! (Allowed: 'ACC' and 'OMP')")
endif()

# Map the selected name to its integer ID, e.g. ACC -> ${ACC_OFFLOAD}
set(GPU_OFFLOAD ${${CLOUDSC_GPU_OFFLOAD}_OFFLOAD})

# Select kernel language (default: CUDA)
if( NOT DEFINED CLOUDSC_GPU_LANG )
    set(CLOUDSC_GPU_LANG "CUDA")
endif()
# Anchored and quoted for the same reasons as the offload model check above
if( NOT "${CLOUDSC_GPU_LANG}" MATCHES "^(CUDA|HIP|SYCL)$" )
    message(FATAL_ERROR "CLOUDSC_GPU_LANG: '${CLOUDSC_GPU_LANG}' is not a valid option! (Allowed: 'CUDA', 'HIP' and 'SYCL')")
endif()

# Map the selected name to its integer ID, e.g. CUDA -> ${CUDA_LANG}
set(GPU_LANG ${${CLOUDSC_GPU_LANG}_LANG})

# Enable the compiler for the selected kernel language and set its
# optimisation flags
if (CLOUDSC_GPU_LANG STREQUAL "CUDA")
    enable_language(CUDA)
    # Append to the existing flags (as the HIP branch does) rather than
    # replacing them, so flags supplied by the user or toolchain are kept.
    # NOTE(review): -ffast-math is a host-compiler style spelling; confirm the
    # CUDA compiler in use accepts it
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -ffast-math")
elseif (CLOUDSC_GPU_LANG STREQUAL "HIP")
    # ROCM_PATH: an already defined variable wins, then the environment
    # variable, then the default location /opt/rocm
    if(NOT DEFINED ROCM_PATH)
        if(DEFINED ENV{ROCM_PATH})
            set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed")
        else()
            set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed")
        endif()
    endif()
    enable_language(HIP)
    find_package(hip REQUIRED)
    set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3 -ffast-math")
elseif (CLOUDSC_GPU_LANG STREQUAL "SYCL")
    # The SYCL kernel is built with the C++ compiler, using C++17
    enable_language(CXX)
    set(CMAKE_CXX_STANDARD 17)
endif()

# Library made of the kernel source (cloudsc_c_k_caching.cpp), its header and
# the Fortran module cloudsc_c_k_caching_mod.F90. The listed header is
# installed and the source directory is exported as a public include path.
ecbuild_add_library(
    TARGET dwarf-cloudsc-gpu-lib
    SOURCES
        yoecldp_c.h
        cloudsc_c_k_caching.cpp
        cloudsc_c_k_caching_mod.F90
    INSTALL_HEADERS LISTED
    PUBLIC_LIBS cloudsc-common-lib
    PUBLIC_INCLUDES
        $<INSTALL_INTERFACE:include>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)

# Provide definitions to target: the integer ID of every available kernel
# language / offload model plus the IDs of the selected ones (GPU_LANG,
# GPU_OFFLOAD). PUBLIC, so targets linking the library see them too.
target_compile_definitions(dwarf-cloudsc-gpu-lib PUBLIC
    CUDA_LANG=${CUDA_LANG}
    HIP_LANG=${HIP_LANG}
    SYCL_LANG=${SYCL_LANG}
    ACC_OFFLOAD=${ACC_OFFLOAD}
    OMP_OFFLOAD=${OMP_OFFLOAD}
    GPU_LANG=${GPU_LANG}
    GPU_OFFLOAD=${GPU_OFFLOAD}
)

# No separate target_include_directories() call is needed: the same
# INSTALL_INTERFACE / BUILD_INTERFACE include paths are already declared via
# PUBLIC_INCLUDES in the ecbuild_add_library() call for this target.

# Kernel-language specific settings for the library
# CUDA specific
if (CLOUDSC_GPU_LANG STREQUAL "CUDA")
    # Compile the .cpp kernel source as CUDA
    set_source_files_properties(cloudsc_c_k_caching.cpp PROPERTIES LANGUAGE CUDA)
    # Fortran sources of the library and of the driver executable get -cuda
    # (further flags such as -gpu=maxrregcount could be added here)
    cloudsc_add_compile_options(
        SOURCES cloudsc_c_k_caching_mod.F90 dwarf_cloudsc_gpu.F90 cloudsc_driver_mod.F90
        FLAGS "-cuda")
    target_include_directories(
        dwarf-cloudsc-gpu-lib
        PUBLIC
            ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
    )
    # Request code generation for the configured architecture, for CUDA
    # sources only. Nothing is added when no architecture is configured: a
    # bare $<COMPILE_LANGUAGE:CUDA> evaluates to "1"/"0" and would end up as a
    # literal compiler argument.
    # The expression is quoted so it reaches CMake as a single argument.
    # NOTE(review): assumes CMAKE_CUDA_ARCHITECTURES holds a single plain
    # number (e.g. 80), not a list or a value like "80-real"
    if (DEFINED CMAKE_CUDA_ARCHITECTURES)
        target_compile_options(dwarf-cloudsc-gpu-lib PRIVATE
            "$<$<COMPILE_LANGUAGE:CUDA>:-gencode;arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>"
        )
    endif()
    set_target_properties( dwarf-cloudsc-gpu-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# HIP specific
elseif (CLOUDSC_GPU_LANG STREQUAL "HIP")
    # Compile the .cpp kernel source as HIP
    set_source_files_properties(cloudsc_c_k_caching.cpp PROPERTIES LANGUAGE HIP)
    target_compile_options(dwarf-cloudsc-gpu-lib PRIVATE --offload-arch=gfx90a)
# SYCL: no additional settings at present
endif()

# Driver executable built from the Fortran sources and linked against the
# kernel library
ecbuild_add_executable(
    TARGET dwarf-cloudsc-mix
    SOURCES
        dwarf_cloudsc_gpu.F90
        cloudsc_driver_mod.F90
    LIBS dwarf-cloudsc-gpu-lib
)

# Perform the final link step with the Fortran compiler
set_property(TARGET dwarf-cloudsc-mix PROPERTY LINKER_LANGUAGE Fortran)

# Kernel-language specific settings for the executable.
# Only CUDA needs any at present; HIP and SYCL have none.
if (CLOUDSC_GPU_LANG STREQUAL "CUDA")
    set_property(TARGET dwarf-cloudsc-mix PROPERTY CUDA_SEPARABLE_COMPILATION ON)
    target_link_options(dwarf-cloudsc-mix PRIVATE "-cuda")
endif()

# At configure time, create a symlink named "data" three levels above this
# directory's binary dir, pointing at the "data" directory two levels above
# this source dir
execute_process(
    COMMAND
        ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_SOURCE_DIR}/../../data
        ${CMAKE_CURRENT_BINARY_DIR}/../../../data
)

endif()
Loading
Loading