Skip to content

Initial GPU port based on CUDA. #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,13 @@ Makefile.in
# /tests/
/patches/*.patch
/src/TAGS

# Eclipse IDE meta files
/.ptp-sync/
.ptp-sync-folder
.cproject
.project
.settings

# Temporary files
*~
8 changes: 7 additions & 1 deletion config/cs_auto_flags.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
# cxxflags_default_prf # Added to $CXXFLAGS for profiling (default: "-g")
# cxxflags_default_omp # Added to $CXXFLAGS for OpenMP (default: "")
# cxxflags_default_std # C++ standard variant (default: "")
# cxxflags_default_cuda_offload # Added to $CXXFLAGS for CUDA Offl. (default: "-std=c++11 -I/usr/local/cuda/include")

# fcflags_default # Base FCFLAGS (default: "")
# fcflags_default_dbg # Added to $FCFLAGS for debugging (default: "-g")
Expand All @@ -55,11 +56,13 @@
# ldflags_default_opt # Added to $LDFLAGS for optimization (default: "-O")
# ldflags_default_prf # Added to $LDFLAGS for profiling (default: "-g")
# ldflags_rpath # Added to $LDFLAGS for shared libs (default: "")
# ldflags_default_cuda_offload # Added to $LDFLAGS for CUDA Offl. (default: "-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib")

# libs_default # Base LIBS (default: "")
# libs_default_dbg # Added to $LIBS for debugging (default: "")
# libs_default_opt # Added to $LIBS for optimization (default: "")
# libs_default_prf # Added to $LIBS for profiling (default: "")
# libs_default_cuda_offload # Added to $LIBS for CUDA Offl. (default: "-lcuda -lcudart")

# Two other environment variable strings are defined, containing possibly
# more detailed compiler information:
Expand Down Expand Up @@ -299,7 +302,6 @@ elif test "x$cs_gcc" = "xclang"; then
cflags_default_dbg="-g -O0"
cflags_default_opt="-O2"
cflags_default_hot="-O3"
cflags_default_omp="-fopenmp=libomp"

# Otherwise, are we using pathcc ?
#---------------------------------
Expand Down Expand Up @@ -531,6 +533,7 @@ if test "x$cs_gxx" = "xg++"; then
cxxflags_default_hot="-O3"
cxxflags_default_omp="-fopenmp"
cxxflags_default_std="-ansi -funsigned-char"
cxxflags_default_cuda_offload="-std=c++11 -I/usr/local/cuda/include"

# Modify default flags on certain systems

Expand Down Expand Up @@ -1057,6 +1060,9 @@ ldflags_default_prf="-g"

if test "x$cs_linker_set" != "xyes" ; then

ldflags_default_cuda_offload="-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib"
libs_default_cuda_offload="-lcuda -lcudart"

case "$host_os" in

linux*)
Expand Down
61 changes: 61 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,62 @@ if test "x$cs_have_openmp_f" = "xyes" ; then
AC_FC_LIBRARY_LDFLAGS
fi

#------------------------------------------------------------------------------
# Determine CUDA support
#------------------------------------------------------------------------------


cs_have_cuda_offload=no

# Optional feature switch: --enable-cuda-offload / --disable-cuda-offload.
# CUDA offload stays disabled unless explicitly requested.
AC_ARG_ENABLE([cuda-offload],
  [AS_HELP_STRING([--enable-cuda-offload], [enable CUDA offload])],
  [
    case "${enableval}" in
      yes) cs_have_cuda_offload=yes ;;
      no)  cs_have_cuda_offload=no ;;
      *)   AC_MSG_ERROR([bad value ${enableval} for --enable-cuda-offload]) ;;
    esac
  ],
  [ cs_have_cuda_offload=no ]
)

# Reports the requested setting only; the toolkit itself is probed below.
AC_MSG_CHECKING([for CUDA support])
AC_MSG_RESULT([$cs_have_cuda_offload])

if test "x$cs_have_cuda_offload" = "xyes" ; then

  # Fail at configure time rather than at build time when the CUDA compiler
  # driver cannot be found in PATH.
  AC_CHECK_PROG([cs_have_nvcc], [nvcc], [yes], [no])
  if test "x$cs_have_nvcc" != "xyes" ; then
    AC_MSG_ERROR([--enable-cuda-offload requires nvcc, which was not found in PATH])
  fi

  # Snapshot of the incoming flags; nothing in this section restores them.
  saved_CXXFLAGS="$CXXFLAGS"
  saved_LDFLAGS="$LDFLAGS"

  # Select compute capabilities we want to support: one -gencode option is
  # emitted per listed architecture number.
  CUDA_COMPUTE_CAPABILITIES=""
  for cs_cuda_cc in 35 37 60 70 ; do
    CUDA_COMPUTE_CAPABILITIES="${CUDA_COMPUTE_CAPABILITIES} -gencode arch=compute_${cs_cuda_cc},code=sm_${cs_cuda_cc}"
  done

  # Add CUDA offload default options.
  # NOTE(review): -I../../ is a relative include path; presumably it is meant
  # to resolve to the top build directory from src/cuda - confirm.
  CXXFLAGS="${CXXFLAGS} ${cxxflags_default_cuda_offload}"
  CUDAFLAGS="-DHAVE_CONFIG_H -I../../ ${CUDA_COMPUTE_CAPABILITIES} --maxrregcount=64 -Xptxas -v "

  # Debug builds additionally request device-side debug information.
  if test "x$debug" = xyes; then
    CUDAFLAGS="${CUDAFLAGS} --device-debug"
  fi

  # Make sure we link against CUDA libraries
  LDFLAGS="${LDFLAGS} ${ldflags_default_cuda_offload}"
  LIBS="${LIBS} ${libs_default_cuda_offload}"

  # nvcc drives the compilation and uses the configured C++ compiler as its
  # host compiler through -ccbin.
  CUDACC="nvcc -ccbin $CXX"
  AC_SUBST(CUDACC)
  AC_SUBST(CUDAFLAGS)
  AC_DEFINE([HAVE_CUDA_OFFLOAD], 1, [CUDA Offload Support])
fi

# Exported unconditionally so that Makefiles can test the setting whether or
# not CUDA offload was enabled.
AC_SUBST(cs_have_cuda_offload)
AM_CONDITIONAL(HAVE_CUDA_OFFLOAD, [test "${cs_have_cuda_offload}" = yes])


#------------------------------------------------------------------------------
# Checks for Python support.
#------------------------------------------------------------------------------
Expand Down Expand Up @@ -694,6 +750,10 @@ elif test x$cs_have_catalyst = xyes ; then
cs_have_link_cxx=yes
fi
fi
# CUDA files are interpreted as C++ files.
if test x$cs_have_cuda_offload = xyes ; then
cs_have_link_cxx=yes
fi
if test x$user_CS_LD != "x" ; then
CS_LD=$user_CS_LD
elif test $cs_have_link_cxx = yes ; then
Expand Down Expand Up @@ -1161,6 +1221,7 @@ AC_CONFIG_FILES([Makefile po/Makefile.in
src/pprt/Makefile src/lagr/Makefile src/rayt/Makefile
src/turb/Makefile src/alge/Makefile src/mesh/Makefile
src/user/Makefile src/user_examples/Makefile
src/cuda/Makefile
gui/Makefile gui/Base/Makefile gui/Pages/Makefile
gui/studymanager_gui/Makefile gui/trackcvg/Makefile
salome/fsi_coupling/Makefile
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SUBDIRS = . bft mei
if HAVE_BACKEND
SUBDIRS += \
fvm gui \
base cdo pprt alge mesh turb darc \
base cuda cdo pprt alge mesh turb darc \
atmo cfbl cogz comb ctwr elec lagr rayt \
user user_examples apps
endif
Expand Down
1 change: 1 addition & 0 deletions src/alge/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ endif

AM_CPPFLAGS = \
-I$(top_srcdir)/src/bft \
-I$(top_srcdir)/src/cuda \
-I$(top_srcdir)/src/fvm \
-I$(top_srcdir)/src/mei \
-I$(top_srcdir)/src/base \
Expand Down
16 changes: 16 additions & 0 deletions src/alge/cs_blas.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
*----------------------------------------------------------------------------*/

#include "cs_base.h"
#include "cs_cuda.h"
#include "cs_parall.h"

/*----------------------------------------------------------------------------
Expand Down Expand Up @@ -299,6 +300,11 @@ _cs_dot_xx_superblock(cs_lnum_t n,
{
double dot_xx = 0.0;

# ifdef HAVE_CUDA_OFFLOAD
if (cs_cuda_dot_product_xx(&dot_xx, x, n))
return dot_xx;
# endif

# pragma omp parallel reduction(+:dot_xx) if (n > CS_THR_MIN)
{
cs_lnum_t s_id, e_id;
Expand Down Expand Up @@ -360,6 +366,11 @@ _cs_dot_xx_xy_superblock(cs_lnum_t n,
double *xx,
double *xy)
{
# ifdef HAVE_CUDA_OFFLOAD
if (cs_cuda_dot_product_xx_xy(xx, xy, x, y,n))
return;
# endif

double dot_xx = 0.0, dot_xy = 0.0;

# pragma omp parallel reduction(+:dot_xx, dot_xy) if (n > CS_THR_MIN)
Expand Down Expand Up @@ -433,6 +444,11 @@ _cs_dot_xy_yz_superblock(cs_lnum_t n,
double *xy,
double *yz)
{
# ifdef HAVE_CUDA_OFFLOAD
if (cs_cuda_dot_product_xy_yz(xy, yz, x, y, z, n))
return;
# endif

double dot_xy = 0.0, dot_yz = 0.0;

# pragma omp parallel reduction(+:dot_xy, dot_yz) if (n > CS_THR_MIN)
Expand Down
Loading