code-saturne · sfantao · Oct 31, 2018
diff --git a/.gitignore b/.gitignore
@@ -221,3 +221,13 @@ Makefile.in
 # /tests/
 /patches/*.patch
 /src/TAGS
+
+# Eclipse IDE meta files
+/.ptp-sync/
+.ptp-sync-folder
+.cproject
+.project
+.settings
+
+# Temporary files
+*~
diff --git a/config/cs_auto_flags.sh b/config/cs_auto_flags.sh
@@ -42,6 +42,7 @@
 # cxxflags_default_prf   # Added to $CXXFLAGS for profiling    (default: "-g")
 # cxxflags_default_omp   # Added to $CXXFLAGS for OpenMP       (default: "")
 # cxxflags_default_std   # C++ standard variant                (default: "")
+# cxxflags_default_cuda_offload # Added to $CXXFLAGS for CUDA Offl. (default: "-std=c++11 -I/usr/local/cuda/include")
 
 # fcflags_default        # Base FCFLAGS                       (default: "")
 # fcflags_default_dbg    # Added to $FCFLAGS for debugging    (default: "-g")
@@ -55,11 +56,13 @@
 # ldflags_default_opt    # Added to $LDFLAGS for optimization (default: "-O")
 # ldflags_default_prf    # Added to $LDFLAGS for profiling    (default: "-g")
 # ldflags_rpath          # Added to $LDFLAGS for shared libs  (default: "")
+# ldflags_default_cuda_offload # Added to $LDFLAGS for CUDA Offl. (default: "-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib")
 
 # libs_default           # Base LIBS                          (default: "")
 # libs_default_dbg       # Added to $LIBS for debugging       (default: "")
 # libs_default_opt       # Added to $LIBS for optimization    (default: "")
 # libs_default_prf       # Added to $LIBS for profiling       (default: "")
+# libs_default_cuda_offload # Added to $LIBS for CUDA Offl. (default: "-lcuda -lcudart")
 
 # Two other environment variable strings are defined, containing possibly
 # more detailed compiler information:
@@ -299,7 +302,6 @@ elif test "x$cs_gcc" = "xclang"; then
   cflags_default_dbg="-g -O0"
   cflags_default_opt="-O2"
   cflags_default_hot="-O3"
-  cflags_default_omp="-fopenmp=libomp"
 
 # Otherwise, are we using pathcc ?
 #---------------------------------
@@ -531,6 +533,7 @@ if test "x$cs_gxx" = "xg++"; then
   cxxflags_default_hot="-O3"
   cxxflags_default_omp="-fopenmp"
   cxxflags_default_std="-ansi -funsigned-char"
+  cxxflags_default_cuda_offload="-std=c++11 -I/usr/local/cuda/include"
 
   # Modify default flags on certain systems
 
@@ -1057,6 +1060,9 @@ ldflags_default_prf="-g"
 
 if test "x$cs_linker_set" != "xyes" ; then
 
+  ldflags_default_cuda_offload="-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib"
+  libs_default_cuda_offload="-lcuda -lcudart"
+
   case "$host_os" in
 
     linux*)

diff --git a/configure.ac b/configure.ac
@@ -511,6 +511,62 @@ if test "x$cs_have_openmp_f" = "xyes" ; then
   AC_FC_LIBRARY_LDFLAGS
 fi
 
+#------------------------------------------------------------------------------
+# Determine CUDA support
+#------------------------------------------------------------------------------
+
+cs_have_cuda_offload=no
+
+AC_ARG_ENABLE([cuda-offload],
+  [AS_HELP_STRING([--enable-cuda-offload], [enable CUDA offload])],
+  [
+    case "${enableval}" in
+      yes) cs_have_cuda_offload=yes ;;
+      no)  cs_have_cuda_offload=no ;;
+      *)   AC_MSG_ERROR([bad value ${enableval} for --enable-cuda-offload]) ;;
+    esac
+  ],
+  [ cs_have_cuda_offload=no ]
+)
+
+AC_MSG_CHECKING([for CUDA support])
+AC_MSG_RESULT([$cs_have_cuda_offload])
+
+if test "x$cs_have_cuda_offload" = "xyes" ; then
+
+  # nvcc is invoked through CUDACC below, so fail at configure time if absent.
+  AC_CHECK_PROG([cs_have_nvcc], [nvcc], [yes], [no])
+  AS_IF([test "x$cs_have_nvcc" != "xyes"], [AC_MSG_ERROR([nvcc not found in PATH])])
+
+  # Select compute capabilities we want to support.
+  CUDA_COMPUTE_CAPABILITIES=""
+  CUDA_COMPUTE_CAPABILITIES="${CUDA_COMPUTE_CAPABILITIES} -gencode arch=compute_35,code=sm_35"
+  CUDA_COMPUTE_CAPABILITIES="${CUDA_COMPUTE_CAPABILITIES} -gencode arch=compute_37,code=sm_37"
+  CUDA_COMPUTE_CAPABILITIES="${CUDA_COMPUTE_CAPABILITIES} -gencode arch=compute_60,code=sm_60"
+  CUDA_COMPUTE_CAPABILITIES="${CUDA_COMPUTE_CAPABILITIES} -gencode arch=compute_70,code=sm_70"
+
+  # Add CUDA offload default options
+  CXXFLAGS="${CXXFLAGS} ${cxxflags_default_cuda_offload}"
+  CUDAFLAGS="-DHAVE_CONFIG_H -I../../ ${CUDA_COMPUTE_CAPABILITIES} --maxrregcount=64 -Xptxas -v "
+
+  if test "x$debug" = xyes; then
+    CUDAFLAGS="${CUDAFLAGS} --device-debug"
+  fi
+  # Make sure we link against CUDA libraries
+  LDFLAGS="${LDFLAGS} ${ldflags_default_cuda_offload}"
+  LIBS="${LIBS} ${libs_default_cuda_offload}"
+
+  # Wrap C++ compilers around nvcc
+  CUDACC="nvcc -ccbin $CXX"
+  AC_SUBST([CUDACC])
+  AC_SUBST([CUDAFLAGS])
+  AC_DEFINE([HAVE_CUDA_OFFLOAD], [1], [CUDA Offload Support])
+fi
+
+AC_SUBST([cs_have_cuda_offload])
+AM_CONDITIONAL([HAVE_CUDA_OFFLOAD], [test "${cs_have_cuda_offload}" = yes])
+
+
 #------------------------------------------------------------------------------
 # Checks for Python support.
 #------------------------------------------------------------------------------
@@ -694,6 +750,10 @@ elif test x$cs_have_catalyst = xyes ; then
     cs_have_link_cxx=yes
   fi
 fi
+# CUDA files are interpreted as C++ files.
+if test x$cs_have_cuda_offload = xyes ; then
+  cs_have_link_cxx=yes
+fi
 if test x$user_CS_LD != "x" ; then
   CS_LD=$user_CS_LD
 elif test $cs_have_link_cxx = yes ; then
@@ -1161,6 +1221,7 @@ AC_CONFIG_FILES([Makefile po/Makefile.in
                  src/pprt/Makefile src/lagr/Makefile src/rayt/Makefile
                  src/turb/Makefile src/alge/Makefile src/mesh/Makefile
                  src/user/Makefile src/user_examples/Makefile
+                 src/cuda/Makefile
                  gui/Makefile gui/Base/Makefile gui/Pages/Makefile
                  gui/studymanager_gui/Makefile gui/trackcvg/Makefile
                  salome/fsi_coupling/Makefile

diff --git a/src/Makefile.am b/src/Makefile.am
@@ -27,7 +27,7 @@ SUBDIRS = . bft mei
 if HAVE_BACKEND
 SUBDIRS += \
 fvm gui \
-base cdo pprt alge mesh turb darc \
+base cuda cdo pprt alge mesh turb darc \
 atmo cfbl cogz comb ctwr elec lagr rayt \
 user user_examples apps
 endif

diff --git a/src/alge/Makefile.am b/src/alge/Makefile.am
@@ -34,6 +34,7 @@ endif
 
 AM_CPPFLAGS = \
 -I$(top_srcdir)/src/bft \
+-I$(top_srcdir)/src/cuda \
 -I$(top_srcdir)/src/fvm \
 -I$(top_srcdir)/src/mei \
 -I$(top_srcdir)/src/base \

diff --git a/src/alge/cs_blas.c b/src/alge/cs_blas.c
@@ -38,6 +38,7 @@
  *----------------------------------------------------------------------------*/
 
 #include "cs_base.h"
+#include "cs_cuda.h"
 #include "cs_parall.h"
 
 /*----------------------------------------------------------------------------
@@ -299,6 +300,11 @@ _cs_dot_xx_superblock(cs_lnum_t         n,
 {
   double dot_xx = 0.0;
 
+# ifdef HAVE_CUDA_OFFLOAD
+  if (cs_cuda_dot_product_xx(&dot_xx, x, n))
+    return dot_xx;
+# endif
+
 # pragma omp parallel reduction(+:dot_xx) if (n > CS_THR_MIN)
   {
     cs_lnum_t s_id, e_id;
@@ -360,6 +366,11 @@ _cs_dot_xx_xy_superblock(cs_lnum_t                    n,
                          double                      *xx,
                          double                      *xy)
 {
+# ifdef HAVE_CUDA_OFFLOAD
+  if (cs_cuda_dot_product_xx_xy(xx, xy, x, y, n))
+    return;
+# endif /* HAVE_CUDA_OFFLOAD */
+
   double dot_xx = 0.0, dot_xy = 0.0;
 
 # pragma omp parallel reduction(+:dot_xx, dot_xy) if (n > CS_THR_MIN)
@@ -433,6 +444,11 @@ _cs_dot_xy_yz_superblock(cs_lnum_t                    n,
                          double                      *xy,
                          double                      *yz)
 {
+# ifdef HAVE_CUDA_OFFLOAD
+  if (cs_cuda_dot_product_xy_yz(xy, yz, x, y, z, n))
+    return;
+# endif
+
   double dot_xy = 0.0, dot_yz = 0.0;
 
 # pragma omp parallel reduction(+:dot_xy, dot_yz) if (n > CS_THR_MIN)