diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 1008a20f..990ce7d4 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -182,3 +182,32 @@ jobs: tests/*.log benchmarks/*.log doc/tutorials/*.log + # Build and test with CUDA enabled inside the NVIDIA HPC SDK (nvhpc) container. + nvhpc: + runs-on: ubuntu-latest + container: nvcr.io/nvidia/nvhpc:24.7-devel-cuda12.5-ubuntu22.04 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + fetch-depth: 0 + - run: sudo apt-get update + - run: sudo apt-get install -y make autoconf automake libtool pkgconf libhwloc-dev + - name: configure + run: | + module load nvhpc + ./autogen.sh + mkdir build + ./configure --prefix=`pwd`/build --with-cuda CUDA_HOME=$NVHPC_ROOT/cuda + - run: make CFLAGS=-std=c99 + - run: make check + - run: make install + - uses: actions/upload-artifact@v2 + if: failure() + with: + name: nvhpc + path: | + config.log + tests/*.log + benchmarks/*.log + doc/tutorials/*.log diff --git a/benchmarks/blas/l1_kernel.c b/benchmarks/blas/l1_kernel.c index 94033df9..40f7d9d2 100644 --- a/benchmarks/blas/l1_kernel.c +++ b/benchmarks/blas/l1_kernel.c @@ -56,15 +56,18 @@ double ddot(size_t n, double *a, double *b, double *c, double scalar) (void)*c; (void)scalar; size_t i; - long double dot = 0.0; + /* should be a long double for overflow checks, but some compilers (nvc) + * don't support reduce on long double in 2024. 
+ */ + double dot = 0.0; #pragma omp parallel for reduction(+ : dot) for (i = 0; i < n; i++) { - long double temp; + double temp; temp = a[i] * b[i]; dot += temp; } - return (double)dot; + return dot; } double dnrm2(size_t n, double *a, double *b, double *c, double scalar) diff --git a/excit b/excit index be4b5927..ac9d103d 160000 --- a/excit +++ b/excit @@ -1 +1 @@ -Subproject commit be4b5927f08752bd70f797a9adbe155ad171009d +Subproject commit ac9d103d52895eaa63ffe65b485455890c59d50a diff --git a/src/Makefile.am b/src/Makefile.am index 8234bdb2..44994bad 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -90,11 +90,10 @@ endif # Cuda sources if HAVE_CUDA -libcuda_la_SOURCES=area/cuda.c dma/cuda.c -noinst_LTLIBRARIES+=libcuda.la -libcuda_la_CPPFLAGS=$(AM_CPPFLAGS) $(CUDA_CFLAGS) -libcuda_la_LDFLAGS=$(AM_LDFLAGS) $(CUDA_LIBS) -libaml_la_LIBADD=libcuda.la +AM_CPPFLAGS += $(CUDA_CFLAGS) +AM_LDFLAGS += $(CUDA_LIBS) +libaml_la_SOURCES+=area/cuda.c +libaml_la_SOURCES+=dma/cuda.c endif #############################################