diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml
index 1008a20f..990ce7d4 100644
--- a/.github/workflows/builds.yml
+++ b/.github/workflows/builds.yml
@@ -182,3 +182,33 @@ jobs:
             tests/*.log
             benchmarks/*.log
             doc/tutorials/*.log
+  # Build, test and install inside NVIDIA's HPC SDK (nvhpc) container image.
+  nvhpc:
+    runs-on: ubuntu-latest
+    container: nvcr.io/nvidia/nvhpc:24.7-devel-cuda12.5-ubuntu22.04
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+          fetch-depth: 0
+      - run: sudo apt-get update
+      - run: sudo apt-get install -y make autoconf automake libtool pkgconf libhwloc-dev
+      - name: configure
+        run: |
+          module load nvhpc
+          ./autogen.sh
+          mkdir build
+          ./configure --prefix=`pwd`/build --with-cuda CUDA_HOME=$NVHPC_ROOT/cuda
+      - run: make CFLAGS=-std=c99
+      - run: make check
+      - run: make install
+      # Keep the configure and test logs when any earlier step failed.
+      - uses: actions/upload-artifact@v2
+        if: failure()
+        with:
+          name: nvhpc
+          path: |
+            config.log
+            tests/*.log
+            benchmarks/*.log
+            doc/tutorials/*.log
diff --git a/benchmarks/blas/l1_kernel.c b/benchmarks/blas/l1_kernel.c
index 94033df9..40f7d9d2 100644
--- a/benchmarks/blas/l1_kernel.c
+++ b/benchmarks/blas/l1_kernel.c
@@ -56,15 +56,18 @@ double ddot(size_t n, double *a, double *b, double *c, double scalar)
 	(void)*c;
 	(void)scalar;
 	size_t i;
-	long double dot = 0.0;
+	/* Should be a long double to guard against overflow, but some
+	 * compilers (e.g. nvc, as of 2024) cannot do OpenMP reductions on it.
+	 */
+	double dot = 0.0;
 
 #pragma omp parallel for reduction(+ : dot)
 	for (i = 0; i < n; i++) {
-		long double temp;
+		double temp;
 		temp = a[i] * b[i];
 		dot += temp;
 	}
-	return (double)dot;
+	return dot;
 }
 
 double dnrm2(size_t n, double *a, double *b, double *c, double scalar)
diff --git a/excit b/excit
index be4b5927..ac9d103d 160000
--- a/excit
+++ b/excit
@@ -1 +1 @@
-Subproject commit be4b5927f08752bd70f797a9adbe155ad171009d
+Subproject commit ac9d103d52895eaa63ffe65b485455890c59d50a
diff --git a/src/Makefile.am b/src/Makefile.am
index 8234bdb2..44994bad 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -90,11 +90,10 @@ endif
 # Cuda sources
 
 if HAVE_CUDA
-libcuda_la_SOURCES=area/cuda.c dma/cuda.c
-noinst_LTLIBRARIES+=libcuda.la
-libcuda_la_CPPFLAGS=$(AM_CPPFLAGS) $(CUDA_CFLAGS)
-libcuda_la_LDFLAGS=$(AM_LDFLAGS) $(CUDA_LIBS)
-libaml_la_LIBADD=libcuda.la
+libaml_la_SOURCES += area/cuda.c
+libaml_la_SOURCES += dma/cuda.c
+AM_CPPFLAGS += $(CUDA_CFLAGS)
+AM_LDFLAGS += $(CUDA_LIBS)
 endif
 
 #############################################