diff --git a/gpu/src/pm_cuda.cpp b/gpu/src/pm_cuda.cpp
index feb2f51b..f612e8bb 100644
--- a/gpu/src/pm_cuda.cpp
+++ b/gpu/src/pm_cuda.cpp
@@ -284,6 +284,20 @@ void PM::dev_check_pointer(int rnk, const char * name, void * ptr)
 #endif
 }
 
+void PM::dev_barrier()
+{
+#ifdef _DEBUG_PM
+  printf("Inside PM::dev_barrier()\n");
+#endif
+  // Host-side barrier: synchronize with the CUDA device, then run the project's CUDA error check.
+  cudaDeviceSynchronize(); // return value is not inspected at this call site
+  _CUDA_CHECK_ERRORS(); // NOTE(review): macro defined elsewhere; presumably reports any pending CUDA error - confirm
+  
+#ifdef _DEBUG_PM
+  printf(" -- Leaving PM::dev_barrier()\n");
+#endif
+}
+
 void PM::dev_stream_create(cudaStream_t & s)
 {
 #ifdef _DEBUG_PM
diff --git a/gpu/src/pm_cuda.h b/gpu/src/pm_cuda.h
index d7aec984..acc7e3c9 100644
--- a/gpu/src/pm_cuda.h
+++ b/gpu/src/pm_cuda.h
@@ -64,6 +64,8 @@ namespace PM_NS {
     void dev_pull(void*, void*, size_t);
     void dev_copy(void*, void*, size_t);
 
+    void dev_barrier(); // calls cudaDeviceSynchronize() followed by _CUDA_CHECK_ERRORS()
+    
     // specific to cuda
     
     int dev_push_async(void * d_ptr, void * h_ptr, size_t N, cudaStream_t &s);