
Cuda zero copy #13002

Closed · wants to merge 28 commits

Commits (28)
c6cd611
update mne/cuda to call cupy.asarray when possible
scottrbrtsn Dec 3, 2024
02edb9c
update tests as poc, they pass
scottrbrtsn Dec 3, 2024
5662420
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 3, 2024
f7a04ad
Update mne/cuda.py
larsoner Dec 3, 2024
fd0463e
string formatting
scottrbrtsn Dec 3, 2024
a03014e
add changelog file
scottrbrtsn Dec 3, 2024
44b8b9a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 3, 2024
96f2a03
only get shared mem if cuda capable
scottrbrtsn Dec 4, 2024
caec586
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 4, 2024
de298fe
revert numba_capable and use the bool in fixes
scottrbrtsn Dec 4, 2024
ce2ff37
gate get_shared_mem with fixes/has_numba bool
scottrbrtsn Dec 4, 2024
75c4dab
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 4, 2024
929fcb9
Update mne/cuda.py to soft import numba
scottrbrtsn Dec 4, 2024
214edf1
revert tests, make shared_mem fun private and simplified, call approp…
scottrbrtsn Dec 4, 2024
cfd6fb3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 4, 2024
cb8fec5
soft import numba
scottrbrtsn Dec 4, 2024
6973b54
remove extra line
scottrbrtsn Dec 4, 2024
f2950e0
revert soft import.
scottrbrtsn Dec 4, 2024
ac5b5c8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 4, 2024
e76b5a7
W needs "cuda" passed in since it just set n_jobs to 1
scottrbrtsn Dec 4, 2024
1fc59ba
hfft needs cuda for the same reason
scottrbrtsn Dec 4, 2024
9350498
remove n_jobs param
scottrbrtsn Dec 4, 2024
1a1c8ec
remove n_jobs gate
scottrbrtsn Dec 4, 2024
a3342fa
fix docstring param
scottrbrtsn Dec 5, 2024
88ae8cb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2024
4bbd2c7
irfft, takes cuda mem, and so x is a different type. i do not think g…
scottrbrtsn Dec 6, 2024
af3cab2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 6, 2024
74a7794
remove troubleshooting woes
scottrbrtsn Dec 6, 2024
1 change: 1 addition & 0 deletions doc/changes/devel/13002.other.rst
@@ -0,0 +1 @@
Short description of the changes, by :newcontrib:`Scott Robertson`.
Member comment:

Just adding a comment so we don't forget to actually update this 🙂

Member comment:

... and adding your name to doc/changes/names.inc will fix the CircleCI error:

[towncrier-fragments]:89: ERROR: Indirect hyperlink target "new contributor Scott Robertson"  refers to target "scott robertson", which does not exist. [docutils]

48 changes: 44 additions & 4 deletions mne/cuda.py
@@ -2,6 +2,7 @@
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.


import numpy as np
from scipy.fft import irfft, rfft

@@ -19,6 +20,40 @@
_cuda_capable = False


def _share_cuda_mem(x):
    """Get a shared memory space to avoid copying from CPU to GPU when possible.

    Allocate a mapped ndarray whose buffer is pinned in host memory and
    mapped onto the device, similar to np.empty(). Requires numba.

    Parameters
    ----------
    x : ndarray, shape (n,)
        The array to share with the device.

    Returns
    -------
    out : ndarray
        A mapped array suitable for passing to cupy.asarray, which does not
        copy when shared memory is already allocated. If CUDA or numba is
        not available, the original array is returned unchanged.
    """
    from mne.fixes import has_numba

    if _cuda_capable and has_numba:
        from numba import cuda

        # preserve dtype; mapped_array defaults to float64 otherwise
        out = cuda.mapped_array(x.shape, dtype=x.dtype)
        out[:] = x
    else:
        out = x
    return out

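The gating logic above can be sketched on the CPU alone. This is a hypothetical, self-contained mock (the name `share_mem` and its boolean flags are not part of the PR — in the real code they come from `mne.cuda._cuda_capable` and `mne.fixes.has_numba`); without a GPU only the cheap fallback branch runs:

```python
import numpy as np


def share_mem(x, cuda_capable=False, has_numba=False):
    """Return a pinned, device-mapped buffer when CUDA and numba are both
    available; otherwise hand back ``x`` untouched (the cheap fallback)."""
    if cuda_capable and has_numba:
        # Real path (needs a GPU): allocate host-pinned memory that the
        # device can read directly, so cupy.asarray() need not copy.
        from numba import cuda

        out = cuda.mapped_array(x.shape, dtype=x.dtype)
        out[:] = x  # one host-side copy into the pinned buffer
    else:
        out = x  # no CUDA/numba: no allocation, no copy
    return out


x = np.arange(4, dtype=np.float64)
print(share_mem(x) is x)  # → True (fallback returns the same object)
```

The design point is that the fallback returns the identical object, so callers can unconditionally wrap arrays in `_share_cuda_mem` without paying anything on CPU-only machines.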

def get_cuda_memory(kind="available"):
"""Get the amount of free memory for CUDA operations.

@@ -176,12 +211,13 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering"

try:
# do the IFFT normalization now so we don't have to later
- h_fft = cupy.array(cuda_dict["h_fft"])
+ h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"]))
logger.info(f"Using CUDA for {kind}")
except Exception as exp:
logger.info(
"CUDA not used, could not instantiate memory (arrays may be too "
f'large: "{exp}"), falling back to n_jobs=None'
+ f", {_explain_exception()}"
)
cuda_dict.update(h_fft=h_fft, rfft=_cuda_upload_rfft, irfft=_cuda_irfft_get)
else:
@@ -275,14 +311,16 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
try:
import cupy

+ W = _share_cuda_mem(W)
+
# do the IFFT normalization now so we don't have to later
- W = cupy.array(W)
+ W = cupy.asarray(W)
logger.info("Using CUDA for FFT resampling")
except Exception:
logger.info(
"CUDA not used, could not instantiate memory "
"(arrays may be too large), falling back to "
- "n_jobs=None"
+ f"n_jobs=None, {_explain_exception()}"
)
else:
cuda_dict.update(
@@ -301,7 +339,9 @@ def _cuda_upload_rfft(x, n, axis=-1):
"""Upload and compute rfft."""
import cupy

- return cupy.fft.rfft(cupy.array(x), n=n, axis=axis)
+ x = _share_cuda_mem(x)
+
+ return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis)
scottrbrtsn marked this conversation as resolved.
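The repeated swap of `cupy.array` for `cupy.asarray` throughout this diff relies on the same contract NumPy documents for its own `array`/`asarray` pair: `array` always copies by default, while `asarray` returns the input buffer unchanged when dtype and layout already match. A CPU-side sketch of that contract using NumPy (not part of the PR, just the analogous behavior):

```python
import numpy as np

x = np.zeros(8)

copied = np.array(x)    # always allocates and copies into a new buffer
shared = np.asarray(x)  # returns x itself: dtype and layout already match

print(copied is x)  # → False
print(shared is x)  # → True
```

On the GPU side, `cupy.asarray` extends this one step further: given a numba mapped array (host-pinned and device-visible), it can expose the buffer to the device without a host-to-device copy, which is the zero-copy path this PR targets.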


def _cuda_irfft_get(x, n, axis=-1):