From c6cd611fa99ac9c3403deffb7e811f8a73d9fd97 Mon Sep 17 00:00:00 2001
From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:31:52 -0600
Subject: [PATCH 01/28] update mne/cuda to call cupy.asarray when possible

---
 mne/cuda.py | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index f44dc653a1e..5b1676e92c1 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -5,6 +5,7 @@
 import numpy as np
 from scipy.fft import irfft, rfft
+
 from .utils import (
     _check_option,
     _explain_exception,
@@ -18,6 +19,45 @@
 _cuda_capable = False
 
+def get_shared_mem(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    portable=False,
+    wc=True,
+):
+    """Get shared memory space to avoid copying from cpu to gpu when possible.
+
+    Allocate a mapped ndarray with a buffer that is pinned and mapped on
+    to the device. Similar to np.empty()
+
+    Parameters
+    ----------
+    portable: bool
+        a boolean flag to allow the allocated device memory to be
+        usable in multiple devices.
+    wc: bool
+        a boolean flag to enable writecombined allocation which is faster
+        to write by the host and to read by the device, but slower to
+        write by the host and slower to write by the device.
+
+    Returns
+    -------
+    a mapped array: np.ndarray
+        An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
+    """
+    from numba import cuda
+    return cuda.mapped_array(
+        shape,
+        dtype=dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+        portable=portable,
+        wc=wc,
+    )
 
 def get_cuda_memory(kind="available"):
     """Get the amount of free memory for CUDA operations.
@@ -176,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering"
     try:
         # do the IFFT normalization now so we don't have to later
-        h_fft = cupy.array(cuda_dict["h_fft"])
+        h_fft = cupy.asarray(cuda_dict["h_fft"])
         logger.info(f"Using CUDA for {kind}")
     except Exception as exp:
         logger.info(
@@ -276,7 +316,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
         import cupy
 
         # do the IFFT normalization now so we don't have to later
-        W = cupy.array(W)
+        W = cupy.asarray(W)
         logger.info("Using CUDA for FFT resampling")
     except Exception:
         logger.info(
@@ -301,7 +341,7 @@ def _cuda_upload_rfft(x, n, axis=-1):
     """Upload and compute rfft."""
     import cupy
 
-    return cupy.fft.rfft(cupy.array(x), n=n, axis=axis)
+    return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis)
 
 
 def _cuda_irfft_get(x, n, axis=-1):

From 02edb9c7483f051823b26fbb641336a42ca1069c Mon Sep 17 00:00:00 2001
From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:32:09 -0600
Subject: [PATCH 02/28] update tests as poc, they pass

---
 mne/tests/test_filter.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index e259ececbce..52e555a775e 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -16,6 +16,7 @@
 from scipy.signal import resample as sp_resample
 
 from mne import Epochs, create_info
+from mne.cuda import get_shared_mem
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
 from mne.filter import (
     _length_factors,
@@ -408,6 +409,10 @@ def test_resample_scipy():
             err_msg = f"{N}: {window}"
             x_2_sp = sp_resample(x, 2 * N, window=window)
             for n_jobs in n_jobs_test:
+                if n_jobs == "cuda":
+                    tmp = x
+                    x = get_shared_mem(x.shape)
+                    x[:] = tmp
                 x_2 = resample(x, 2, 1, npad=0, window=window, n_jobs=n_jobs)
                 assert_allclose(x_2, x_2_sp, atol=1e-12, err_msg=err_msg)
             new_len = int(round(len(x) * (1.0 / 2.0)))
@@ -421,6 +426,12 @@ def test_resample_scipy():
 def test_n_jobs(n_jobs):
     """Test resampling against SciPy."""
     x = np.random.RandomState(0).randn(4, 100)
+
+    if n_jobs == "cuda":
+        tmp = x
+        x = get_shared_mem(x.shape)
+        x[:] = tmp
+
     y1 = resample(x, 2, 1, n_jobs=None)
     y2 = resample(x, 2, 1, n_jobs=n_jobs)
     assert_allclose(y1, y2)
@@ -846,6 +857,8 @@ def test_cuda_resampling():
         a = rng.randn(2, N)
         for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
             a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
+            x = get_shared_mem(a.shape)
+            x[:] = a
             a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
             assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
             assert_array_almost_equal(a1, a2, 14)

From 5662420fd702e313c66963ef61f599b84006eecf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:58:59 +0000
Subject: [PATCH 03/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mne/cuda.py              | 6 ++++--
 mne/tests/test_filter.py | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 5b1676e92c1..e727c8b3a47 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -5,7 +5,6 @@
 import numpy as np
 from scipy.fft import irfft, rfft
-
 from .utils import (
     _check_option,
     _explain_exception,
@@ -19,6 +18,7 @@
 _cuda_capable = False
 
+
 def get_shared_mem(
     shape,
     dtype=np.float64,
@@ -45,10 +45,11 @@ def get_shared_mem(
     Returns
     -------
-    a mapped array: np.ndarray
     An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
     """
     from numba import cuda
+
     return cuda.mapped_array(
         shape,
         dtype=dtype,
@@ -59,6 +60,7 @@ def get_shared_mem(
         wc=wc,
     )
 
+
 def get_cuda_memory(kind="available"):
     """Get the amount of free memory for CUDA operations.

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index 52e555a775e..0bf4766b818 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -16,8 +16,8 @@
 from scipy.signal import resample as sp_resample
 
 from mne import Epochs, create_info
-from mne.cuda import get_shared_mem
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
+from mne.cuda import get_shared_mem
 from mne.filter import (
     _length_factors,
     _overlap_add_filter,
@@ -858,7 +858,7 @@ def test_cuda_resampling():
         for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
             a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
             x = get_shared_mem(a.shape)
-            x[:] = a
+            x[:] = a
             a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
             assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
             assert_array_almost_equal(a1, a2, 14)

From f7a04addc4fd6f8f0295e68eae2e619e43155f75 Mon Sep 17 00:00:00 2001
From: Eric Larson
Date: Tue, 3 Dec 2024 11:10:29 -0500
Subject: [PATCH 04/28] Update mne/cuda.py

---
 mne/cuda.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index e727c8b3a47..79088d44f2e 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -46,7 +46,8 @@ def get_shared_mem(
     Returns
     -------
     a mapped array: np.ndarray
-        An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
+        An array to be passed into cupy.asarray, which does not copy if
+        shared memory is already allocated.
""" from numba import cuda From fd0463e0afaf670e58b206354b55c3d4c4e50abf Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:29:08 -0600 Subject: [PATCH 05/28] string formatting --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 79088d44f2e..bf8f42f3886 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -45,8 +45,8 @@ def get_shared_mem( Returns ------- - a mapped array: np.ndarray - An array to be passed into cupy.asarray, which does not copy if + a mapped array: np.ndarray + An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ from numba import cuda From a03014eaf522be0d49d5d161ce202308a6d286c6 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:48:31 -0600 Subject: [PATCH 06/28] add changelog file --- doc/changes/devel/13002.other.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changes/devel/13002.other.rst diff --git a/doc/changes/devel/13002.other.rst b/doc/changes/devel/13002.other.rst new file mode 100644 index 00000000000..03e261682be --- /dev/null +++ b/doc/changes/devel/13002.other.rst @@ -0,0 +1 @@ +Short description of the changes, by :newcontrib:`Scott Robertson`. From 44b8b9acae8f88531517bf9ce3fee71ae3842072 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:50:37 +0000 Subject: [PATCH 07/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index bf8f42f3886..79088d44f2e 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -45,8 +45,8 @@ def get_shared_mem( Returns ------- - a mapped array: np.ndarray - An array to be passed into cupy.asarray, which does not copy if + a mapped array: np.ndarray + An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. 
""" from numba import cuda From 96f2a0396d9971aa7758cc5106ed3e250f21d4f8 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 06:23:45 -0600 Subject: [PATCH 08/28] only get shared mem if cuda capable --- mne/tests/test_filter.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 0bf4766b818..0c6b73e3adc 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,6 +402,7 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" + from mne.cuda import _cuda_capable # allow above funs to set it n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -409,7 +410,7 @@ def test_resample_scipy(): err_msg = f"{N}: {window}" x_2_sp = sp_resample(x, 2 * N, window=window) for n_jobs in n_jobs_test: - if n_jobs == "cuda": + if n_jobs == "cuda" and _cuda_capable: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -425,9 +426,10 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" + from mne.cuda import _cuda_capable # allow above funs to set it x = np.random.RandomState(0).randn(4, 100) - if n_jobs == "cuda": + if n_jobs == "cuda" and _cuda_capable: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -851,15 +853,19 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" + from mne.cuda import _cuda_capable # allow above funs to set it rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even a = rng.randn(2, N) for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)): a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window) - x = get_shared_mem(a.shape) - x[:] = a - a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window) + if _cuda_capable: + x = get_shared_mem(a.shape) + x[:] = a + a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window) + else: + a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window) assert_allclose(a1, a2, rtol=1e-7, atol=1e-14) assert_array_almost_equal(a1, a2, 14) assert_array_equal(resample(np.zeros(2), 2, 1, n_jobs="cuda"), np.zeros(4)) From caec586b1eb823b7cd4f520957f377382d84d630 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:24:49 +0000 Subject: [PATCH 09/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/tests/test_filter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 0c6b73e3adc..37eef9d605e 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -403,6 +403,7 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" from mne.cuda import _cuda_capable # allow above funs to set it + n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -427,6 +428,7 @@ def test_resample_scipy(): def test_n_jobs(n_jobs): """Test resampling against SciPy.""" from mne.cuda import _cuda_capable # allow above funs to set it + x = np.random.RandomState(0).randn(4, 100) if n_jobs == "cuda" and _cuda_capable: @@ -854,6 +856,7 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" from mne.cuda import _cuda_capable # allow above funs to 
set it + rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even From de298fed9bcdd46b4754dcb32aeff69df6c3950e Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 07:06:10 -0600 Subject: [PATCH 10/28] revert numba_capable and use the bool in fixes --- mne/cuda.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 79088d44f2e..cb0e77e677b 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -3,6 +3,7 @@ # Copyright the MNE-Python contributors. import numpy as np +import os from scipy.fft import irfft, rfft from .utils import ( @@ -18,7 +19,6 @@ _cuda_capable = False - def get_shared_mem( shape, dtype=np.float64, @@ -33,6 +33,10 @@ def get_shared_mem( Allocate a mapped ndarray with a buffer that is pinned and mapped on to the device. Similar to np.empty() + It is recommended to gate this function with + os.getenv("MNE_USE_NUMBA").lower() == "true" + to avoid import errors. + Parameters ---------- portable: bool From ce2ff377a5c0d6a5b115179d2f53e9182d36d96c Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 07:06:53 -0600 Subject: [PATCH 11/28] gate get_shared_mem with fixes/has_numba bool --- mne/tests/test_filter.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 37eef9d605e..f94cf49cc9b 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,8 +402,8 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -411,7 +411,7 @@ def test_resample_scipy(): err_msg = f"{N}: {window}" x_2_sp = sp_resample(x, 2 * N, window=window) for n_jobs in n_jobs_test: - if n_jobs == "cuda" and _cuda_capable: + if n_jobs == "cuda" and _cuda_capable and has_numba: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -427,11 +427,11 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba x = np.random.RandomState(0).randn(4, 100) - if n_jobs == "cuda" and _cuda_capable: + if n_jobs == "cuda" and _cuda_capable and has_numba: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -855,15 +855,15 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even a = rng.randn(2, N) for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)): a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window) - if _cuda_capable: + if _cuda_capable and has_numba: x = get_shared_mem(a.shape) x[:] = a a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window) From 75c4dab1527a2d082eb715e044f7fe870e575705 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:09:03 +0000 Subject: [PATCH 12/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 7 ++++--- mne/tests/test_filter.py | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index cb0e77e677b..42f22203c9e 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -2,8 +2,8 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. + import numpy as np -import os from scipy.fft import irfft, rfft from .utils import ( @@ -19,6 +19,7 @@ _cuda_capable = False + def get_shared_mem( shape, dtype=np.float64, @@ -33,9 +34,9 @@ def get_shared_mem( Allocate a mapped ndarray with a buffer that is pinned and mapped on to the device. Similar to np.empty() - It is recommended to gate this function with + It is recommended to gate this function with os.getenv("MNE_USE_NUMBA").lower() == "true" - to avoid import errors. + to avoid import errors. Parameters ---------- diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index f94cf49cc9b..45c405d3df9 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,8 +402,9 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -427,8 +428,9 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + x = np.random.RandomState(0).randn(4, 100) if n_jobs == "cuda" and _cuda_capable and has_numba: @@ -855,8 +857,9 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even From 929fcb951579e5a3d05ea9e5e3750ad394500a18 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:13:37 -0600 Subject: [PATCH 13/28] Update mne/cuda.py to soft import numba Co-authored-by: Eric Larson --- mne/cuda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mne/cuda.py b/mne/cuda.py index 42f22203c9e..1d47ebb4559 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -54,6 +54,7 @@ def get_shared_mem( An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ + _soft_import("numba", "using shared memory") from numba import cuda return cuda.mapped_array( From 214edf151c79ad23fc6c1184ca303083a67d3625 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:10:16 -0600 Subject: [PATCH 14/28] revert tests, make shared_mem fun private and simplified, call appropriately within the cuda module. 
---
 mne/cuda.py              | 43 +++++++++++++++++++-----------------------
 mne/tests/test_filter.py | 26 +-----------------------
 2 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 1d47ebb4559..37ae9306d3d 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -19,24 +19,18 @@
 _cuda_capable = False
 
 
-def get_shared_mem(
-    shape,
-    dtype=np.float64,
-    strides=None,
-    order="C",
-    stream=0,
-    portable=False,
-    wc=True,
+def _share_cuda_mem(
+    x, n_jobs
 ):
     """Get shared memory space to avoid copying from cpu to gpu when possible.
 
     Allocate a mapped ndarray with a buffer that is pinned and mapped on
     to the device. Similar to np.empty()
 
-    It is recommended to gate this function with
-    os.getenv("MNE_USE_NUMBA").lower() == "true"
-    to avoid import errors.
+    Requires
+    --------
+    numba
+
     Parameters
     ----------
@@ -54,18 +48,16 @@ def get_shared_mem(
         An array to be passed into cupy.asarray, which does not copy if
         shared memory is already allocated.
     """
-    _soft_import("numba", "using shared memory")
     from numba import cuda
+    from mne.fixes import has_numba
 
-    return cuda.mapped_array(
-        shape,
-        dtype=dtype,
-        strides=strides,
-        order=order,
-        stream=stream,
-        portable=portable,
-        wc=wc,
-    )
+    if n_jobs == "cuda" and _cuda_capable and has_numba:
+        from numba import cuda
+        out = cuda.mapped_array(x.shape, ...)
+        out[:] = x
+    else:
+        out = x
+    return out
 
 
 def get_cuda_memory(kind="available"):
@@ -225,7 +217,8 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering"
     try:
         # do the IFFT normalization now so we don't have to later
-        h_fft = cupy.asarray(cuda_dict["h_fft"])
+        h_fft = _share_cuda_mem(cuda_dict["h_fft"], n_jobs)
+        h_fft = cupy.asarray(h_fft)
         logger.info(f"Using CUDA for {kind}")
     except Exception as exp:
         logger.info(
@@ -324,6 +317,8 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
     try:
         import cupy
 
+        W = _share_cuda_mem(W, n_jobs)
+
         # do the IFFT normalization now so we don't have to later
         W = cupy.asarray(W)
         logger.info("Using CUDA for FFT resampling")
@@ -349,6 +344,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
 def _cuda_upload_rfft(x, n, axis=-1):
     """Upload and compute rfft."""
     import cupy
+    x = _share_cuda_mem(x, "cuda")
 
     return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis)
 
@@ -356,6 +352,7 @@ def _cuda_upload_rfft(x, n, axis=-1):
 def _cuda_irfft_get(x, n, axis=-1):
     """Compute irfft and get."""
     import cupy
+    x = _share_cuda_mem(x, "cuda")
 
     return cupy.fft.irfft(x, n=n, axis=axis).get()

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index 45c405d3df9..47f5487397a 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -17,7 +17,6 @@
 from mne import Epochs, create_info
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
-from mne.cuda import get_shared_mem
 from mne.filter import (
     _length_factors,
     _overlap_add_filter,
@@ -402,9 +401,6 @@ def test_resample(method):
 def test_resample_scipy():
     """Test resampling against SciPy."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     n_jobs_test = (1, "cuda")
     for window in ("boxcar", "hann"):
         for N in (100, 101, 102, 103):
@@ -412,10 +408,6 @@ def test_resample_scipy():
             err_msg = f"{N}: {window}"
             x_2_sp = sp_resample(x, 2 * N, window=window)
             for n_jobs in n_jobs_test:
-                if n_jobs == "cuda" and _cuda_capable and has_numba:
-                    tmp = x
-                    x = get_shared_mem(x.shape)
-                    x[:] = tmp
                 x_2 = resample(x, 2, 1, npad=0, window=window, n_jobs=n_jobs)
                 assert_allclose(x_2, x_2_sp, atol=1e-12, err_msg=err_msg)
             new_len = int(round(len(x) * (1.0 / 2.0)))
@@ -428,16 +420,8 @@ def test_resample_scipy():
 @pytest.mark.parametrize("n_jobs", (2, "cuda"))
 def test_n_jobs(n_jobs):
     """Test resampling against SciPy."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     x = np.random.RandomState(0).randn(4, 100)
 
-    if n_jobs == "cuda" and _cuda_capable and has_numba:
-        tmp = x
-        x = get_shared_mem(x.shape)
-        x[:] = tmp
-
     y1 = resample(x, 2, 1, n_jobs=None)
     y2 = resample(x, 2, 1, n_jobs=n_jobs)
     assert_allclose(y1, y2)
@@ -857,21 +841,13 @@ def test_cuda_fir():
 def test_cuda_resampling():
     """Test CUDA resampling."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     rng = np.random.RandomState(0)
     for window in ("boxcar", "triang"):
         for N in (997, 1000):  # one prime, one even
             a = rng.randn(2, N)
             for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
                 a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
-                if _cuda_capable and has_numba:
-                    x = get_shared_mem(a.shape)
-                    x[:] = a
-                    a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window)
-                else:
-                    a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
+                a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
                 assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
                 assert_array_almost_equal(a1, a2, 14)
     assert_array_equal(resample(np.zeros(2), 2, 1, n_jobs="cuda"), np.zeros(4))

From cfd6fb3a6e621917c996a2b0fc07b6209bd5b824 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 20:15:36 +0000
Subject: [PATCH 15/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mne/cuda.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 37ae9306d3d..3048c7766fa 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -19,9 +19,8 @@
 _cuda_capable = False
 
-def _share_cuda_mem(
-    x, n_jobs
-):
+
+def _share_cuda_mem(x, n_jobs):
     """Get shared memory space to avoid copying from cpu to gpu when possible.
 
     Allocate a mapped ndarray with a buffer that is pinned and mapped on
     to the device. Similar to np.empty()
@@ -49,10 +48,12 @@ def _share_cuda_mem(
         shared memory is already allocated.
     """
     from numba import cuda
+
     from mne.fixes import has_numba
 
     if n_jobs == "cuda" and _cuda_capable and has_numba:
         from numba import cuda
+
         out = cuda.mapped_array(x.shape, ...)
out[:] = x else: @@ -344,6 +345,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): def _cuda_upload_rfft(x, n, axis=-1): """Upload and compute rfft.""" import cupy + x = _share_cuda_mem(x, "cuda") return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis) @@ -352,6 +354,7 @@ def _cuda_upload_rfft(x, n, axis=-1): def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy + x = _share_cuda_mem(x, "cuda") return cupy.fft.irfft(x, n=n, axis=axis).get() From cb8fec5fc07714d69429bf673494bda4744825d5 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:29:34 -0600 Subject: [PATCH 16/28] soft import numba --- mne/cuda.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mne/cuda.py b/mne/cuda.py index 3048c7766fa..e123580f147 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -9,6 +9,7 @@ from .utils import ( _check_option, _explain_exception, + _soft_import, fill_doc, get_config, logger, @@ -47,6 +48,7 @@ def _share_cuda_mem(x, n_jobs): An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ + _soft_import("numba", "using shared memory") from numba import cuda from mne.fixes import has_numba From 6973b5425ff1d710ccace9c44997df7eb0096114 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:31:10 -0600 Subject: [PATCH 17/28] remove extra line --- mne/tests/test_filter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 47f5487397a..e259ececbce 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -421,7 +421,6 @@ def test_resample_scipy(): def test_n_jobs(n_jobs): """Test resampling against SciPy.""" x = np.random.RandomState(0).randn(4, 100) - y1 = resample(x, 2, 1, n_jobs=None) y2 = resample(x, 2, 1, n_jobs=n_jobs) assert_allclose(y1, y2) From f2950e043449d92bc844f5ebc9ce704b302c64e0 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:37:26 -0600 Subject: [PATCH 18/28] revert soft import. --- mne/cuda.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index e123580f147..2ef71a8b883 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -48,9 +48,6 @@ def _share_cuda_mem(x, n_jobs): An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. 
""" - _soft_import("numba", "using shared memory") - from numba import cuda - from mne.fixes import has_numba if n_jobs == "cuda" and _cuda_capable and has_numba: From ac5b5c86245354e94f1c0c1326f5ad5a8a5ee077 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 20:37:46 +0000 Subject: [PATCH 19/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 2ef71a8b883..f4053d473da 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -9,7 +9,6 @@ from .utils import ( _check_option, _explain_exception, - _soft_import, fill_doc, get_config, logger, From e76b5a72559d34e93abb676b784fa4ac7e4f6210 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:43:31 -0600 Subject: [PATCH 20/28] W needs "cuda" passed in since it just set n_jobs to 1 --- mne/cuda.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index f4053d473da..3256625ad39 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -216,8 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = _share_cuda_mem(cuda_dict["h_fft"], n_jobs) - h_fft = cupy.asarray(h_fft) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], n_jobs)) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( @@ -316,7 +315,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): try: import cupy - W = _share_cuda_mem(W, n_jobs) + W = _share_cuda_mem(W, "cuda") # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) From 1fc59ba44ec975fa2e87b6c3a8dde178fb8bf533 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:13:18 -0600 Subject: [PATCH 21/28] hfft needs cuda for the same reason --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 3256625ad39..2cc022fd505 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -216,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], n_jobs)) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], "cuda")) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( From 9350498771ddafb8d22829837c689ea7a47e4724 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:34:49 -0600 Subject: [PATCH 22/28] remove n_jobs param --- mne/cuda.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 2cc022fd505..33f161a7154 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -20,7 +20,7 @@ _cuda_capable = False -def _share_cuda_mem(x, n_jobs): +def _share_cuda_mem(x): """Get shared memory space to avoid copying from cpu to gpu when possible. 
Allocate a mapped ndarray with a buffer that is pinned and mapped on @@ -216,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], "cuda")) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"])) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( @@ -315,7 +315,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): try: import cupy - W = _share_cuda_mem(W, "cuda") + W = _share_cuda_mem(W) # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) @@ -343,7 +343,7 @@ def _cuda_upload_rfft(x, n, axis=-1): """Upload and compute rfft.""" import cupy - x = _share_cuda_mem(x, "cuda") + x = _share_cuda_mem(x) return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis) @@ -352,7 +352,7 @@ def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy - x = _share_cuda_mem(x, "cuda") + x = _share_cuda_mem(x) return cupy.fft.irfft(x, n=n, axis=axis).get() From 1a1c8ecae9c27ec580cd7725b3c47e3192a8e80c Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:43:18 -0600 Subject: [PATCH 23/28] remove n_jobs gate --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 33f161a7154..a3ad03b61c3 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -49,7 +49,7 @@ def _share_cuda_mem(x): """ from mne.fixes import has_numba - if n_jobs == "cuda" and _cuda_capable and has_numba: + if _cuda_capable and has_numba: from numba import cuda out = cuda.mapped_array(x.shape, ...) From a3342fa97dcf4cd6e3d0c5bccfc5a1fe8fdbbb3d Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:33:20 -0600 Subject: [PATCH 24/28] fix docstring param --- mne/cuda.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index a3ad03b61c3..463d65ae4be 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -33,19 +33,14 @@ def _share_cuda_mem(x): Parameters ---------- - portable: bool - a boolean flag to allow the allocated device memory to be - usable in multiple devices. - wc: bool - a boolean flag to enable writecombined allocation which is faster - to write by the host and to read by the device, but slower to - write by the host and slower to write by the device. + x : 1-d array Returns ------- a mapped array: np.ndarray An array to be passed into cupy.asarray, which does not copy if - shared memory is already allocated. + shared memory is already allocated. If cuda and numba are not + available, return the original array. """ from mne.fixes import has_numba From 88ae8cbeabfaefdf6b5f05d0e31da8f4b18915b7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:33:47 +0000 Subject: [PATCH 25/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 463d65ae4be..fee2ea51928 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -39,7 +39,7 @@ def _share_cuda_mem(x): ------- a mapped array: np.ndarray An array to be passed into cupy.asarray, which does not copy if - shared memory is already allocated. If cuda and numba are not + shared memory is already allocated. 
If cuda and numba are not available, return the original array. """ from mne.fixes import has_numba From 4bbd2c71648834391d74075f962d7c5a84be2d04 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:23:43 -0600 Subject: [PATCH 26/28] irfft, takes cuda mem, and so x is a different type. i do not think getting shared mem is necessary, for filtering...need to think on this tho --- mne/cuda.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index fee2ea51928..61c4bf38453 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -47,8 +47,8 @@ def _share_cuda_mem(x): if _cuda_capable and has_numba: from numba import cuda - out = cuda.mapped_array(x.shape, ...) - out[:] = x + out = cuda.mapped_array(x.shape) + out[:] = x.get() else: out = x return out @@ -217,6 +217,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" logger.info( "CUDA not used, could not instantiate memory (arrays may be too " f'large: "{exp}"), falling back to n_jobs=None' + f", {_explain_exception()}" ) cuda_dict.update(h_fft=h_fft, rfft=_cuda_upload_rfft, irfft=_cuda_irfft_get) else: @@ -315,11 +316,11 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) logger.info("Using CUDA for FFT resampling") - except Exception: + except Exception as e: logger.info( "CUDA not used, could not instantiate memory " "(arrays may be too large), falling back to " - "n_jobs=None" + f"n_jobs=None, {_explain_exception()}" ) else: cuda_dict.update( @@ -347,8 +348,6 @@ def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy - x = _share_cuda_mem(x) - return cupy.fft.irfft(x, n=n, axis=axis).get() From af3cab2bb03d7f5257110cec0ec7ebd05f427e81 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 18:24:04 +0000 Subject: [PATCH 27/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 61c4bf38453..22b823ab338 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -47,7 +47,7 @@ def _share_cuda_mem(x): if _cuda_capable and has_numba: from numba import cuda - out = cuda.mapped_array(x.shape) + out = cuda.mapped_array(x.shape) out[:] = x.get() else: out = x @@ -316,7 +316,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) logger.info("Using CUDA for FFT resampling") - except Exception as e: + except Exception: logger.info( "CUDA not used, could not instantiate memory " "(arrays may be too large), falling back to " From 74a7794f0b48518aca3d73a369e0a8aeffb5646d Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:39:02 -0600 Subject: [PATCH 28/28] remove troubleshooting woes --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 22b823ab338..1c3519026a3 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -48,7 +48,7 @@ def _share_cuda_mem(x): from numba import cuda out = cuda.mapped_array(x.shape) - out[:] = x.get() + out[:] = x else: out = x return out
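
A minimal usage sketch of where the series ends up (assumptions: numba and cupy
are installed, a CUDA device is present, and CUDA support is enabled in MNE's
config so that n_jobs="cuda" is honoured; otherwise resample falls back to the
CPU path):

    import numpy as np
    import mne
    from mne.filter import resample

    mne.set_config("MNE_USE_CUDA", "true")  # opt in to the CUDA FFT path
    mne.cuda.init_cuda()

    x = np.random.RandomState(0).randn(4, 1000)
    # The pinned/mapped staging of the FFT inputs now happens inside mne.cuda
    # (via the private _share_cuda_mem helper), so callers only pass n_jobs.
    y = resample(x, 2, 1, npad="auto", n_jobs="cuda")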