From c6cd611fa99ac9c3403deffb7e811f8a73d9fd97 Mon Sep 17 00:00:00 2001
From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:31:52 -0600
Subject: [PATCH 01/28] update mne/cuda to call cupy.asarray when possible

---
 mne/cuda.py | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index f44dc653a1e..5b1676e92c1 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -5,6 +5,7 @@
 import numpy as np
 from scipy.fft import irfft, rfft
+
 from .utils import (
     _check_option,
     _explain_exception,
@@ -18,6 +19,45 @@
 _cuda_capable = False
 
+def get_shared_mem(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    portable=False,
+    wc=True,
+):
+    """Get shared memory space to avoid copying from cpu to gpu when possible.
+
+    Allocate a mapped ndarray with a buffer that is pinned and mapped on
+    to the device. Similar to np.empty()
+
+    Parameters
+    ----------
+    portable: bool
+        a boolean flag to allow the allocated device memory to be
+        usable in multiple devices.
+    wc: bool
+        a boolean flag to enable writecombined allocation which is faster
+        to write by the host and to read by the device, but slower to
+        write by the host and slower to write by the device.
+
+    Returns
+    -------
+    a mapped array: np.ndarray
+        An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
+    """
+    from numba import cuda
+    return cuda.mapped_array(
+        shape,
+        dtype=dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+        portable=portable,
+        wc=wc,
+    )
 
 def get_cuda_memory(kind="available"):
     """Get the amount of free memory for CUDA operations.
@@ -176,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering"
     try:
         # do the IFFT normalization now so we don't have to later
-        h_fft = cupy.array(cuda_dict["h_fft"])
+        h_fft = cupy.asarray(cuda_dict["h_fft"])
         logger.info(f"Using CUDA for {kind}")
     except Exception as exp:
         logger.info(
@@ -276,7 +316,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
         import cupy
 
         # do the IFFT normalization now so we don't have to later
-        W = cupy.array(W)
+        W = cupy.asarray(W)
         logger.info("Using CUDA for FFT resampling")
     except Exception:
         logger.info(
@@ -301,7 +341,7 @@ def _cuda_upload_rfft(x, n, axis=-1):
     """Upload and compute rfft."""
     import cupy
 
-    return cupy.fft.rfft(cupy.array(x), n=n, axis=axis)
+    return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis)
 
 
 def _cuda_irfft_get(x, n, axis=-1):

From 02edb9c7483f051823b26fbb641336a42ca1069c Mon Sep 17 00:00:00 2001
From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:32:09 -0600
Subject: [PATCH 02/28] update tests as poc, they pass

---
 mne/tests/test_filter.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index e259ececbce..52e555a775e 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -16,6 +16,7 @@
 from scipy.signal import resample as sp_resample
 
 from mne import Epochs, create_info
+from mne.cuda import get_shared_mem
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
 from mne.filter import (
     _length_factors,
@@ -408,6 +409,10 @@ def test_resample_scipy():
             err_msg = f"{N}: {window}"
             x_2_sp = sp_resample(x, 2 * N, window=window)
             for n_jobs in n_jobs_test:
+                if n_jobs == "cuda":
+                    tmp = x
+                    x = get_shared_mem(x.shape)
+                    x[:] = tmp
                 x_2 = resample(x, 2, 1, npad=0, window=window, n_jobs=n_jobs)
                 assert_allclose(x_2, x_2_sp, atol=1e-12, err_msg=err_msg)
             new_len = int(round(len(x) * (1.0 / 2.0)))
@@ -421,6 +426,12 @@ def test_resample_scipy():
 def test_n_jobs(n_jobs):
     """Test resampling against SciPy."""
     x = np.random.RandomState(0).randn(4, 100)
+
+    if n_jobs == "cuda":
+        tmp = x
+        x = get_shared_mem(x.shape)
+        x[:] = tmp
+
     y1 = resample(x, 2, 1, n_jobs=None)
     y2 = resample(x, 2, 1, n_jobs=n_jobs)
     assert_allclose(y1, y2)
@@ -846,6 +857,8 @@ def test_cuda_resampling():
         a = rng.randn(2, N)
         for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
             a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
+            x = get_shared_mem(a.shape)
+            x[:] = a
             a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
             assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
             assert_array_almost_equal(a1, a2, 14)

From 5662420fd702e313c66963ef61f599b84006eecf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:58:59 +0000
Subject: [PATCH 03/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mne/cuda.py              | 6 ++++--
 mne/tests/test_filter.py | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 5b1676e92c1..e727c8b3a47 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -5,7 +5,6 @@
 import numpy as np
 from scipy.fft import irfft, rfft
-
 from .utils import (
     _check_option,
     _explain_exception,
@@ -19,6 +18,7 @@
 _cuda_capable = False
 
+
 def get_shared_mem(
     shape,
     dtype=np.float64,
@@ -45,10 +45,11 @@ def get_shared_mem(
     Returns
     -------
-    a mapped array: np.ndarray
     An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
     """
     from numba import cuda
+
     return cuda.mapped_array(
         shape,
         dtype=dtype,
@@ -59,6 +60,7 @@ def get_shared_mem(
         wc=wc,
     )
 
+
 def get_cuda_memory(kind="available"):
     """Get the amount of free memory for CUDA operations.

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index 52e555a775e..0bf4766b818 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -16,8 +16,8 @@
 from scipy.signal import resample as sp_resample
 
 from mne import Epochs, create_info
-from mne.cuda import get_shared_mem
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
+from mne.cuda import get_shared_mem
 from mne.filter import (
     _length_factors,
     _overlap_add_filter,
@@ -858,7 +858,7 @@ def test_cuda_resampling():
         for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
             a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
             x = get_shared_mem(a.shape)
-            x[:] = a
+            x[:] = a
             a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
             assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
             assert_array_almost_equal(a1, a2, 14)

From f7a04addc4fd6f8f0295e68eae2e619e43155f75 Mon Sep 17 00:00:00 2001
From: Eric Larson
Date: Tue, 3 Dec 2024 11:10:29 -0500
Subject: [PATCH 04/28] Update mne/cuda.py

---
 mne/cuda.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index e727c8b3a47..79088d44f2e 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -46,7 +46,8 @@ def get_shared_mem(
     Returns
     -------
     a mapped array: np.ndarray
-        An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated.
+        An array to be passed into cupy.asarray, which does not copy if
+        shared memory is already allocated.
""" from numba import cuda From fd0463e0afaf670e58b206354b55c3d4c4e50abf Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:29:08 -0600 Subject: [PATCH 05/28] string formatting --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 79088d44f2e..bf8f42f3886 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -45,8 +45,8 @@ def get_shared_mem( Returns ------- - a mapped array: np.ndarray - An array to be passed into cupy.asarray, which does not copy if + a mapped array: np.ndarray + An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ from numba import cuda From a03014eaf522be0d49d5d161ce202308a6d286c6 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:48:31 -0600 Subject: [PATCH 06/28] add changelog file --- doc/changes/devel/13002.other.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changes/devel/13002.other.rst diff --git a/doc/changes/devel/13002.other.rst b/doc/changes/devel/13002.other.rst new file mode 100644 index 00000000000..03e261682be --- /dev/null +++ b/doc/changes/devel/13002.other.rst @@ -0,0 +1 @@ +Short description of the changes, by :newcontrib:`Scott Robertson`. From 44b8b9acae8f88531517bf9ce3fee71ae3842072 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:50:37 +0000 Subject: [PATCH 07/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index bf8f42f3886..79088d44f2e 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -45,8 +45,8 @@ def get_shared_mem( Returns ------- - a mapped array: np.ndarray - An array to be passed into cupy.asarray, which does not copy if + a mapped array: np.ndarray + An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. 
""" from numba import cuda From 96f2a0396d9971aa7758cc5106ed3e250f21d4f8 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 06:23:45 -0600 Subject: [PATCH 08/28] only get shared mem if cuda capable --- mne/tests/test_filter.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 0bf4766b818..0c6b73e3adc 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,6 +402,7 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" + from mne.cuda import _cuda_capable # allow above funs to set it n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -409,7 +410,7 @@ def test_resample_scipy(): err_msg = f"{N}: {window}" x_2_sp = sp_resample(x, 2 * N, window=window) for n_jobs in n_jobs_test: - if n_jobs == "cuda": + if n_jobs == "cuda" and _cuda_capable: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -425,9 +426,10 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" + from mne.cuda import _cuda_capable # allow above funs to set it x = np.random.RandomState(0).randn(4, 100) - if n_jobs == "cuda": + if n_jobs == "cuda" and _cuda_capable: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -851,15 +853,19 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" + from mne.cuda import _cuda_capable # allow above funs to set it rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even a = rng.randn(2, N) for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)): a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window) - x = get_shared_mem(a.shape) - x[:] = a - a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window) + if _cuda_capable: + x = get_shared_mem(a.shape) + x[:] = a + a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window) + else: + a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window) assert_allclose(a1, a2, rtol=1e-7, atol=1e-14) assert_array_almost_equal(a1, a2, 14) assert_array_equal(resample(np.zeros(2), 2, 1, n_jobs="cuda"), np.zeros(4)) From caec586b1eb823b7cd4f520957f377382d84d630 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:24:49 +0000 Subject: [PATCH 09/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/tests/test_filter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 0c6b73e3adc..37eef9d605e 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -403,6 +403,7 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" from mne.cuda import _cuda_capable # allow above funs to set it + n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -427,6 +428,7 @@ def test_resample_scipy(): def test_n_jobs(n_jobs): """Test resampling against SciPy.""" from mne.cuda import _cuda_capable # allow above funs to set it + x = np.random.RandomState(0).randn(4, 100) if n_jobs == "cuda" and _cuda_capable: @@ -854,6 +856,7 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" from mne.cuda import _cuda_capable # allow above funs to 
set it + rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even From de298fed9bcdd46b4754dcb32aeff69df6c3950e Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 07:06:10 -0600 Subject: [PATCH 10/28] revert numba_capable and use the bool in fixes --- mne/cuda.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 79088d44f2e..cb0e77e677b 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -3,6 +3,7 @@ # Copyright the MNE-Python contributors. import numpy as np +import os from scipy.fft import irfft, rfft from .utils import ( @@ -18,7 +19,6 @@ _cuda_capable = False - def get_shared_mem( shape, dtype=np.float64, @@ -33,6 +33,10 @@ def get_shared_mem( Allocate a mapped ndarray with a buffer that is pinned and mapped on to the device. Similar to np.empty() + It is recommended to gate this function with + os.getenv("MNE_USE_NUMBA").lower() == "true" + to avoid import errors. + Parameters ---------- portable: bool From ce2ff377a5c0d6a5b115179d2f53e9182d36d96c Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 07:06:53 -0600 Subject: [PATCH 11/28] gate get_shared_mem with fixes/has_numba bool --- mne/tests/test_filter.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 37eef9d605e..f94cf49cc9b 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,8 +402,8 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -411,7 +411,7 @@ def test_resample_scipy(): err_msg = f"{N}: {window}" x_2_sp = sp_resample(x, 2 * N, window=window) for n_jobs in n_jobs_test: - if n_jobs == "cuda" and _cuda_capable: + if n_jobs == "cuda" and _cuda_capable and has_numba: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -427,11 +427,11 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba x = np.random.RandomState(0).randn(4, 100) - if n_jobs == "cuda" and _cuda_capable: + if n_jobs == "cuda" and _cuda_capable and has_numba: tmp = x x = get_shared_mem(x.shape) x[:] = tmp @@ -855,15 +855,15 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" - from mne.cuda import _cuda_capable # allow above funs to set it - + from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.fixes import has_numba rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even a = rng.randn(2, N) for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)): a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window) - if _cuda_capable: + if _cuda_capable and has_numba: x = get_shared_mem(a.shape) x[:] = a a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window) From 75c4dab1527a2d082eb715e044f7fe870e575705 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:09:03 +0000 Subject: [PATCH 12/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 7 ++++--- mne/tests/test_filter.py | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index cb0e77e677b..42f22203c9e 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -2,8 +2,8 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. + import numpy as np -import os from scipy.fft import irfft, rfft from .utils import ( @@ -19,6 +19,7 @@ _cuda_capable = False + def get_shared_mem( shape, dtype=np.float64, @@ -33,9 +34,9 @@ def get_shared_mem( Allocate a mapped ndarray with a buffer that is pinned and mapped on to the device. Similar to np.empty() - It is recommended to gate this function with + It is recommended to gate this function with os.getenv("MNE_USE_NUMBA").lower() == "true" - to avoid import errors. + to avoid import errors. Parameters ---------- diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index f94cf49cc9b..45c405d3df9 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -402,8 +402,9 @@ def test_resample(method): def test_resample_scipy(): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + n_jobs_test = (1, "cuda") for window in ("boxcar", "hann"): for N in (100, 101, 102, 103): @@ -427,8 +428,9 @@ def test_resample_scipy(): @pytest.mark.parametrize("n_jobs", (2, "cuda")) def test_n_jobs(n_jobs): """Test resampling against SciPy.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + x = np.random.RandomState(0).randn(4, 100) if n_jobs == "cuda" and _cuda_capable and has_numba: @@ -855,8 +857,9 @@ def test_cuda_fir(): def test_cuda_resampling(): """Test CUDA resampling.""" - from mne.cuda import _cuda_capable# allow cuda.init_cuda() to set it + from mne.cuda import _cuda_capable # allow cuda.init_cuda() to set it from mne.fixes import has_numba + rng = np.random.RandomState(0) for window in ("boxcar", "triang"): for N in (997, 1000): # one prime, one even From 929fcb951579e5a3d05ea9e5e3750ad394500a18 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:13:37 -0600 Subject: [PATCH 13/28] Update mne/cuda.py to soft import numba Co-authored-by: Eric Larson --- mne/cuda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mne/cuda.py b/mne/cuda.py index 42f22203c9e..1d47ebb4559 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -54,6 +54,7 @@ def get_shared_mem( An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ + _soft_import("numba", "using shared memory") from numba import cuda return cuda.mapped_array( From 214edf151c79ad23fc6c1184ca303083a67d3625 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:10:16 -0600 Subject: [PATCH 14/28] revert tests, make shared_mem fun private and simplified, call appropriately within the cuda module. 
---
 mne/cuda.py              | 43 +++++++++++++++++++-----------------------
 mne/tests/test_filter.py | 26 +-----------------------
 2 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 1d47ebb4559..37ae9306d3d 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -19,24 +19,18 @@
 _cuda_capable = False
 
 
-def get_shared_mem(
-    shape,
-    dtype=np.float64,
-    strides=None,
-    order="C",
-    stream=0,
-    portable=False,
-    wc=True,
+def _share_cuda_mem(
+    x, n_jobs
 ):
     """Get shared memory space to avoid copying from cpu to gpu when possible.
 
     Allocate a mapped ndarray with a buffer that is pinned and mapped on
     to the device. Similar to np.empty()
 
-    It is recommended to gate this function with
-    os.getenv("MNE_USE_NUMBA").lower() == "true"
-    to avoid import errors.
+    Requires
+    --------
+    numba
+
     Parameters
     ----------
@@ -54,18 +48,16 @@ def get_shared_mem(
         An array to be passed into cupy.asarray, which does not copy if
         shared memory is already allocated.
     """
-    _soft_import("numba", "using shared memory")
     from numba import cuda
+    from mne.fixes import has_numba
 
-    return cuda.mapped_array(
-        shape,
-        dtype=dtype,
-        strides=strides,
-        order=order,
-        stream=stream,
-        portable=portable,
-        wc=wc,
-    )
+    if n_jobs == "cuda" and _cuda_capable and has_numba:
+        from numba import cuda
+        out = cuda.mapped_array(x.shape, ...)
+        out[:] = x
+    else:
+        out = x
+    return out
 
 
 def get_cuda_memory(kind="available"):
@@ -225,7 +217,8 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering"
     try:
         # do the IFFT normalization now so we don't have to later
-        h_fft = cupy.asarray(cuda_dict["h_fft"])
+        h_fft = _share_cuda_mem(cuda_dict["h_fft"], n_jobs)
+        h_fft = cupy.asarray(h_fft)
         logger.info(f"Using CUDA for {kind}")
     except Exception as exp:
         logger.info(
@@ -324,6 +317,8 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
     try:
         import cupy
 
+        W = _share_cuda_mem(W, n_jobs)
+
         # do the IFFT normalization now so we don't have to later
         W = cupy.asarray(W)
         logger.info("Using CUDA for FFT resampling")
@@ -349,6 +344,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len):
 def _cuda_upload_rfft(x, n, axis=-1):
     """Upload and compute rfft."""
     import cupy
+    x = _share_cuda_mem(x, "cuda")
 
     return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis)
 
@@ -356,6 +352,7 @@ def _cuda_upload_rfft(x, n, axis=-1):
 def _cuda_irfft_get(x, n, axis=-1):
     """Compute irfft and get."""
     import cupy
+    x = _share_cuda_mem(x, "cuda")
 
     return cupy.fft.irfft(x, n=n, axis=axis).get()

diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py
index 45c405d3df9..47f5487397a 100644
--- a/mne/tests/test_filter.py
+++ b/mne/tests/test_filter.py
@@ -17,7 +17,6 @@
 from mne import Epochs, create_info
 from mne._fiff.pick import _DATA_CH_TYPES_SPLIT
-from mne.cuda import get_shared_mem
 from mne.filter import (
     _length_factors,
     _overlap_add_filter,
@@ -402,9 +401,6 @@ def test_resample(method):
 def test_resample_scipy():
     """Test resampling against SciPy."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     n_jobs_test = (1, "cuda")
     for window in ("boxcar", "hann"):
         for N in (100, 101, 102, 103):
@@ -412,10 +408,6 @@ def test_resample_scipy():
             err_msg = f"{N}: {window}"
             x_2_sp = sp_resample(x, 2 * N, window=window)
             for n_jobs in n_jobs_test:
-                if n_jobs == "cuda" and _cuda_capable and has_numba:
-                    tmp = x
-                    x = get_shared_mem(x.shape)
-                    x[:] = tmp
                 x_2 = resample(x, 2, 1, npad=0, window=window, n_jobs=n_jobs)
                 assert_allclose(x_2, x_2_sp, atol=1e-12, err_msg=err_msg)
             new_len = int(round(len(x) * (1.0 / 2.0)))
@@ -428,16 +420,8 @@ def test_resample_scipy():
 @pytest.mark.parametrize("n_jobs", (2, "cuda"))
 def test_n_jobs(n_jobs):
     """Test resampling against SciPy."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     x = np.random.RandomState(0).randn(4, 100)
 
-    if n_jobs == "cuda" and _cuda_capable and has_numba:
-        tmp = x
-        x = get_shared_mem(x.shape)
-        x[:] = tmp
-
     y1 = resample(x, 2, 1, n_jobs=None)
     y2 = resample(x, 2, 1, n_jobs=n_jobs)
     assert_allclose(y1, y2)
@@ -857,21 +841,13 @@ def test_cuda_fir():
 def test_cuda_resampling():
     """Test CUDA resampling."""
-    from mne.cuda import _cuda_capable  # allow cuda.init_cuda() to set it
-    from mne.fixes import has_numba
-
     rng = np.random.RandomState(0)
     for window in ("boxcar", "triang"):
         for N in (997, 1000):  # one prime, one even
             a = rng.randn(2, N)
             for fro, to in ((1, 2), (2, 1), (1, 3), (3, 1)):
                 a1 = resample(a, fro, to, n_jobs=None, npad="auto", window=window)
-                if _cuda_capable and has_numba:
-                    x = get_shared_mem(a.shape)
-                    x[:] = a
-                    a2 = resample(x, fro, to, n_jobs="cuda", npad="auto", window=window)
-                else:
-                    a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
+                a2 = resample(a, fro, to, n_jobs="cuda", npad="auto", window=window)
                 assert_allclose(a1, a2, rtol=1e-7, atol=1e-14)
                 assert_array_almost_equal(a1, a2, 14)
     assert_array_equal(resample(np.zeros(2), 2, 1, n_jobs="cuda"), np.zeros(4))

From cfd6fb3a6e621917c996a2b0fc07b6209bd5b824 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 20:15:36 +0000
Subject: [PATCH 15/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mne/cuda.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mne/cuda.py b/mne/cuda.py
index 37ae9306d3d..3048c7766fa 100644
--- a/mne/cuda.py
+++ b/mne/cuda.py
@@ -19,9 +19,8 @@
 _cuda_capable = False
 
-def _share_cuda_mem(
-    x, n_jobs
-):
+
+def _share_cuda_mem(x, n_jobs):
     """Get shared memory space to avoid copying from cpu to gpu when possible.
 
     Allocate a mapped ndarray with a buffer that is pinned and mapped on
     to the device. Similar to np.empty()
@@ -49,10 +48,12 @@ def _share_cuda_mem(
         shared memory is already allocated.
     """
     from numba import cuda
+
     from mne.fixes import has_numba
 
     if n_jobs == "cuda" and _cuda_capable and has_numba:
         from numba import cuda
+
         out = cuda.mapped_array(x.shape, ...)
out[:] = x else: @@ -344,6 +345,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): def _cuda_upload_rfft(x, n, axis=-1): """Upload and compute rfft.""" import cupy + x = _share_cuda_mem(x, "cuda") return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis) @@ -352,6 +354,7 @@ def _cuda_upload_rfft(x, n, axis=-1): def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy + x = _share_cuda_mem(x, "cuda") return cupy.fft.irfft(x, n=n, axis=axis).get() From cb8fec5fc07714d69429bf673494bda4744825d5 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:29:34 -0600 Subject: [PATCH 16/28] soft import numba --- mne/cuda.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mne/cuda.py b/mne/cuda.py index 3048c7766fa..e123580f147 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -9,6 +9,7 @@ from .utils import ( _check_option, _explain_exception, + _soft_import, fill_doc, get_config, logger, @@ -47,6 +48,7 @@ def _share_cuda_mem(x, n_jobs): An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. """ + _soft_import("numba", "using shared memory") from numba import cuda from mne.fixes import has_numba From 6973b5425ff1d710ccace9c44997df7eb0096114 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:31:10 -0600 Subject: [PATCH 17/28] remove extra line --- mne/tests/test_filter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mne/tests/test_filter.py b/mne/tests/test_filter.py index 47f5487397a..e259ececbce 100644 --- a/mne/tests/test_filter.py +++ b/mne/tests/test_filter.py @@ -421,7 +421,6 @@ def test_resample_scipy(): def test_n_jobs(n_jobs): """Test resampling against SciPy.""" x = np.random.RandomState(0).randn(4, 100) - y1 = resample(x, 2, 1, n_jobs=None) y2 = resample(x, 2, 1, n_jobs=n_jobs) assert_allclose(y1, y2) From f2950e043449d92bc844f5ebc9ce704b302c64e0 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:37:26 -0600 Subject: [PATCH 18/28] revert soft import. --- mne/cuda.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index e123580f147..2ef71a8b883 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -48,9 +48,6 @@ def _share_cuda_mem(x, n_jobs): An array to be passed into cupy.asarray, which does not copy if shared memory is already allocated. 
""" - _soft_import("numba", "using shared memory") - from numba import cuda - from mne.fixes import has_numba if n_jobs == "cuda" and _cuda_capable and has_numba: From ac5b5c86245354e94f1c0c1326f5ad5a8a5ee077 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 20:37:46 +0000 Subject: [PATCH 19/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 2ef71a8b883..f4053d473da 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -9,7 +9,6 @@ from .utils import ( _check_option, _explain_exception, - _soft_import, fill_doc, get_config, logger, From e76b5a72559d34e93abb676b784fa4ac7e4f6210 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:43:31 -0600 Subject: [PATCH 20/28] W needs "cuda" passed in since it just set n_jobs to 1 --- mne/cuda.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index f4053d473da..3256625ad39 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -216,8 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = _share_cuda_mem(cuda_dict["h_fft"], n_jobs) - h_fft = cupy.asarray(h_fft) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], n_jobs)) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( @@ -316,7 +315,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): try: import cupy - W = _share_cuda_mem(W, n_jobs) + W = _share_cuda_mem(W, "cuda") # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) From 1fc59ba44ec975fa2e87b6c3a8dde178fb8bf533 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:13:18 -0600 Subject: [PATCH 21/28] hfft needs cuda for the same reason --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 3256625ad39..2cc022fd505 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -216,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], n_jobs)) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], "cuda")) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( From 9350498771ddafb8d22829837c689ea7a47e4724 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:34:49 -0600 Subject: [PATCH 22/28] remove n_jobs param --- mne/cuda.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 2cc022fd505..33f161a7154 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -20,7 +20,7 @@ _cuda_capable = False -def _share_cuda_mem(x, n_jobs): +def _share_cuda_mem(x): """Get shared memory space to avoid copying from cpu to gpu when possible. 
Allocate a mapped ndarray with a buffer that is pinned and mapped on @@ -216,7 +216,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" try: # do the IFFT normalization now so we don't have to later - h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"], "cuda")) + h_fft = cupy.asarray(_share_cuda_mem(cuda_dict["h_fft"])) logger.info(f"Using CUDA for {kind}") except Exception as exp: logger.info( @@ -315,7 +315,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): try: import cupy - W = _share_cuda_mem(W, "cuda") + W = _share_cuda_mem(W) # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) @@ -343,7 +343,7 @@ def _cuda_upload_rfft(x, n, axis=-1): """Upload and compute rfft.""" import cupy - x = _share_cuda_mem(x, "cuda") + x = _share_cuda_mem(x) return cupy.fft.rfft(cupy.asarray(x), n=n, axis=axis) @@ -352,7 +352,7 @@ def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy - x = _share_cuda_mem(x, "cuda") + x = _share_cuda_mem(x) return cupy.fft.irfft(x, n=n, axis=axis).get() From 1a1c8ecae9c27ec580cd7725b3c47e3192a8e80c Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:43:18 -0600 Subject: [PATCH 23/28] remove n_jobs gate --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 33f161a7154..a3ad03b61c3 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -49,7 +49,7 @@ def _share_cuda_mem(x): """ from mne.fixes import has_numba - if n_jobs == "cuda" and _cuda_capable and has_numba: + if _cuda_capable and has_numba: from numba import cuda out = cuda.mapped_array(x.shape, ...) From a3342fa97dcf4cd6e3d0c5bccfc5a1fe8fdbbb3d Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:33:20 -0600 Subject: [PATCH 24/28] fix docstring param --- mne/cuda.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index a3ad03b61c3..463d65ae4be 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -33,19 +33,14 @@ def _share_cuda_mem(x): Parameters ---------- - portable: bool - a boolean flag to allow the allocated device memory to be - usable in multiple devices. - wc: bool - a boolean flag to enable writecombined allocation which is faster - to write by the host and to read by the device, but slower to - write by the host and slower to write by the device. + x : 1-d array Returns ------- a mapped array: np.ndarray An array to be passed into cupy.asarray, which does not copy if - shared memory is already allocated. + shared memory is already allocated. If cuda and numba are not + available, return the original array. """ from mne.fixes import has_numba From 88ae8cbeabfaefdf6b5f05d0e31da8f4b18915b7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:33:47 +0000 Subject: [PATCH 25/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 463d65ae4be..fee2ea51928 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -39,7 +39,7 @@ def _share_cuda_mem(x): ------- a mapped array: np.ndarray An array to be passed into cupy.asarray, which does not copy if - shared memory is already allocated. If cuda and numba are not + shared memory is already allocated. 
If cuda and numba are not available, return the original array. """ from mne.fixes import has_numba From 4bbd2c71648834391d74075f962d7c5a84be2d04 Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:23:43 -0600 Subject: [PATCH 26/28] irfft, takes cuda mem, and so x is a different type. i do not think getting shared mem is necessary, for filtering...need to think on this tho --- mne/cuda.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index fee2ea51928..61c4bf38453 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -47,8 +47,8 @@ def _share_cuda_mem(x): if _cuda_capable and has_numba: from numba import cuda - out = cuda.mapped_array(x.shape, ...) - out[:] = x + out = cuda.mapped_array(x.shape) + out[:] = x.get() else: out = x return out @@ -217,6 +217,7 @@ def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft, kind="FFT FIR filtering" logger.info( "CUDA not used, could not instantiate memory (arrays may be too " f'large: "{exp}"), falling back to n_jobs=None' + f", {_explain_exception()}" ) cuda_dict.update(h_fft=h_fft, rfft=_cuda_upload_rfft, irfft=_cuda_irfft_get) else: @@ -315,11 +316,11 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) logger.info("Using CUDA for FFT resampling") - except Exception: + except Exception as e: logger.info( "CUDA not used, could not instantiate memory " "(arrays may be too large), falling back to " - "n_jobs=None" + f"n_jobs=None, {_explain_exception()}" ) else: cuda_dict.update( @@ -347,8 +348,6 @@ def _cuda_irfft_get(x, n, axis=-1): """Compute irfft and get.""" import cupy - x = _share_cuda_mem(x) - return cupy.fft.irfft(x, n=n, axis=axis).get() From af3cab2bb03d7f5257110cec0ec7ebd05f427e81 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 18:24:04 +0000 Subject: [PATCH 27/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/cuda.py b/mne/cuda.py index 61c4bf38453..22b823ab338 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -47,7 +47,7 @@ def _share_cuda_mem(x): if _cuda_capable and has_numba: from numba import cuda - out = cuda.mapped_array(x.shape) + out = cuda.mapped_array(x.shape) out[:] = x.get() else: out = x @@ -316,7 +316,7 @@ def _setup_cuda_fft_resample(n_jobs, W, new_len): # do the IFFT normalization now so we don't have to later W = cupy.asarray(W) logger.info("Using CUDA for FFT resampling") - except Exception as e: + except Exception: logger.info( "CUDA not used, could not instantiate memory " "(arrays may be too large), falling back to " From 74a7794f0b48518aca3d73a369e0a8aeffb5646d Mon Sep 17 00:00:00 2001 From: Scott Robertson <7535147+scottrbrtsn@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:39:02 -0600 Subject: [PATCH 28/28] remove troubleshooting woes --- mne/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mne/cuda.py b/mne/cuda.py index 22b823ab338..1c3519026a3 100644 --- a/mne/cuda.py +++ b/mne/cuda.py @@ -48,7 +48,7 @@ def _share_cuda_mem(x): from numba import cuda out = cuda.mapped_array(x.shape) - out[:] = x.get() + out[:] = x else: out = x return out
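
A minimal usage sketch of where the series ends up (assumptions: numba and cupy
are installed, a CUDA device is present, and CUDA support is enabled in MNE's
config so that n_jobs="cuda" is honoured; otherwise resample falls back to the
CPU path):

    import numpy as np
    import mne
    from mne.filter import resample

    mne.set_config("MNE_USE_CUDA", "true")  # opt in to the CUDA FFT path
    mne.cuda.init_cuda()

    x = np.random.RandomState(0).randn(4, 1000)
    # The pinned/mapped staging of the FFT inputs now happens inside mne.cuda
    # (via the private _share_cuda_mem helper), so callers only pass n_jobs.
    y = resample(x, 2, 1, npad="auto", n_jobs="cuda")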