Skip to content

Commit

Permalink
Use cupy arrays instead of numba.cuda arrays
Browse files Browse the repository at this point in the history
Also call faddeeva_cuda with prespecified numbers of blocks and threads
  • Loading branch information
smokestacklightnin committed Aug 9, 2023
1 parent 31964f4 commit 4be0cf6
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 36 deletions.
34 changes: 8 additions & 26 deletions stardis/opacities/tests/test_voigt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from math import sqrt
from numba import cuda
import cupy as cp
from stardis.opacities.voigt import (
faddeeva,
_faddeeva_cuda,
Expand Down Expand Up @@ -45,15 +46,17 @@ def test_faddeeva_cuda_unwrapped_sample_values(
faddeeva_cuda_unwrapped_sample_values_input,
faddeeva_cuda_unwrapped_sample_values_expected_result,
):
test_values = cuda.to_device(faddeeva_cuda_unwrapped_sample_values_input)
result_values = cuda.device_array_like(test_values)
test_values = cp.asarray(faddeeva_cuda_unwrapped_sample_values_input)
result_values = cp.empty_like(test_values)

nthreads = 256
length = len(faddeeva_cuda_unwrapped_sample_values_input)
nblocks = 1 + (length // nthreads)

_faddeeva_cuda.forall(length)(result_values, test_values)
_faddeeva_cuda[nblocks, nthreads](result_values, test_values)

assert np.allclose(
result_values.copy_to_host(),
cp.asnumpy(result_values),
faddeeva_cuda_unwrapped_sample_values_expected_result,
)

Expand Down Expand Up @@ -95,32 +98,11 @@ def test_faddeeva_cuda_wrapped_sample_cuda_values(
faddeeva_cuda_wrapped_sample_cuda_values_expected_result,
):
assert np.allclose(
faddeeva_cuda(
cuda.device_array_like(faddeeva_cuda_wrapped_sample_cuda_values_input)
),
faddeeva_cuda(cp.asarray(faddeeva_cuda_wrapped_sample_cuda_values_input)),
faddeeva_cuda_wrapped_sample_cuda_values_expected_result,
)


@pytest.mark.skipif(
not GPUs_available, reason="No GPU is available to test CUDA function"
)
@pytest.mark.parametrize(
"faddeeva_cuda_wrapped_noncomplex_input_input",
[
np.array([0, 0], dtype=int),
np.array([0, 0], dtype=float),
],
)
def test_faddeeva_cuda_wrapped_noncomplex_input(
faddeeva_cuda_wrapped_noncomplex_input_input,
):
with pytest.raises(TypeError):
_ = faddeeva_cuda(
cuda.device_array_like(faddeeva_cuda_wrapped_noncomplex_input_input)
)


test_voigt_profile_division_by_zero_test_values = [
-100,
-5,
Expand Down
17 changes: 7 additions & 10 deletions stardis/opacities/voigt.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import numba
from numba import cuda
import cupy as cp
import cmath

SQRT_PI = np.sqrt(np.pi, dtype=float)
Expand Down Expand Up @@ -91,18 +92,14 @@ def _faddeeva_cuda(res, z):
res[tid] = _faddeeva(z[tid])


def faddeeva_cuda(z):
def faddeeva_cuda(z, nthreads=256, ret_np_ndarray=True):
size = len(z)
if hasattr(z, "astype"):
z = z.astype(complex)
if not np.iscomplexobj(z):
raise TypeError(
f"Faddeeva with cuda only works with complex arguments. Expected any complex datatyep and instead got {z.dtype}."
)
res = cuda.device_array_like(z)
nblocks = 1 + (size // nthreads)
z = cp.asarray(z, dtype=complex)
res = cp.empty_like(z)

_faddeeva_cuda.forall(size)(res, z)
return res.copy_to_host()
_faddeeva_cuda[nblocks, nthreads](res, z)
return cp.asnumpy(res) if ret_np_ndarray else res


@numba.njit
Expand Down

0 comments on commit 4be0cf6

Please sign in to comment.