From 9ffad9bb2099692aaefdabc5240044cfa6058bc6 Mon Sep 17 00:00:00 2001 From: smokestacklightnin <125844868+smokestacklightnin@users.noreply.github.com> Date: Fri, 18 Aug 2023 12:36:57 -0700 Subject: [PATCH] Add CUDA JIT to `calc_n_effective()` (#119) * Add basic test for `calc_doppler_width()` * Refactor `calc_doppler_width()` and add test for vectorized implementation * Typecast to float * Add unwrapped cuda implementation of doppler_width Also typecast all global constants to float * Add wrapped cuda implementation of calc_doppler_width * Return cupy array by default * Add test for `calc_n_effective()` * Typecast inputs and refactor * Add cuda implementations and tests for `calc_n_effective()` * Return cupy array by default --- stardis/opacities/broadening.py | 140 ++++++++++-- stardis/opacities/tests/test_broadening.py | 238 +++++++++++++++++++++ 2 files changed, 366 insertions(+), 12 deletions(-) create mode 100644 stardis/opacities/tests/test_broadening.py diff --git a/stardis/opacities/broadening.py b/stardis/opacities/broadening.py index 71abda1b..0fd17a0a 100644 --- a/stardis/opacities/broadening.py +++ b/stardis/opacities/broadening.py @@ -1,19 +1,26 @@ import numpy as np from astropy import constants as const +import math import numba +from numba import cuda +GPUs_available = cuda.is_available() -SPEED_OF_LIGHT = const.c.cgs.value -BOLTZMANN_CONSTANT = const.k_B.cgs.value -PLANCK_CONSTANT = const.h.cgs.value -RYDBERG_ENERGY = (const.h.cgs * const.c.cgs * const.Ryd.cgs).value -ELEMENTARY_CHARGE = const.e.esu.value -BOHR_RADIUS = const.a0.cgs.value -VACUUM_ELECTRIC_PERMITTIVITY = 1 / (4 * np.pi) +if GPUs_available: + import cupy as cp + +PI = float(np.pi) +SPEED_OF_LIGHT = float(const.c.cgs.value) +BOLTZMANN_CONSTANT = float(const.k_B.cgs.value) +PLANCK_CONSTANT = float(const.h.cgs.value) +RYDBERG_ENERGY = float((const.h.cgs * const.c.cgs * const.Ryd.cgs).value) +ELEMENTARY_CHARGE = float(const.e.esu.value) +BOHR_RADIUS = float(const.a0.cgs.value) +VACUUM_ELECTRIC_PERMITTIVITY = 1.0 / (4.0 * PI) @numba.njit -def calc_doppler_width(nu_line, temperature, atomic_mass): +def _calc_doppler_width(nu_line, temperature, atomic_mass): """ Calculates doppler width. https://ui.adsabs.harvard.edu/abs/2003rtsa.book.....R/ @@ -31,15 +38,66 @@ def calc_doppler_width(nu_line, temperature, atomic_mass): ------- float """ + nu_line, temperature, atomic_mass = ( + float(nu_line), + float(temperature), + float(atomic_mass), + ) + return ( nu_line / SPEED_OF_LIGHT - * np.sqrt(2 * BOLTZMANN_CONSTANT * temperature / atomic_mass) + * math.sqrt(2.0 * BOLTZMANN_CONSTANT * temperature / atomic_mass) ) +@numba.vectorize(nopython=True) +def calc_doppler_width(nu_line, temperature, atomic_mass): + return _calc_doppler_width(nu_line, temperature, atomic_mass) + + +@cuda.jit +def _calc_doppler_width_cuda(res, nu_line, temperature, atomic_mass): + tid = cuda.grid(1) + size = len(res) + + if tid < size: + res[tid] = _calc_doppler_width(nu_line[tid], temperature[tid], atomic_mass[tid]) + + +def calc_doppler_width_cuda( + nu_line, + temperature, + atomic_mass, + nthreads=256, + ret_np_ndarray=False, + dtype=float, +): + arg_list = ( + nu_line, + temperature, + atomic_mass, + ) + + shortest_arg_idx = np.argmin(map(len, arg_list)) + size = len(arg_list[shortest_arg_idx]) + + nblocks = 1 + (size // nthreads) + + arg_list = tuple(map(lambda v: cp.array(v, dtype=dtype), arg_list)) + + res = cp.empty_like(arg_list[shortest_arg_idx], dtype=dtype) + + _calc_doppler_width_cuda[nblocks, nthreads]( + res, + *arg_list, + ) + + return cp.asnumpy(res) if ret_np_ndarray else res + + @numba.njit -def calc_n_effective(ion_number, ionization_energy, level_energy): +def _calc_n_effective(ion_number, ionization_energy, level_energy): """ Calculates the effective principal quantum number of an energy level. @@ -56,7 +114,65 @@ def calc_n_effective(ion_number, ionization_energy, level_energy): ------- float """ - return np.sqrt(RYDBERG_ENERGY / (ionization_energy - level_energy)) * ion_number + ion_number, ionization_energy, level_energy = ( + int(ion_number), + float(ionization_energy), + float(level_energy), + ) + return math.sqrt(RYDBERG_ENERGY / (ionization_energy - level_energy)) * ion_number + + +@numba.vectorize(nopython=True) +def calc_n_effective(ion_number, ionization_energy, level_energy): + return _calc_n_effective( + ion_number, + ionization_energy, + level_energy, + ) + + +@cuda.jit +def _calc_n_effective_cuda(res, ion_number, ionization_energy, level_energy): + tid = cuda.grid(1) + size = len(res) + + if tid < size: + res[tid] = _calc_n_effective( + ion_number[tid], + ionization_energy[tid], + level_energy[tid], + ) + + +def calc_n_effective_cuda( + ion_number, + ionization_energy, + level_energy, + nthreads=256, + ret_np_ndarray=False, + dtype=float, +): + arg_list = ( + ion_number, + ionization_energy, + level_energy, + ) + + shortest_arg_idx = np.argmin(map(len, arg_list)) + size = len(arg_list[shortest_arg_idx]) + + nblocks = 1 + (size // nthreads) + + arg_list = tuple(map(lambda v: cp.array(v, dtype=dtype), arg_list)) + + res = cp.empty_like(arg_list[shortest_arg_idx], dtype=dtype) + + _calc_n_effective_cuda[nblocks, nthreads]( + res, + *arg_list, + ) + + return cp.asnumpy(res) if ret_np_ndarray else res @numba.njit @@ -186,7 +302,7 @@ def calc_gamma_van_der_waals( gamma_van_der_waals = ( 17 - * (8 * BOLTZMANN_CONSTANT * temperature / (np.pi * h_mass)) ** 0.3 + * (8 * BOLTZMANN_CONSTANT * temperature / (PI * h_mass)) ** 0.3 * c6**0.4 * h_density ) diff --git a/stardis/opacities/tests/test_broadening.py b/stardis/opacities/tests/test_broadening.py new file mode 100644 index 00000000..be63407c --- /dev/null +++ b/stardis/opacities/tests/test_broadening.py @@ -0,0 +1,238 @@ +import pytest +import numpy as np +from astropy import constants as const +from numba import cuda + +from stardis.opacities.broadening import ( + calc_doppler_width, + _calc_doppler_width_cuda, + calc_doppler_width_cuda, + calc_n_effective, + _calc_n_effective_cuda, + calc_n_effective_cuda, +) + +GPUs_available = cuda.is_available() + +if GPUs_available: + import cupy as cp + + +PI = np.pi +SPEED_OF_LIGHT = const.c.cgs.value +BOLTZMANN_CONSTANT = const.k_B.cgs.value +PLANCK_CONSTANT = const.h.cgs.value +RYDBERG_ENERGY = (const.h.cgs * const.c.cgs * const.Ryd.cgs).value +ELEMENTARY_CHARGE = const.e.esu.value +BOHR_RADIUS = const.a0.cgs.value +VACUUM_ELECTRIC_PERMITTIVITY = 1 / (4 * PI) + + +@pytest.mark.parametrize( + "calc_doppler_width_sample_values_input_nu_line,calc_doppler_width_sample_values_input_temperature,calc_doppler_width_sample_values_input_atomic_mass, calc_doppler_width_sample_values_expected_result", + [ + ( + SPEED_OF_LIGHT, + 0.5, + BOLTZMANN_CONSTANT, + 1.0, + ), + ( + np.array(2 * [SPEED_OF_LIGHT]), + np.array(2 * [0.5]), + np.array(2 * [BOLTZMANN_CONSTANT]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_doppler_width_sample_values( + calc_doppler_width_sample_values_input_nu_line, + calc_doppler_width_sample_values_input_temperature, + calc_doppler_width_sample_values_input_atomic_mass, + calc_doppler_width_sample_values_expected_result, +): + assert np.allclose( + calc_doppler_width( + calc_doppler_width_sample_values_input_nu_line, + calc_doppler_width_sample_values_input_temperature, + calc_doppler_width_sample_values_input_atomic_mass, + ), + calc_doppler_width_sample_values_expected_result, + ) + + +@pytest.mark.skipif( + not GPUs_available, reason="No GPU is available to test CUDA function" +) +@pytest.mark.parametrize( + "calc_doppler_width_cuda_unwrapped_sample_values_input_nu_line,calc_doppler_width_cuda_unwrapped_sample_values_input_temperature,calc_doppler_width_cuda_unwrapped_sample_values_input_atomic_mass,calc_doppler_width_cuda_unwrapped_sample_values_expected_result", + [ + ( + np.array(2 * [SPEED_OF_LIGHT]), + np.array(2 * [0.5]), + np.array(2 * [BOLTZMANN_CONSTANT]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_doppler_width_cuda_unwrapped_sample_values( + calc_doppler_width_cuda_unwrapped_sample_values_input_nu_line, + calc_doppler_width_cuda_unwrapped_sample_values_input_temperature, + calc_doppler_width_cuda_unwrapped_sample_values_input_atomic_mass, + calc_doppler_width_cuda_unwrapped_sample_values_expected_result, +): + arg_list = ( + calc_doppler_width_cuda_unwrapped_sample_values_input_nu_line, + calc_doppler_width_cuda_unwrapped_sample_values_input_temperature, + calc_doppler_width_cuda_unwrapped_sample_values_input_atomic_mass, + ) + + arg_list = tuple(map(cp.array, arg_list)) + result_values = cp.empty_like(arg_list[0]) + + nthreads = 256 + length = len(calc_doppler_width_cuda_unwrapped_sample_values_expected_result) + nblocks = 1 + (length // nthreads) + + _calc_doppler_width_cuda[nblocks, nthreads](result_values, *arg_list) + + assert np.allclose( + cp.asnumpy(result_values), + calc_doppler_width_cuda_unwrapped_sample_values_expected_result, + ) + + +@pytest.mark.skipif( + not GPUs_available, reason="No GPU is available to test CUDA function" +) +@pytest.mark.parametrize( + "calc_doppler_width_cuda_sample_values_input_nu_line, calc_doppler_width_cuda_sample_values_input_temperature, calc_doppler_width_cuda_sample_values_input_atomic_mass, calc_doppler_width_cuda_wrapped_sample_cuda_values_expected_result", + [ + ( + np.array(2 * [SPEED_OF_LIGHT]), + np.array(2 * [0.5]), + np.array(2 * [BOLTZMANN_CONSTANT]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_doppler_width_cuda_wrapped_sample_cuda_values( + calc_doppler_width_cuda_sample_values_input_nu_line, + calc_doppler_width_cuda_sample_values_input_temperature, + calc_doppler_width_cuda_sample_values_input_atomic_mass, + calc_doppler_width_cuda_wrapped_sample_cuda_values_expected_result, +): + arg_list = ( + calc_doppler_width_cuda_sample_values_input_nu_line, + calc_doppler_width_cuda_sample_values_input_temperature, + calc_doppler_width_cuda_sample_values_input_atomic_mass, + ) + assert np.allclose( + calc_doppler_width_cuda(*map(cp.asarray, arg_list)), + calc_doppler_width_cuda_wrapped_sample_cuda_values_expected_result, + ) + + +@pytest.mark.parametrize( + "calc_n_effective_sample_values_input_ion_number,calc_n_effective_sample_values_input_ionization_energy,calc_n_effective_sample_values_input_level_energy, calc_n_effective_sample_values_expected_result", + [ + ( + 1.0, + RYDBERG_ENERGY, + 0, + 1.0, + ), + ( + np.array(2 * [1]), + np.array(2 * [RYDBERG_ENERGY]), + np.array(2 * [0]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_n_effective_sample_values( + calc_n_effective_sample_values_input_ion_number, + calc_n_effective_sample_values_input_ionization_energy, + calc_n_effective_sample_values_input_level_energy, + calc_n_effective_sample_values_expected_result, +): + assert np.allclose( + calc_n_effective( + calc_n_effective_sample_values_input_ion_number, + calc_n_effective_sample_values_input_ionization_energy, + calc_n_effective_sample_values_input_level_energy, + ), + calc_n_effective_sample_values_expected_result, + ) + + +@pytest.mark.skipif( + not GPUs_available, reason="No GPU is available to test CUDA function" +) +@pytest.mark.parametrize( + "calc_n_effective_cuda_unwrapped_sample_values_input_ion_number,calc_n_effective_cuda_unwrapped_sample_values_input_ionization_energy,calc_n_effective_cuda_unwrapped_sample_values_input_level_energy,calc_n_effective_cuda_unwrapped_sample_values_expected_result", + [ + ( + np.array(2 * [1]), + np.array(2 * [RYDBERG_ENERGY]), + np.array(2 * [0]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_n_effective_cuda_unwrapped_sample_values( + calc_n_effective_cuda_unwrapped_sample_values_input_ion_number, + calc_n_effective_cuda_unwrapped_sample_values_input_ionization_energy, + calc_n_effective_cuda_unwrapped_sample_values_input_level_energy, + calc_n_effective_cuda_unwrapped_sample_values_expected_result, +): + arg_list = ( + calc_n_effective_cuda_unwrapped_sample_values_input_ion_number, + calc_n_effective_cuda_unwrapped_sample_values_input_ionization_energy, + calc_n_effective_cuda_unwrapped_sample_values_input_level_energy, + ) + + arg_list = tuple(map(cp.array, arg_list)) + result_values = cp.empty_like(arg_list[0]) + + nthreads = 256 + length = len(calc_n_effective_cuda_unwrapped_sample_values_expected_result) + nblocks = 1 + (length // nthreads) + + _calc_n_effective_cuda[nblocks, nthreads](result_values, *arg_list) + + assert np.allclose( + cp.asnumpy(result_values), + calc_n_effective_cuda_unwrapped_sample_values_expected_result, + ) + + +@pytest.mark.skipif( + not GPUs_available, reason="No GPU is available to test CUDA function" +) +@pytest.mark.parametrize( + "calc_n_effective_cuda_sample_values_input_ion_number, calc_n_effective_cuda_sample_values_input_ionization_energy, calc_n_effective_cuda_sample_values_input_level_energy, calc_n_effective_cuda_wrapped_sample_cuda_values_expected_result", + [ + ( + np.array(2 * [1]), + np.array(2 * [RYDBERG_ENERGY]), + np.array(2 * [0]), + np.array(2 * [1.0]), + ), + ], +) +def test_calc_n_effective_cuda_wrapped_sample_cuda_values( + calc_n_effective_cuda_sample_values_input_ion_number, + calc_n_effective_cuda_sample_values_input_ionization_energy, + calc_n_effective_cuda_sample_values_input_level_energy, + calc_n_effective_cuda_wrapped_sample_cuda_values_expected_result, +): + arg_list = ( + calc_n_effective_cuda_sample_values_input_ion_number, + calc_n_effective_cuda_sample_values_input_ionization_energy, + calc_n_effective_cuda_sample_values_input_level_energy, + ) + assert np.allclose( + calc_n_effective_cuda(*map(cp.asarray, arg_list)), + calc_n_effective_cuda_wrapped_sample_cuda_values_expected_result, + )