diff --git a/python/paddle/audio/features/layers.py b/python/paddle/audio/features/layers.py index 5f72d27d854d5..1f578f072b8e5 100644 --- a/python/paddle/audio/features/layers.py +++ b/python/paddle/audio/features/layers.py @@ -31,6 +31,8 @@ 'hamming', 'hann', 'kaiser', + 'bartlett', + 'nuttall', 'gaussian', 'exponential', 'triang', @@ -50,7 +52,7 @@ class Spectrogram(nn.Layer): n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. + window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'. power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. @@ -135,7 +137,7 @@ class MelSpectrogram(nn.Layer): n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. 
win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. + window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'. power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. @@ -242,7 +244,7 @@ class LogMelSpectrogram(nn.Layer): n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. + window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'. 
power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. @@ -350,7 +352,7 @@ class MFCC(nn.Layer): n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. + window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'. power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. 
diff --git a/python/paddle/audio/functional/window.py b/python/paddle/audio/functional/window.py
index 22197ec192b44..2962cb0fbed0e 100644
--- a/python/paddle/audio/functional/window.py
+++ b/python/paddle/audio/functional/window.py
@@ -55,6 +55,61 @@ def _cat(x: list[Tensor], data_type: str) -> Tensor:
     return paddle.concat(l)
 
 
+@window_function_register.register()
+def _bartlett(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
+    """Build a Bartlett (triangular) window of length ``M``.
+
+    Mirrors scipy.signal.windows.bartlett().
+    """
+    if _len_guards(M):
+        return paddle.ones((M,), dtype=dtype)
+    length, needs_trunc = _extend(M, sym)
+
+    idx = paddle.arange(0, length, dtype=dtype)
+    last = paddle.to_tensor(length, dtype=dtype) - 1
+    rising = 2.0 * idx / last
+    w = paddle.where(
+        paddle.less_equal(idx, last / 2.0), rising, 2.0 - rising
+    )
+
+    return _truncate(w, needs_trunc)
+
+
+@window_function_register.register()
+def _kaiser(
+    M: int, beta: float, sym: bool = True, dtype: str = 'float64'
+) -> Tensor:
+    """Build a Kaiser window of length ``M`` with shape parameter ``beta``.
+
+    Mirrors scipy.signal.windows.kaiser().
+    """
+    if _len_guards(M):
+        return paddle.ones((M,), dtype=dtype)
+    length, needs_trunc = _extend(M, sym)
+
+    beta_t = paddle.to_tensor(beta, dtype=dtype)
+    half = (paddle.to_tensor(length, dtype=dtype) - 1) / 2.0
+    # Sample positions rescaled to [-1, 1] around the window centre.
+    offset = (paddle.arange(0, length, dtype=dtype) - half) / half
+    numerator = paddle.i0(beta_t * paddle.sqrt(1 - offset**2.0))
+    w = numerator / paddle.i0(beta_t)
+
+    return _truncate(w, needs_trunc)
+
+
+@window_function_register.register()
+def _nuttall(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
+    """Build a Nuttall window of length ``M``.
+
+    Mirrors scipy.signal.windows.nuttall().
+    """
+    # Four cosine-sum coefficients, handed to the shared _general_cosine helper.
+    coeffs = [0.3635819, 0.4891775, 0.1365995, 0.0106411]
+    return _general_cosine(
+        M, a=paddle.to_tensor(coeffs, dtype=dtype), sym=sym, dtype=dtype
+    )
+
+
 @window_function_register.register()
 def _acosh(x: Tensor | float) -> Tensor:
     if isinstance(x, float):
@@ -347,7 +402,7 @@ def get_window(
     """Return a window of a given length and type.
 
     Args:
-        window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
+        window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Passing 'gaussian', 'exponential' or 'kaiser' as a bare string raises ValueError; pass a tuple of the name followed by its parameters instead, e.g. ('kaiser', 14.0).
         win_length (int): Number of samples.
         fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
         dtype (str, optional): The data type of the return window. Defaults to 'float64'.
@@ -364,17 +419,16 @@ def get_window(
             >>> cosine_window = paddle.audio.functional.get_window('cosine', n_fft)
 
             >>> std = 7
-            >>> gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft)
+            >>> gaussian_window = paddle.audio.functional.get_window(('gaussian', std), n_fft)
     """
     sym = not fftbins
-
     args = ()
     if isinstance(window, tuple):
         winstr = window[0]
         if len(window) > 1:
             args = window[1:]
     elif isinstance(window, str):
-        if window in ['gaussian', 'exponential']:
+        if window in ['gaussian', 'exponential', 'kaiser']:
             raise ValueError(
                 "The '" + window + "' window needs one or "
                 "more parameters -- pass a tuple."
@@ -388,7 +442,6 @@ def get_window(
         winfunc = window_function_register.get('_' + winstr)
     except KeyError as e:
         raise ValueError("Unknown window type.") from e
-
     params = (win_length, *args)
     kwargs = {'sym': sym}
     return winfunc(*params, dtype=dtype, **kwargs)
diff --git a/test/legacy_test/test_audio_functions.py b/test/legacy_test/test_audio_functions.py
index bac0828fc49d1..3804ae9dc381a 100644
--- a/test/legacy_test/test_audio_functions.py
+++ b/test/legacy_test/test_audio_functions.py
@@ -257,6 +257,7 @@ def test_gaussian_window_and_exception(self, n_fft: int):
         np.testing.assert_array_almost_equal(
             window_scipy_exp, window_paddle_exp.numpy(), decimal=5
         )
+
         try:
             window_paddle = paddle.audio.functional.get_window("hann", -1)
         except ValueError:
@@ -290,7 +291,14 @@ def dct(n_filters, n_input):
         np.testing.assert_array_almost_equal(librosa_dct, paddle_dct, decimal=5)
 
     @parameterize(
-        [128, 256, 512], ["hamming", "hann", "triang", "bohman"], [True, False]
+        [128, 256, 512],
+        [
+            "hamming",
+            "hann",
+            "triang",
+            "bohman",
+        ],
+        [True, False],
     )
     def test_stft_and_spect(
         self, n_fft: int, window_str: str, center_flag: bool
@@ -345,7 +353,14 @@ def test_stft_and_spect(
         )
 
     @parameterize(
-        [128, 256, 512], [64, 82], ["hamming", "hann", "triang", "bohman"]
+        [128, 256, 512],
+        [64, 82],
+        [
+            "hamming",
+            "hann",
+            "triang",
+            "bohman",
+        ],
     )
     def test_istft(self, n_fft: int, hop_length: int, window_str: str):
         if len(self.waveform.shape) == 2:  # (C, T)
diff --git a/test/legacy_test/test_get_window.py b/test/legacy_test/test_get_window.py
new file mode 100644
index 0000000000000..189b45d257458
--- /dev/null
+++ b/test/legacy_test/test_get_window.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+from scipy import signal
+
+import paddle
+import paddle.audio
+from paddle.base import core
+
+
+def parameterize(*params):
+    """Expand a test over the Cartesian product of the given value lists."""
+    return parameterized.expand(list(itertools.product(*params)))
+
+
+class TestAudioFuncitons(unittest.TestCase):
+    """Check paddle.audio.functional.get_window against scipy.signal."""
+
+    def setUp(self):
+        # Run in dynamic-graph mode, on GPU 0 when paddle was built with CUDA.
+        paddle.disable_static(
+            paddle.CUDAPlace(0)
+            if core.is_compiled_with_cuda()
+            else paddle.CPUPlace()
+        )
+
+    def assert_window_close(self, expected, actual):
+        """Fail unless the paddle window matches the scipy reference."""
+        np.testing.assert_allclose(
+            actual.numpy(), expected, atol=0.0001, rtol=0.0001
+        )
+
+    @parameterize(
+        [
+            "hamming",
+            "hann",
+            "triang",
+            "bohman",
+            "blackman",
+            "cosine",
+            "tukey",
+            "taylor",
+            "bartlett",
+            "nuttall",
+        ],
+        [1, 512],
+    )
+    def test_window(self, window_type: str, n_fft: int):
+        window_scipy = signal.get_window(window_type, n_fft)
+        window_paddle = paddle.audio.functional.get_window(window_type, n_fft)
+        self.assert_window_close(window_scipy, window_paddle)
+
+    @parameterize([1, 512])
+    def test_window_and_exception(self, n_fft: int):
+        # scipy.signal.windows.* default to symmetric windows, so the paddle
+        # side passes fftbins=False to request the same variant.
+        window_scipy_gaussian = signal.windows.gaussian(n_fft, std=7)
+        window_paddle_gaussian = paddle.audio.functional.get_window(
+            ('gaussian', 7), n_fft, False
+        )
+        self.assert_window_close(window_scipy_gaussian, window_paddle_gaussian)
+
+        window_scipy_general_gaussian = signal.windows.general_gaussian(
+            n_fft, 1, 7
+        )
+        window_paddle_general_gaussian = paddle.audio.functional.get_window(
+            ('general_gaussian', 1, 7), n_fft, False
+        )
+        self.assert_window_close(
+            window_scipy_general_gaussian, window_paddle_general_gaussian
+        )
+
+        window_scipy_exp = signal.windows.exponential(n_fft)
+        window_paddle_exp = paddle.audio.functional.get_window(
+            ('exponential', None, 1), n_fft, False
+        )
+        self.assert_window_close(window_scipy_exp, window_paddle_exp)
+
+        window_scipy_kaiser = signal.windows.kaiser(n_fft, beta=14.0)
+        window_paddle_kaiser = paddle.audio.functional.get_window(
+            ('kaiser', 14.0), n_fft, False
+        )
+        self.assert_window_close(window_scipy_kaiser, window_paddle_kaiser)
+
+        # Parameterised windows must be passed as a tuple, not a bare name.
+        with self.assertRaises(ValueError):
+            paddle.audio.functional.get_window('kaiser', n_fft)
+
+
+if __name__ == '__main__':
+    unittest.main()