Skip to content

Commit

Permalink
dev(narugo): save the newest features
Browse files Browse the repository at this point in the history
  • Loading branch information
narugo1992 committed Aug 27, 2024
1 parent 8dbedef commit 88aeda7
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 11 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pandas
scipy
hfutils>=0.4.1
soundfile>=0.12
fastdtw
fastdtw
librosa>=0.10
8 changes: 4 additions & 4 deletions soundutils/data/sound.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def resample(self, sample_rate) -> 'Sound':
if sample_rate == self._sample_rate:
return self

resampled_length = int(self.samples * (sample_rate / self._sample_rate))
resampled_length = int(round(self.samples * (sample_rate / self._sample_rate)))
resampled_data = signal.resample(self._data, resampled_length)

return Sound(data=resampled_data, sample_rate=sample_rate)
Expand Down Expand Up @@ -141,7 +141,7 @@ def _fmt_time(x, pos):

ax.set_xlabel('Time [hh:mm:ss.mmm]')
ax.set_ylabel('Amplitude')
ax.set_title(f'{title or "Audio Signal"}\n'
f'Channels: {self.channels}, Sample Rate: {self._sample_rate}\n'
f'Time: {self.time:.3f}s ({plural_word(self.samples, "frame")})\n')
ax.set_title(f'{title or "Audio Signal"}, '
f'Channels: {self.channels}, Sample Rate: {self._sample_rate}, '
f'Time: {self.time:.3f}s ({plural_word(self.samples, "frame")})')
ax.legend()
1 change: 1 addition & 0 deletions soundutils/similarity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .base import SoundAlignError, SoundLengthNotMatch, SoundResampleRateNotMatch, SoundChannelsNotMatch
from .correlation import sound_pearson_similarity
from .dtw import sound_fastdtw
from .mfcc import sound_mfcc_similarity
from .mse import sound_mse, sound_rmse
from .spectral import sound_spectral_centroid_distance
40 changes: 40 additions & 0 deletions soundutils/similarity/mfcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Literal

import librosa
import numpy as np
from scipy.spatial.distance import cosine

from .base import _align_sounds
from ..data import SoundTyping


def sound_mfcc_similarity(
        sound1: SoundTyping, sound2: SoundTyping,
        n_mfcc: int = 13, mode: Literal['flat', 'mean'] = 'flat',
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['none', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'none',
        channels_align: Literal['none'] = 'none',
) -> float:
    """
    Compute an MFCC-based cosine similarity between two sounds.

    Both sounds are first passed through ``_align_sounds``. For every channel,
    MFCCs are computed for both sounds with ``librosa.feature.mfcc``, reduced
    to a 1-D feature vector according to ``mode``, and compared with cosine
    similarity (``1 - cosine distance``). The per-channel similarities are
    averaged into the final score.

    :param sound1: First sound.
    :param sound2: Second sound.
    :param n_mfcc: Number of MFCC coefficients, forwarded to ``librosa.feature.mfcc``.
    :param mode: How the MFCC matrix becomes a feature vector. ``'flat'`` flattens
        the whole matrix; ``'mean'`` averages it over its last axis.
    :param resample_rate_align: Sample-rate alignment strategy, forwarded to ``_align_sounds``.
    :param time_align: Length alignment strategy, forwarded to ``_align_sounds``.
    :param channels_align: Channel alignment strategy, forwarded to ``_align_sounds``.
    :return: Mean of the per-channel cosine similarities, as a Python ``float``.
    :raises ValueError: If ``mode`` is neither ``'flat'`` nor ``'mean'``.

    .. note::
        Any exception raised by ``_align_sounds`` propagates unchanged and takes
        precedence over the ``mode`` check, because alignment runs first.
    """
    (data1, sr1), (data2, sr2) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # Resolve the feature extractor once, before any MFCC work, so an invalid
    # mode fails fast instead of after the first channel has been processed
    # (and is still rejected when there are no channels to iterate over).
    if mode == 'flat':
        def _to_feature(mfcc):
            return mfcc.flatten()
    elif mode == 'mean':
        def _to_feature(mfcc):
            return np.mean(mfcc, axis=-1)
    else:
        raise ValueError(f'Invalid mode for MFCC - {mode!r}.')

    similarities = []
    # NOTE(review): assumes the aligned arrays are channel-first, i.e. the
    # first axis indexes channels - confirm against `_align_sounds`.
    for ch in range(data1.shape[0]):
        mfcc1 = librosa.feature.mfcc(y=data1[ch], sr=sr1, n_mfcc=n_mfcc)
        mfcc2 = librosa.feature.mfcc(y=data2[ch], sr=sr2, n_mfcc=n_mfcc)
        # scipy's `cosine` is a distance; subtract from 1 to get a similarity.
        similarities.append(1 - cosine(_to_feature(mfcc1), _to_feature(mfcc2)))

    return np.mean(similarities).item()
18 changes: 12 additions & 6 deletions test/data/test_sound.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os
import re

import numpy as np
import pytest
import torch
import torchaudio
from hbutils.testing import isolated_directory, tmatrix

from soundutils.data import Sound
from soundutils.similarity import sound_spectral_centroid_distance
from soundutils.similarity import sound_spectral_centroid_distance, sound_mfcc_similarity
from ..testings import get_testfile


Expand Down Expand Up @@ -129,9 +129,15 @@ def test_repr(self, file, regex):
def test_resample(self, file, sample_rate):
sound_file = get_testfile('assets', file)
sound = Sound.open(sound_file)
resampler = torchaudio.transforms.Resample(sound.sample_rate, sample_rate)
data, _ = sound.to_numpy()
r_data = resampler(torch.from_numpy(data).type(torch.float32)).numpy()
expected_new_sound = Sound.from_numpy(r_data, sample_rate)

body, ext = os.path.splitext(file)
expected_sound_file = get_testfile('assets', f'{body}_sr{sample_rate}{ext}')
new_sound = sound.resample(sample_rate)
new_sound.save(expected_sound_file)
assert sound_spectral_centroid_distance(new_sound, expected_sound_file) < 2
assert new_sound.sample_rate == sample_rate
assert sound_mfcc_similarity(
new_sound, expected_new_sound,
time_align='pad',
resample_rate_align='min',
) >= 0.98
121 changes: 121 additions & 0 deletions test/similarity/test_mfcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import pytest

from soundutils.similarity import SoundLengthNotMatch, SoundChannelsNotMatch, SoundResampleRateNotMatch, \
sound_mfcc_similarity
from test.testings import get_testfile


@pytest.mark.unittest
class TestSimilarityMFCC:
    """
    Tests for ``sound_mfcc_similarity``.

    Each parametrized case supplies two asset file names and ``v``, which is
    either the expected similarity (compared with ``pytest.approx``) or an
    exception class that the call is expected to raise.
    """

    @staticmethod
    def _check_similarity(name1, name2, expected, **options):
        # Shared body of every test below: resolve both asset paths, then
        # either expect an exception or compare the score approximately.
        path1 = get_testfile('assets', name1)
        path2 = get_testfile('assets', name2)
        expects_error = isinstance(expected, type) and issubclass(expected, BaseException)
        if expects_error:
            with pytest.raises(expected):
                sound_mfcc_similarity(path1, path2, **options)
            return
        assert sound_mfcc_similarity(path1, path2, **options) == pytest.approx(expected)

    @pytest.mark.parametrize(['file1', 'file2', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 1.0),
        ('texas_short.wav', 'texas_long.wav', SoundLengthNotMatch),
        ('texas_long.wav', 'texas_long.wav', 1.0),
        ('texas_long.wav', 'texas_long.flac', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 0.9987497286333322),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', SoundResampleRateNotMatch),
        ('texas_short.wav', 'stereo_sine_wave_44100.wav', SoundChannelsNotMatch),
    ])
    def test_sound_mfcc_similarity(self, file1, file2, v):
        # Default arguments: no alignment of any kind.
        self._check_similarity(file1, file2, v)

    @pytest.mark.parametrize(['file1', 'file2', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 1.0),
        ('texas_short.wav', 'texas_long.wav', 0.8899687300652068),
        ('texas_long.wav', 'texas_short.wav', 0.8899687300652068),
        ('texas_long.wav', 'texas_long.wav', 1.0),
        ('texas_long.wav', 'texas_long.flac', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 0.9987497286333322),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', SoundResampleRateNotMatch),
        ('texas_short.wav', 'stereo_sine_wave_44100.wav', SoundChannelsNotMatch),
    ])
    def test_sound_mfcc_similarity_pad(self, file1, file2, v):
        # Length mismatches are handled with time_align='pad'.
        self._check_similarity(file1, file2, v, time_align='pad')

    @pytest.mark.parametrize(['file1', 'file2', 'time_align', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 'none', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'pad', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'prefix', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'resample_max', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'resample_min', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'bullshit', ValueError),
        ('texas_short.wav', 'texas_long.wav', 'none', SoundLengthNotMatch),
        ('texas_short.wav', 'texas_long.wav', 'pad', 0.8899687300652068),
        ('texas_long.wav', 'texas_short.wav', 'pad', 0.8899687300652068),
        ('texas_short.wav', 'texas_long.wav', 'prefix', 0.8728770796805216),
        ('texas_short.wav', 'texas_long.wav', 'resample_max', 0.8713947848528786),
        ('texas_long.wav', 'texas_short.wav', 'resample_max', 0.8713947848528786),
        ('texas_short.wav', 'texas_long.wav', 'resample_min', 0.6971805796646183),
        ('texas_long.wav', 'texas_short.wav', 'resample_min', 0.6971805796646183),
        ('texas_long.wav', 'texas_long.wav', 'none', 1.0),
        ('texas_long.wav', 'texas_long.flac', 'none', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 'none', 0.9987497286333322),
    ])
    def test_sound_mfcc_similarity_time_align(self, file1, file2, time_align, v):
        # Sweeps every time_align option, including an invalid one.
        self._check_similarity(file1, file2, v, time_align=time_align)

    @pytest.mark.parametrize(['file1', 'file2', 'sr_align', 'v'], [
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'none', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'min', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'max', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'bullshit', ValueError),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'none', SoundResampleRateNotMatch),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'min', 0.9999701859565737),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'max', 0.9999913655328565),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'none', SoundResampleRateNotMatch),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'min', 0.9623946880540923),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'max', 0.9517846942566877),
    ])
    def test_sound_mfcc_similarity_sr_align(self, file1, file2, sr_align, v):
        # Sweeps resample_rate_align while always padding in time.
        self._check_similarity(file1, file2, v, time_align='pad', resample_rate_align=sr_align)

    @pytest.mark.parametrize(['file1', 'file2', 'mode', 'v'], [
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'flat', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'mean', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'flat', 0.9999701859565737),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'mean', 0.9999993147811599),
        ('texas_long.wav', 'texas_long.wav', 'flat', 1.0),
        ('texas_long.wav', 'texas_long.wav', 'mean', 1.0),
        ('texas_long.wav', 'texas_long_sr8000.wav', 'flat', 0.999767841519668),
        ('texas_long.wav', 'texas_long_sr16000.wav', 'flat', 0.9998774575084983),
        ('texas_long.wav', 'texas_long.wav', 'bullshit', ValueError),
    ])
    def test_sound_mfcc_similarity_pad_min(self, file1, file2, mode, v):
        # Sweeps the MFCC feature mode with time_align='pad' and
        # resample_rate_align='min' fixed.
        self._check_similarity(
            file1, file2, v,
            mode=mode, time_align='pad', resample_rate_align='min',
        )

0 comments on commit 88aeda7

Please sign in to comment.