-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
dev(narugo): save the newest features
- Loading branch information
1 parent
8dbedef
commit 88aeda7
Showing
6 changed files
with
180 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,4 +7,5 @@ pandas | |
scipy | ||
hfutils>=0.4.1 | ||
soundfile>=0.12 | ||
fastdtw | ||
fastdtw | ||
librosa>=0.10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from .base import SoundAlignError, SoundLengthNotMatch, SoundResampleRateNotMatch, SoundChannelsNotMatch | ||
from .correlation import sound_pearson_similarity | ||
from .dtw import sound_fastdtw | ||
from .mfcc import sound_mfcc_similarity | ||
from .mse import sound_mse, sound_rmse | ||
from .spectral import sound_spectral_centroid_distance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from typing import Literal | ||
|
||
import librosa | ||
import numpy as np | ||
from scipy.spatial.distance import cosine | ||
|
||
from .base import _align_sounds | ||
from ..data import SoundTyping | ||
|
||
|
||
def sound_mfcc_similarity(
        sound1: SoundTyping, sound2: SoundTyping,
        n_mfcc: int = 13, mode: Literal['flat', 'mean'] = 'flat',
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['none', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'none',
        channels_align: Literal['none'] = 'none',
) -> float:
    """
    Compute an MFCC-based cosine similarity between two sounds.

    Both sounds are first passed through ``_align_sounds``. For every index
    along the first axis of the aligned data (presumably one entry per
    channel - confirm against ``_align_sounds``), the MFCC matrices of both
    sounds are computed with ``librosa.feature.mfcc``, reduced to a 1-D
    feature vector according to ``mode``, and compared via
    ``1 - scipy.spatial.distance.cosine``. The per-channel values are then
    averaged.

    :param sound1: First sound to compare.
    :param sound2: Second sound to compare.
    :param n_mfcc: Number of MFCC coefficients, forwarded to
        ``librosa.feature.mfcc``.
    :param mode: How an MFCC matrix becomes a feature vector. ``'flat'``
        flattens the whole matrix; ``'mean'`` averages it over its last axis.
    :param resample_rate_align: Forwarded unchanged to ``_align_sounds``.
    :param time_align: Forwarded unchanged to ``_align_sounds``.
    :param channels_align: Forwarded unchanged to ``_align_sounds``.
    :return: Mean of the per-channel cosine similarities, as a Python float.
    :raises ValueError: If ``mode`` is neither ``'flat'`` nor ``'mean'``.

    .. note::
        Any exception raised by ``_align_sounds`` propagates unchanged and
        takes precedence over the ``mode`` check, because alignment runs first.
    """
    (data1, sr1), (data2, sr2) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # Resolve the reduction once, before any MFCC is computed, so that an
    # invalid mode fails fast instead of after the first (costly) MFCC pass,
    # and is rejected even when there is nothing to iterate over.
    if mode == 'flat':
        def _to_feature(mfcc):
            return mfcc.flatten()
    elif mode == 'mean':
        def _to_feature(mfcc):
            return np.mean(mfcc, axis=-1)
    else:
        raise ValueError(f'Invalid mode for MFCC - {mode!r}.')

    similarities = []
    for ch in range(data1.shape[0]):
        mfcc1 = librosa.feature.mfcc(y=data1[ch], sr=sr1, n_mfcc=n_mfcc)
        mfcc2 = librosa.feature.mfcc(y=data2[ch], sr=sr2, n_mfcc=n_mfcc)
        # scipy's ``cosine`` is a distance; ``1 - distance`` is the similarity.
        similarities.append(1 - cosine(_to_feature(mfcc1), _to_feature(mfcc2)))

    return np.mean(similarities).item()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import pytest | ||
|
||
from soundutils.similarity import SoundLengthNotMatch, SoundChannelsNotMatch, SoundResampleRateNotMatch, \ | ||
sound_mfcc_similarity | ||
from test.testings import get_testfile | ||
|
||
|
||
@pytest.mark.unittest
class TestSimilarityMFCC:
    """Regression tests for ``sound_mfcc_similarity`` over the bundled audio assets."""

    @staticmethod
    def _check(file1, file2, v, **kwargs):
        """
        Run ``sound_mfcc_similarity`` on two asset files and verify the outcome.

        When ``v`` is an exception class, the call must raise it; otherwise
        the returned similarity must approximately equal ``v``.
        """
        path1 = get_testfile('assets', file1)
        path2 = get_testfile('assets', file2)
        expects_error = isinstance(v, type) and issubclass(v, BaseException)
        if not expects_error:
            assert sound_mfcc_similarity(path1, path2, **kwargs) == pytest.approx(v)
            return

        with pytest.raises(v):
            _ = sound_mfcc_similarity(path1, path2, **kwargs)

    @pytest.mark.parametrize(['file1', 'file2', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 1.0),
        ('texas_short.wav', 'texas_long.wav', SoundLengthNotMatch),
        ('texas_long.wav', 'texas_long.wav', 1.0),
        ('texas_long.wav', 'texas_long.flac', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 0.9987497286333322),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', SoundResampleRateNotMatch),
        ('texas_short.wav', 'stereo_sine_wave_44100.wav', SoundChannelsNotMatch),
    ])
    def test_sound_mfcc_similarity(self, file1, file2, v):
        # Default arguments: no alignment of any kind.
        self._check(file1, file2, v)

    @pytest.mark.parametrize(['file1', 'file2', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 1.0),
        ('texas_short.wav', 'texas_long.wav', 0.8899687300652068),
        ('texas_long.wav', 'texas_short.wav', 0.8899687300652068),
        ('texas_long.wav', 'texas_long.wav', 1.0),
        ('texas_long.wav', 'texas_long.flac', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 0.9987497286333322),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', SoundResampleRateNotMatch),
        ('texas_short.wav', 'stereo_sine_wave_44100.wav', SoundChannelsNotMatch),
    ])
    def test_sound_mfcc_similarity_pad(self, file1, file2, v):
        # Fixed ``time_align='pad'``.
        self._check(file1, file2, v, time_align='pad')

    @pytest.mark.parametrize(['file1', 'file2', 'time_align', 'v'], [
        ('texas_short.wav', 'texas_short.wav', 'none', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'pad', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'prefix', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'resample_max', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'resample_min', 1.0),
        ('texas_short.wav', 'texas_short.wav', 'bullshit', ValueError),
        ('texas_short.wav', 'texas_long.wav', 'none', SoundLengthNotMatch),
        ('texas_short.wav', 'texas_long.wav', 'pad', 0.8899687300652068),
        ('texas_long.wav', 'texas_short.wav', 'pad', 0.8899687300652068),
        ('texas_short.wav', 'texas_long.wav', 'prefix', 0.8728770796805216),
        ('texas_short.wav', 'texas_long.wav', 'resample_max', 0.8713947848528786),
        ('texas_long.wav', 'texas_short.wav', 'resample_max', 0.8713947848528786),
        ('texas_short.wav', 'texas_long.wav', 'resample_min', 0.6971805796646183),
        ('texas_long.wav', 'texas_short.wav', 'resample_min', 0.6971805796646183),
        ('texas_long.wav', 'texas_long.wav', 'none', 1.0),
        ('texas_long.wav', 'texas_long.flac', 'none', 1.0),
        ('texas_long.wav', 'texas_long.mp3', 'none', 0.9987497286333322),
    ])
    def test_sound_mfcc_similarity_time_align(self, file1, file2, time_align, v):
        # Sweeps every ``time_align`` option, including an invalid one.
        self._check(file1, file2, v, time_align=time_align)

    @pytest.mark.parametrize(['file1', 'file2', 'sr_align', 'v'], [
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'none', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'min', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'max', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'bullshit', ValueError),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'none', SoundResampleRateNotMatch),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'min', 0.9999701859565737),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'max', 0.9999913655328565),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'none', SoundResampleRateNotMatch),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'min', 0.9623946880540923),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_3x_40_900.wav', 'max', 0.9517846942566877),
    ])
    def test_sound_mfcc_similarity_sr_align(self, file1, file2, sr_align, v):
        # Sweeps ``resample_rate_align`` while keeping ``time_align='pad'``.
        self._check(file1, file2, v, time_align='pad', resample_rate_align=sr_align)

    @pytest.mark.parametrize(['file1', 'file2', 'mode', 'v'], [
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'flat', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave.wav', 'mean', 1.0),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'flat', 0.9999701859565737),
        ('stereo_sine_wave.wav', 'stereo_sine_wave_44100.wav', 'mean', 0.9999993147811599),
        ('texas_long.wav', 'texas_long.wav', 'flat', 1.0),
        ('texas_long.wav', 'texas_long.wav', 'mean', 1.0),
        ('texas_long.wav', 'texas_long_sr8000.wav', 'flat', 0.999767841519668),
        ('texas_long.wav', 'texas_long_sr16000.wav', 'flat', 0.9998774575084983),
        ('texas_long.wav', 'texas_long.wav', 'bullshit', ValueError),
    ])
    def test_sound_mfcc_similarity_pad_min(self, file1, file2, mode, v):
        # Sweeps ``mode`` with ``time_align='pad'`` and ``resample_rate_align='min'``.
        self._check(file1, file2, v, mode=mode, time_align='pad', resample_rate_align='min')