-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
dev(narugo): add unittest for sp metrics
- Loading branch information
1 parent
044f77a
commit cabdc04
Showing
10 changed files
with
250 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from .base import SoundAlignError, SoundLengthNotMatch, SoundResampleRateNotMatch, SoundChannelsNotMatch | ||
from .correlation import sound_correlation | ||
from .cosine import sound_cosine_similarity | ||
from .euclidean import sound_euclidean | ||
from .correlation import sound_pearson_similarity | ||
from .dtw import sound_fastdtw | ||
from .mse import sound_mse, sound_rmse | ||
from .spectral import sound_spectral_centroid_distance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
19 changes: 10 additions & 9 deletions
19
soundutils/similarity/euclidean.py → soundutils/similarity/dtw.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,28 @@ | ||
from typing import Literal | ||
|
||
import numpy as np | ||
from fastdtw import fastdtw | ||
from scipy.spatial.distance import euclidean | ||
|
||
from .base import _align_sounds | ||
from ..data import SoundTyping | ||
|
||
|
||
def sound_fastdtw(
        sound1: SoundTyping, sound2: SoundTyping,
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['noncheck', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'noncheck',
        channels_align: Literal['none'] = 'none',
        radius: int = 1,
) -> float:
    """
    Compute the FastDTW (approximate dynamic time warping) distance between two sounds.

    Both sounds are first passed through ``_align_sounds`` with the given
    alignment options, then compared with ``fastdtw`` using the Euclidean
    distance between individual frames.

    :param sound1: First sound to compare.
    :param sound2: Second sound to compare.
    :param resample_rate_align: Sample-rate alignment mode, forwarded to ``_align_sounds``.
    :param time_align: Time-axis alignment mode, forwarded to ``_align_sounds``.
        Defaults to ``'noncheck'``.
    :param channels_align: Channel alignment mode, forwarded to ``_align_sounds``.
    :param radius: Search radius forwarded to ``fastdtw``.
    :return: The accumulated DTW distance as a plain Python float.
    """
    # _align_sounds yields (data, sample_rate) pairs; the sample rates are not needed here.
    (data1, _), (data2, _) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # The arrays are transposed so that fastdtw iterates over the second axis
    # of the aligned data (presumably time, with data laid out as
    # (channels, samples) -- TODO confirm against _align_sounds).
    # fastdtw returns a (distance, path) tuple; only the distance matches the
    # declared ``float`` return type, so the warping path is discarded.
    distance, _path = fastdtw(
        data1.T, data2.T,
        radius=radius,
        dist=euclidean,
    )
    return float(distance)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from typing import Literal | ||
|
||
import numpy as np | ||
|
||
from .base import _align_sounds | ||
from ..data import SoundTyping | ||
|
||
|
||
def sound_mse(
        sound1: SoundTyping, sound2: SoundTyping,
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['none', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'none',
        channels_align: Literal['none'] = 'none',
        p: float = 2.0
) -> float:
    """
    Compute the mean ``p``-th power error between two sounds.

    With the default ``p=2.0`` this is the classic mean squared error.

    :param sound1: First sound to compare.
    :param sound2: Second sound to compare.
    :param resample_rate_align: Sample-rate alignment mode, forwarded to ``_align_sounds``.
    :param time_align: Time-axis alignment mode, forwarded to ``_align_sounds``.
    :param channels_align: Channel alignment mode, forwarded to ``_align_sounds``.
    :param p: Exponent applied to the absolute element-wise difference.
    :return: Mean of ``|sound1 - sound2| ** p`` over all elements, as a Python float.
    """
    # _align_sounds yields (data, sample_rate) pairs; the sample rates are not needed here.
    (data1, _), (data2, _) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # Take the absolute difference before raising to the power: for odd ``p``
    # signed errors would cancel each other out, and for fractional ``p``
    # negative differences would produce NaN. For even ``p`` (including the
    # default 2.0) the result is unchanged.
    return np.mean(np.abs(data1 - data2) ** p).item()
|
||
|
||
def sound_rmse(
        sound1: SoundTyping, sound2: SoundTyping,
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['none', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'none',
        channels_align: Literal['none'] = 'none',
        p: float = 2.0,
) -> float:
    """
    Compute the ``p``-th root of the mean ``p``-th power error between two sounds.

    With the default ``p=2.0`` this is the classic root mean squared error.

    :param sound1: First sound to compare.
    :param sound2: Second sound to compare.
    :param resample_rate_align: Sample-rate alignment mode, forwarded to ``_align_sounds``.
    :param time_align: Time-axis alignment mode, forwarded to ``_align_sounds``.
    :param channels_align: Channel alignment mode, forwarded to ``_align_sounds``.
    :param p: Exponent applied to the absolute element-wise difference; the
        mean is then raised to ``1 / p``.
    :return: ``mean(|sound1 - sound2| ** p) ** (1 / p)`` as a Python float.
    """
    # _align_sounds yields (data, sample_rate) pairs; the sample rates are not needed here.
    (data1, _), (data2, _) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # Use the absolute difference so the mean is never negative: a signed
    # mean (odd ``p``) raised to ``1 / p`` would yield NaN, and fractional
    # ``p`` would already produce NaN on negative differences. For even ``p``
    # (including the default 2.0) the result is unchanged.
    mean_power_error = np.mean(np.abs(data1 - data2) ** p)
    return (mean_power_error ** (1.0 / p)).item()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import Literal, Optional | ||
|
||
import numpy as np | ||
from scipy.signal import spectrogram | ||
|
||
from .base import _align_sounds | ||
from ..data import SoundTyping | ||
|
||
|
||
def sound_spectral_centroid_distance(
        sound1: SoundTyping, sound2: SoundTyping,
        resample_rate_align: Literal['max', 'min', 'none'] = 'none',
        time_align: Literal['none', 'pad', 'prefix', 'resample_max', 'resample_min'] = 'none',
        channels_align: Literal['none'] = 'none',
        eps: Optional[float] = None
) -> float:
    """
    Compute the mean absolute difference between the spectral centroids of two sounds.

    For every index along the first axis of the aligned data (treated as one
    channel each), a spectrogram is computed for both sounds, the per-segment
    spectral centroid is derived, and the mean absolute centroid difference is
    recorded. The final result is the mean of those per-channel values.

    Centroids are expressed in spectrogram row indices (frequency-bin
    numbers), not in Hz.

    :param sound1: First sound to compare.
    :param sound2: Second sound to compare.
    :param resample_rate_align: Sample-rate alignment mode, forwarded to ``_align_sounds``.
    :param time_align: Time-axis alignment mode, forwarded to ``_align_sounds``.
    :param channels_align: Channel alignment mode, forwarded to ``_align_sounds``.
    :param eps: Small constant added to every spectrogram cell so that the
        centroid denominator is never zero. When ``None``, the machine
        epsilon of the first sound's dtype is used.
    :return: Mean spectral centroid distance as a Python float.
    """
    (data1, sr1), (data2, sr2) = _align_sounds(
        sound1=sound1,
        sound2=sound2,
        resample_rate_align=resample_rate_align,
        time_align=time_align,
        channels_align=channels_align,
    )

    # Internal invariant check: both spectrograms below use one shared rate.
    assert sr1 == sr2, 'Sample rate not match and not aligned, this must be a bug.'
    sample_rate = sr1

    n_channels = data1.shape[0]
    if eps is None:
        eps = np.finfo(data1.dtype).eps

    def _centroid(signal):
        # Weighted mean of the frequency-bin index, weighted by spectral power.
        _, _, power = spectrogram(signal, sample_rate)
        power += eps
        bin_index = np.arange(power.shape[0])[:, np.newaxis]
        return np.sum(power * bin_index, axis=0) / np.sum(power, axis=0)

    per_channel = [
        np.mean(np.abs(_centroid(data1[idx]) - _centroid(data2[idx])))
        for idx in range(n_channels)
    ]
    return np.mean(per_channel).item()
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.