From 6da1890b483f36fcf42fd2267be872d653e27c51 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Wed, 23 Sep 2020 19:11:37 -0700 Subject: [PATCH 01/25] bump version to 1.6.3 --- scaper/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scaper/version.py b/scaper/version.py index babdf3f..c65a965 100644 --- a/scaper/version.py +++ b/scaper/version.py @@ -3,4 +3,4 @@ """Version info""" short_version = '1.6' -version = '1.6.2' +version = '1.6.3' From 05261f84f2b8ac55c14c67b6ccb942f2be67e698 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Wed, 23 Sep 2020 19:39:30 -0700 Subject: [PATCH 02/25] Remove unused imports and clean up formatting --- scaper/audio.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/scaper/audio.py b/scaper/audio.py index 8c3dafa..aa8e4d8 100644 --- a/scaper/audio.py +++ b/scaper/audio.py @@ -1,18 +1,15 @@ # CREATED: 4/23/17 15:37 by Justin Salamon -''' +""" Utility functions for audio processing using FFMPEG (beyond sox). 
Based on: https://github.com/mathos/neg23/ -''' +""" -import subprocess -import sox import numpy as np import pyloudnorm import soundfile -import tempfile from .scaper_exceptions import ScaperError -from .util import _close_temp_files + def get_integrated_lufs(audio_array, samplerate, min_duration=0.5, filter_class='K-weighting', block_size=0.400): @@ -104,5 +101,5 @@ def match_sample_length(audio_path, duration_in_samples): audio = np.pad(audio, pad_width, 'constant') - soundfile.write(audio_path, audio, sr, - subtype=audio_info.subtype, format=audio_info.format) + soundfile.write(audio_path, audio, sr, + subtype=audio_info.subtype, format=audio_info.format) From 2400ef70e50db83f876e036b623cf690d44c3a06 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Wed, 23 Sep 2020 22:31:20 -0700 Subject: [PATCH 03/25] Implement peak normalize --- scaper/audio.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/scaper/audio.py b/scaper/audio.py index aa8e4d8..0740b53 100644 --- a/scaper/audio.py +++ b/scaper/audio.py @@ -103,3 +103,40 @@ def match_sample_length(audio_path, duration_in_samples): soundfile.write(audio_path, audio, sr, subtype=audio_info.subtype, format=audio_info.format) + + +def peak_normalize(soundscape_audio, event_audio_list): + """ + Compute the scale factor required to peak normalize the audio such that + max(abs(soundscape_audio)) = 1. + + Parameters + ---------- + soundscape_audio : np.ndarray + The soudnscape audio. + event_audio_list : list + List of np.ndarrays containing the audio samples of each isolated + foreground event. + + Returns + ------- + scaled_soundscape_audio : np.ndarray + The peak normalized soundscape audio. + scaled_event_audio_list : list + List of np.ndarrays containing the scaled audio samples of + each isolated foreground event. All events are scaled by scale_factor. + scale_factor : float + The scale factor used to peak normalize the soundscape audio. 
+ """ + eps = 1e-10 + max_sample = np.max(np.abs(soundscape_audio)) + scale_factor = 1.0 / (max_sample + eps) + + # scale the event audio and the soundscape audio: + scaled_soundscape_audio = soundscape_audio * scale_factor + + scaled_event_audio_list = [] + for event_audio in event_audio_list: + scaled_event_audio_list.append(event_audio * scale_factor) + + return scaled_soundscape_audio, scaled_event_audio_list, scale_factor From 68be28067915073e7251529e2dc97e415ae9946d Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Wed, 23 Sep 2020 22:31:54 -0700 Subject: [PATCH 04/25] First pass at fix_clipping and peak_normalization, tests failing --- scaper/core.py | 144 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 111 insertions(+), 33 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 7decea7..3a42380 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -30,6 +30,7 @@ from .util import polyphony_gini from .util import is_real_number, is_real_array from .audio import get_integrated_lufs +from .audio import peak_normalize from .version import version as scaper_version SUPPORTED_DIST = {"const": _sample_const, @@ -214,7 +215,7 @@ def generate_from_jams(jams_infile, # Cast ann.sandbox.scaper to a Sandbox object ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) - soundscape_audio, event_audio_list = \ + soundscape_audio, event_audio_list, scale_factor = \ sc._generate_audio(audio_outfile, ann, reverb=reverb, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, @@ -1696,8 +1697,14 @@ def _instantiate(self, allow_repeated_label=True, # Return return jam - def _generate_audio(self, audio_path, ann, reverb=None, - save_isolated_events=False, isolated_events_path=None, + def _generate_audio(self, + audio_path, + ann, + reverb=None, + fix_clipping=False, + peak_normalization=False, + save_isolated_events=False, + isolated_events_path=None, disable_sox_warnings=True): ''' Generate audio based on a scaper 
annotation and save to disk. @@ -1713,6 +1720,19 @@ def _generate_audio(self, audio_path, ann, reverb=None, (no reverberation) and 1 (maximum reverberation). Use None (default) to prevent the soundscape from going through the reverb module at all. + fix_clipping: bool + When True (default=False), checks the soundscape audio for clipping + (abs(sample) > 1). If so, the soundscape waveform is peak normalized, + i.e., scaled such that max(abs(soundscape_audio)) = 1. The audio for + each isolated event is also scaled accordingly. Note: this will change + the actual value of `ref_db` in the generated audio. The scaling + factor that was used is returned. + peak_normalization : bool + When True (default=False), normalize the generated soundscape audio + such that max(abs(soundscape_audio)) = 1. The audio for + each isolated event is also scaled accordingly. Note: this will change + the actual value of `ref_db` in the generated audio. The scaling + factor that was used is returned. save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -1743,6 +1763,11 @@ def _generate_audio(self, audio_path, ann, reverb=None, in the same order in which they appear in the jams annotations data list, and can be matched with: `for obs, event_audio in zip(ann.data, event_audio_list): ...`. + scale_factor : float + If peak_normalization is True, or fix_clipping is True and the + soundscape audio needs to be scaled to avoid clipping, scale_factor + is the value used to scale the soundscape audio and the audio of the + isolated events. None otherwise. 
Raises ------ @@ -1768,6 +1793,7 @@ def _generate_audio(self, audio_path, ann, reverb=None, # List for storing all generated audio (one array for every event) soundscape_audio = None event_audio_list = [] + scale_factor = None with _set_temp_logging_level(temp_logging_level): @@ -1913,6 +1939,58 @@ def _generate_audio(self, audio_path, ann, reverb=None, 'Unsupported event role: {:s}'.format( e.value['role'])) + # Finally combine all the files and optionally apply reverb. + # If there are no events, throw a warning. + if len(event_audio_list) == 0: + warnings.warn( + "No events to synthesize (silent soundscape), no audio " + "generated.", ScaperWarning) + else: + tfm = sox.Transformer() + if reverb is not None: + tfm.reverb(reverberance=reverb * 100) + + # Sum all events to get soundscape audio + soundscape_audio = sum(event_audio_list) + + # Check for clipping and fix [optional] + max_sample = np.max(np.abs(soundscape_audio)) + clipping = max_sample > 1 + if clipping: + warnings.warn('Soundscape audio is clipping!', + ScaperWarning) + + if peak_normalization or (clipping and fix_clipping): + + # normalize soundscape audio and scale event audio + soundscape_audio, event_audio_list, scale_factor = \ + peak_normalize(soundscape_audio, event_audio_list) + + warnings.warn( + 'Peak normalization applied (scale factor = {})'.format( + scale_factor), + ScaperWarning) + + if scale_factor < 0.05: + warnings.warn( + 'Scale factor for peak normalization is extreme ' + '(<0.05), actual event SNR values in the soundscape ' + 'audio may not match their specified values.' 
+ ) + + # Apply effects and reshape + soundscape_audio = tfm.build_array( + input_array=soundscape_audio, + sample_rate_in=self.sr, + ) + soundscape_audio = soundscape_audio.reshape(-1, self.n_channels) + + # Optionally save soundscape audio to disk + if audio_path is not None: + soundfile.write(audio_path, soundscape_audio, self.sr, + subtype='PCM_32') + + # Optionally save isolated events to disk if save_isolated_events: base, ext = os.path.splitext(audio_path) if isolated_events_path is None: @@ -1922,20 +2000,20 @@ def _generate_audio(self, audio_path, ann, reverb=None, _role_count = role_counter[e.value['role']] event_audio_path = os.path.join( - event_folder, + event_folder, '{:s}{:d}_{:s}{:s}'.format( e.value['role'], _role_count, e.value['label'], ext)) role_counter[e.value['role']] += 1 - + if not os.path.exists(event_folder): - # In Python 3.2 and above we could do + # In Python 3.2 and above we could do # os.makedirs(..., exist_ok=True) but we test back to # Python 2.7. os.makedirs(event_folder) soundfile.write(event_audio_path, event_audio_list[-1], self.sr, subtype='PCM_32') isolated_events_audio_path.append(event_audio_path) - #TODO what do we do in this case? for now throw a warning + # TODO what do we do in this case? for now throw a warning if reverb is not None: warnings.warn( "Reverb is on and save_isolated_events is True. Reverberation " @@ -1944,34 +2022,12 @@ def _generate_audio(self, audio_path, ann, reverb=None, "audio of the isolated events will not add up to the " "mixture", ScaperWarning) - # Finally combine all the files and optionally apply reverb. - # If there are no events, throw a warning. - if len(event_audio_list) == 0: - warnings.warn( - "No events to synthesize (silent soundscape), no audio " - "generated.", ScaperWarning) - else: - tfm = sox.Transformer() - if reverb is not None: - tfm.reverb(reverberance=reverb * 100) - # TODO: do we want to normalize the final output? 
- soundscape_audio = sum(event_audio_list) - soundscape_audio = tfm.build_array( - input_array=soundscape_audio, - sample_rate_in=self.sr, - ) - soundscape_audio = soundscape_audio.reshape(-1, self.n_channels) - - # Save to disk if output path provided - if audio_path is not None: - soundfile.write(audio_path, soundscape_audio, self.sr, subtype='PCM_32') - # Document output paths ann.sandbox.scaper.soundscape_audio_path = audio_path ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path # Return audio for in-memory processing - return soundscape_audio, event_audio_list + return soundscape_audio, event_audio_list, scale_factor def generate(self, audio_path=None, @@ -1979,6 +2035,8 @@ def generate(self, allow_repeated_label=True, allow_repeated_source=True, reverb=None, + fix_clipping=False, + peak_normalization=False, save_isolated_events=False, isolated_events_path=None, disable_sox_warnings=True, @@ -2014,6 +2072,23 @@ def generate(self, (no reverberation) and 1 (maximum reverberation). Use None (default) to prevent the soundscape from going through the reverb module at all. + fix_clipping: bool + When True (default=False), checks the soundscape audio for clipping + (abs(sample) > 1). If so, the soundscape waveform is peak normalized, + i.e., scaled such that max(abs(soundscape_audio)) = 1. The audio for + each isolated event is also scaled accordingly. Note: this will change + the actual value of `ref_db` in the generated audio. The updated + `ref_db` value will be stored in the JAMS annotation. The SNR of + foreground events with respect to the background is unaffected except + when extreme scaling is required to prevent clipping. + peak_normalization : bool + When True (default=False), normalize the generated soundscape audio + such that max(abs(soundscape_audio)) = 1. The audio for + each isolated event is also scaled accordingly. Note: this will change + the actual value of `ref_db` in the generated audio. 
The updated + `ref_db` value will be stored in the JAMS annotation. The SNR of + foreground events with respect to the background is unaffected except + when extreme scaling is required to achieve peak normalization. save_isolated_events : bool If True, this will save the isolated foreground events and backgrounds in a directory adjacent to the generated soundscape @@ -2104,11 +2179,14 @@ def generate(self, # Generate the audio and save to disk if not no_audio: - soundscape_audio, event_audio_list = \ - self._generate_audio(audio_path, ann, reverb=reverb, + soundscape_audio, event_audio_list, scale_factor = \ + self._generate_audio(audio_path, ann, + reverb=reverb, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, - disable_sox_warnings=disable_sox_warnings) + disable_sox_warnings=disable_sox_warnings, + fix_clipping=fix_clipping, + peak_normalization=peak_normalization) # Save JAMS to disk too if jams_path is not None: From c4b8f267a342bb4a57730b216de28f713060e882 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Wed, 23 Sep 2020 22:51:53 -0700 Subject: [PATCH 05/25] fix bug in code for saving isolated events audio --- scaper/core.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 3a42380..5185319 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -1975,7 +1975,8 @@ def _generate_audio(self, warnings.warn( 'Scale factor for peak normalization is extreme ' '(<0.05), actual event SNR values in the soundscape ' - 'audio may not match their specified values.' 
+ 'audio may not match their specified values.', + ScaperWarning ) # Apply effects and reshape @@ -1998,20 +1999,24 @@ def _generate_audio(self, else: event_folder = isolated_events_path - _role_count = role_counter[e.value['role']] - event_audio_path = os.path.join( - event_folder, - '{:s}{:d}_{:s}{:s}'.format( - e.value['role'], _role_count, e.value['label'], ext)) - role_counter[e.value['role']] += 1 - if not os.path.exists(event_folder): # In Python 3.2 and above we could do # os.makedirs(..., exist_ok=True) but we test back to # Python 2.7. os.makedirs(event_folder) - soundfile.write(event_audio_path, event_audio_list[-1], self.sr, subtype='PCM_32') - isolated_events_audio_path.append(event_audio_path) + + iso_idx = 0 + for i, e in enumerate(ann.data): + _role_count = role_counter[e.value['role']] + event_audio_path = os.path.join( + event_folder, + '{:s}{:d}_{:s}{:s}'.format( + e.value['role'], _role_count, e.value['label'], ext)) + role_counter[e.value['role']] += 1 + + soundfile.write(event_audio_path, event_audio_list[iso_idx], self.sr, subtype='PCM_32') + isolated_events_audio_path.append(event_audio_path) + iso_idx += 1 # TODO what do we do in this case? for now throw a warning if reverb is not None: From 9ae18af219a0c7c654cfcf90ca9c363cbddca271 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 16:21:04 -0700 Subject: [PATCH 06/25] test peak_normalize --- scaper/audio.py | 5 --- tests/test_audio.py | 75 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/scaper/audio.py b/scaper/audio.py index 0740b53..59b03eb 100644 --- a/scaper/audio.py +++ b/scaper/audio.py @@ -1,10 +1,5 @@ # CREATED: 4/23/17 15:37 by Justin Salamon -""" -Utility functions for audio processing using FFMPEG (beyond sox). 
Based on: -https://github.com/mathos/neg23/ -""" - import numpy as np import pyloudnorm import soundfile diff --git a/tests/test_audio.py b/tests/test_audio.py index 9aad29e..9633c09 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -1,8 +1,10 @@ # CREATED: 5/5/17 14:36 by Justin Salamon from scaper.audio import get_integrated_lufs, match_sample_length +from scaper.audio import peak_normalize from scaper.util import _close_temp_files import numpy as np +import scipy.signal as sg import os import pytest from scaper.scaper_exceptions import ScaperError @@ -26,6 +28,7 @@ HUMANVOICE_LUFS_I = -20.061513106500225 DOGBARK_LUFS_I = -11.1952428800271 # for x4 concatenated file + def test_get_integrated_lufs(): # test correct functionality audiofiles = [SIREN_FILE, CARHORN_FILE, HUMANVOICE_FILE, DOGBARK_FILE] @@ -36,6 +39,7 @@ def test_get_integrated_lufs(): i = get_integrated_lufs(audio, sr) assert np.allclose(i, li) + def change_format_and_subtype(audio_path): audio, sr = sf.read(audio_path) audio_info = sf.info(audio_path) @@ -54,6 +58,7 @@ def change_format_and_subtype(audio_path): sf.write(audio_path, audio, sr, subtype=_subtype, format=_format) + def test_match_sample_length(): durations_to_match = [1, 2, 5, 7, 22500, 44100, 88200, 100001] invalid_durations_to_match = [0, -1, .5, 1.0] @@ -95,3 +100,73 @@ def test_match_sample_length(): for _duration in invalid_durations_to_match: pytest.raises(ScaperError, match_sample_length, carhorn.name, _duration) pytest.raises(ScaperError, match_sample_length, siren.name, _duration) + + +def test_peak_normalize(): + + def sine(x, f, sr, A): + return A * np.sin(2 * np.pi * f * x / sr) + + def square(x,f, sr, A): + return A * sg.square(2 * np.pi * f * x / sr) + + def saw(x, f, sr, A): + return A * sg.sawtooth(2 * np.pi * f * x / sr) + + samplerates = [16000, 44100] + frequencies = [50, 100, 500, 1000, 5000, 10000, 15000, 20000] + amplitudes = [0.1, 0.5, 1.0, 1.5, 2.0] + event_factors = [0.5, 0.8] + eps = 1e-10 + + # test 
with toy data + for waveform in [sine, square, saw]: + for sr in samplerates: + for f in frequencies: + for A in amplitudes: + + n_samples = sr + x = np.arange(n_samples) + audio = waveform(x, f, sr, A) + max_sample = np.max(np.abs(audio)) + estimated_scale_factor = 1.0 / (A + eps) + print("\nsr: {}, f: {}, A: {}".format(sr, f, A)) + print("max sample audio: {}".format(max_sample)) + print("estimated scale_factor: {}".format(estimated_scale_factor)) + + event_audio_list = [] + for factor in event_factors: + event_audio_list.append(waveform(x, f, sr, A * factor)) + + audio_norm, event_audio_list_norm, scale_factor = \ + peak_normalize(audio, event_audio_list) + + print("allclose on factors: {}".format(np.allclose( + scale_factor, estimated_scale_factor))) + + print("actual scale factor: {}".format(scale_factor)) + + # test scale factor + max_sample_audio = np.max(np.abs(audio_norm)) + print("max sample audio norm: {}".format(max_sample_audio)) + assert np.allclose(scale_factor, estimated_scale_factor, + atol=1e-03) + + # test soundscape audio + assert max_sample_audio < 1.0 + assert np.allclose(max_sample_audio, 1.0) + + # test event audio + for event_audio, factor in zip(event_audio_list_norm, + event_factors): + + max_sample_event = np.max(np.abs(event_audio)) + + if not np.allclose( + max_sample_event, A * factor * scale_factor): + print(max_sample_audio, max_sample_event, + A * factor * scale_factor) + + assert np.allclose(max_sample_event, + A * factor * scale_factor, + atol=1e-3) From 627f7cf3bc946ad866eb58f10adff393220052d4 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 16:24:17 -0700 Subject: [PATCH 07/25] Warn user when ref_db changes due to clipping prevention --- scaper/core.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 5185319..2c70e90 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -1800,8 +1800,6 @@ def _generate_audio(self, 
isolated_events_audio_path = [] duration_in_samples = int(self.duration * self.sr) - role_counter = {'background': 0, 'foreground': 0} - for i, e in enumerate(ann.data): if e.value['role'] == 'background': # Concatenate background if necessary. @@ -1966,16 +1964,23 @@ def _generate_audio(self, soundscape_audio, event_audio_list, scale_factor = \ peak_normalize(soundscape_audio, event_audio_list) - warnings.warn( - 'Peak normalization applied (scale factor = {})'.format( - scale_factor), - ScaperWarning) + ref_db_drop = 20 * np.log10(scale_factor) + + if clipping and fix_clipping: + warnings.warn( + 'Peak normalization applied to fix clipping with ' + 'scale factor = {}. The actual ref_db of the ' + 'generated soundscape audio will be lower by ' + 'approximately {:.2f}dB with respect to the target ' + 'ref_db of {})'.format( + scale_factor, ref_db_drop, self.ref_db), + ScaperWarning) if scale_factor < 0.05: warnings.warn( 'Scale factor for peak normalization is extreme ' - '(<0.05), actual event SNR values in the soundscape ' - 'audio may not match their specified values.', + '(<0.05), event SNR values in the generated soundscape ' + 'audio may not perfectly match their specified values.', ScaperWarning ) @@ -2006,6 +2011,7 @@ def _generate_audio(self, os.makedirs(event_folder) iso_idx = 0 + role_counter = {'background': 0, 'foreground': 0} for i, e in enumerate(ann.data): _role_count = role_counter[e.value['role']] event_audio_path = os.path.join( From 8de1713558404b01f72c945bb93780fc28d5f3bf Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 18:03:52 -0700 Subject: [PATCH 08/25] Return ref_db_change --- scaper/core.py | 63 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 2c70e90..ffb09d2 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -215,7 +215,9 @@ def generate_from_jams(jams_infile, # Cast ann.sandbox.scaper to a Sandbox object 
ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) - soundscape_audio, event_audio_list, scale_factor = \ + + # Generate audio + soundscape_audio, event_audio_list, scale_factor, ref_db_change = \ sc._generate_audio(audio_outfile, ann, reverb=reverb, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, @@ -1687,6 +1689,21 @@ def _instantiate(self, allow_repeated_label=True, scaper_version=scaper_version, soundscape_audio_path=None, isolated_events_audio_path=[]) + # # Adding generate parameters that aren't already stored: + # audio_path=None, + # jams_path=None, + # fix_clipping=None, + # peak_normalization=None, + # save_isolated_events=None, + # isolated_events_path=None, + # disable_sox_warnings=disable_instantiation_warnings, + # no_audio=None, + # txt_path=None, + # txt_sep=None, + # disable_instantiation_warnings=disable_instantiation_warnings, + # peak_normalization_scale_factor=None, + # ref_db_change=None, + # ref_db_generated=None) # Add annotation to jams jam.annotations.append(ann) @@ -1767,7 +1784,11 @@ def _generate_audio(self, If peak_normalization is True, or fix_clipping is True and the soundscape audio needs to be scaled to avoid clipping, scale_factor is the value used to scale the soundscape audio and the audio of the - isolated events. None otherwise. + isolated events. Otherwise will return 1.0. + ref_db_change : float + The change (in dB) to the soundscape audio's ref_db if peak + normalization is applied to fix clipping or because the user + specified it. Otherwise will return 0. 
Raises ------ @@ -1793,7 +1814,8 @@ def _generate_audio(self, # List for storing all generated audio (one array for every event) soundscape_audio = None event_audio_list = [] - scale_factor = None + scale_factor = 1.0 + ref_db_change = 0 with _set_temp_logging_level(temp_logging_level): @@ -1964,16 +1986,16 @@ def _generate_audio(self, soundscape_audio, event_audio_list, scale_factor = \ peak_normalize(soundscape_audio, event_audio_list) - ref_db_drop = 20 * np.log10(scale_factor) + ref_db_change = 20 * np.log10(scale_factor) if clipping and fix_clipping: warnings.warn( 'Peak normalization applied to fix clipping with ' 'scale factor = {}. The actual ref_db of the ' - 'generated soundscape audio will be lower by ' + 'generated soundscape audio will change by ' 'approximately {:.2f}dB with respect to the target ' 'ref_db of {})'.format( - scale_factor, ref_db_drop, self.ref_db), + scale_factor, ref_db_change, self.ref_db), ScaperWarning) if scale_factor < 0.05: @@ -2034,11 +2056,14 @@ def _generate_audio(self, "mixture", ScaperWarning) # Document output paths + # TODO: this is redundant with data stored in ann.sandbox.scaper.generate, + # but we're keeping it here for now for backwards compatibility e.g. with + # FUSS. Eventually we should remove this from here. 
ann.sandbox.scaper.soundscape_audio_path = audio_path ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path # Return audio for in-memory processing - return soundscape_audio, event_audio_list, scale_factor + return soundscape_audio, event_audio_list, scale_factor, ref_db_change def generate(self, audio_path=None, @@ -2189,8 +2214,10 @@ def generate(self, soundscape_audio, event_audio_list = None, None # Generate the audio and save to disk + scale_factor = 1.0 + ref_db_change = 0 if not no_audio: - soundscape_audio, event_audio_list, scale_factor = \ + soundscape_audio, event_audio_list, scale_factor, ref_db_change = \ self._generate_audio(audio_path, ann, reverb=reverb, save_isolated_events=save_isolated_events, @@ -2199,6 +2226,26 @@ def generate(self, fix_clipping=fix_clipping, peak_normalization=peak_normalization) + # TODO: Stick to heavy handed overwriting for now, in the future we + # should consolidate this with what happens inside _instantiate(). + # ann.sandbox.scaper.audio_path = audio_path, + # ann.sandbox.scaper.jams_path = jams_path, + # ann.sandbox.scaper.allow_repeated_label = allow_repeated_label, + # ann.sandbox.scaper.allow_repeated_source = allow_repeated_source, + # ann.sandbox.scaper.reverb = reverb, + # ann.sandbox.scaper.fix_clipping = fix_clipping, + # ann.sandbox.scaper.peak_normalization = peak_normalization, + # ann.sandbox.scaper.save_isolated_events = save_isolated_events, + # ann.sandbox.scaper.isolated_events_path = isolated_events_path, + # ann.sandbox.scaper.disable_sox_warnings = disable_instantiation_warnings, + # ann.sandbox.scaper.no_audio = no_audio, + # ann.sandbox.scaper.txt_path = txt_path, + # ann.sandbox.scaper.txt_sep = txt_sep, + # ann.sandbox.scaper.disable_instantiation_warnings = disable_instantiation_warnings, + # ann.sandbox.scaper.peak_normalization_scale_factor = scale_factor, + # ann.sandbox.scaper.ref_db_change = ref_db_change + # ann.sandbox.scaper.ref_db_generated = self.ref_db + 
ref_db_change + # Save JAMS to disk too if jams_path is not None: soundscape_jam.save(jams_path) From 9de26b67c092d8d66d8808ee5ce7a967e44b9482 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 18:40:35 -0700 Subject: [PATCH 09/25] update regression jams with new generate fields --- .../bgonly_soundscape_20200501_22050.jams | 18 ++++++++++++++++-- .../bgonly_soundscape_20200501_44100.jams | 18 ++++++++++++++++-- .../reverb_soundscape_20200501_22050.jams | 18 ++++++++++++++++-- .../reverb_soundscape_20200501_44100.jams | 18 ++++++++++++++++-- .../regression/soundscape_20200501_22050.jams | 18 ++++++++++++++++-- .../regression/soundscape_20200501_44100.jams | 18 ++++++++++++++++-- 6 files changed, 96 insertions(+), 12 deletions(-) diff --git a/tests/data/regression/bgonly_soundscape_20200501_22050.jams b/tests/data/regression/bgonly_soundscape_20200501_22050.jams index 83e90de..4dd6f1c 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_22050.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_22050.jams @@ -93,9 +93,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/bgonly_soundscape_20200501_22050.wav", + "jams_path": "tests/data/regression/bgonly_soundscape_20200501_22050.jams", + "fix_clipping": false, + "peak_normalization": false, + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/bgonly_soundscape_20200501_22050.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -20 } }, "time": 0, diff --git 
a/tests/data/regression/bgonly_soundscape_20200501_44100.jams b/tests/data/regression/bgonly_soundscape_20200501_44100.jams index 1c43ec9..d1cf2cc 100644 --- a/tests/data/regression/bgonly_soundscape_20200501_44100.jams +++ b/tests/data/regression/bgonly_soundscape_20200501_44100.jams @@ -93,9 +93,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/bgonly_soundscape_20200501_44100.wav", + "jams_path": "tests/data/regression/bgonly_soundscape_20200501_44100.jams", + "fix_clipping": false, + "peak_normalization": false, + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/bgonly_soundscape_20200501_44100.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -20 } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_22050.jams b/tests/data/regression/reverb_soundscape_20200501_22050.jams index 388b3a3..38dbb8e 100644 --- a/tests/data/regression/reverb_soundscape_20200501_22050.jams +++ b/tests/data/regression/reverb_soundscape_20200501_22050.jams @@ -235,9 +235,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/reverb_soundscape_20200501_22050.wav", + "jams_path": "tests/data/regression/reverb_soundscape_20200501_22050.jams", + "fix_clipping": false, + "peak_normalization": false, + 
"save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/reverb_soundscape_20200501_22050.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -50 } }, "time": 0, diff --git a/tests/data/regression/reverb_soundscape_20200501_44100.jams b/tests/data/regression/reverb_soundscape_20200501_44100.jams index 75cd0d1..cad1151 100644 --- a/tests/data/regression/reverb_soundscape_20200501_44100.jams +++ b/tests/data/regression/reverb_soundscape_20200501_44100.jams @@ -235,9 +235,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": 0.2, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/reverb_soundscape_20200501_44100.wav", + "jams_path": "tests/data/regression/reverb_soundscape_20200501_44100.jams", + "fix_clipping": false, + "peak_normalization": false, + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/reverb_soundscape_20200501_44100.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -50 } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_22050.jams b/tests/data/regression/soundscape_20200501_22050.jams index 351d286..508cab9 100644 --- a/tests/data/regression/soundscape_20200501_22050.jams +++ b/tests/data/regression/soundscape_20200501_22050.jams @@ -235,9 +235,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": 
"tests/data/regression/soundscape_20200501_22050.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/soundscape_20200501_22050.wav", + "jams_path": "tests/data/regression/soundscape_20200501_22050.jams", + "fix_clipping": false, + "peak_normalization": false, + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/soundscape_20200501_22050.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -50 } }, "time": 0, diff --git a/tests/data/regression/soundscape_20200501_44100.jams b/tests/data/regression/soundscape_20200501_44100.jams index 6c7daf0..06b1496 100644 --- a/tests/data/regression/soundscape_20200501_44100.jams +++ b/tests/data/regression/soundscape_20200501_44100.jams @@ -235,9 +235,23 @@ "allow_repeated_label": true, "allow_repeated_source": true, "reverb": null, - "scaper_version": "1.6.2", + "scaper_version": "1.6.3", "soundscape_audio_path": "tests/data/regression/soundscape_20200501_44100.wav", - "isolated_events_audio_path": [] + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/soundscape_20200501_44100.wav", + "jams_path": "tests/data/regression/soundscape_20200501_44100.jams", + "fix_clipping": false, + "peak_normalization": false, + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/soundscape_20200501_44100.txt", + "txt_sep": "\t", + "disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -50 } }, "time": 0, From f912321cac9df53c3d29e8a21953be87dd814325 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 19:37:38 -0700 Subject: [PATCH 10/25] Store all generation parmas in the jams 
sandbox --- scaper/core.py | 77 +++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index ffb09d2..85ce629 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -210,15 +210,21 @@ def generate_from_jams(jams_infile, sc.fade_in_len = ann.sandbox.scaper['fade_in_len'] sc.fade_out_len = ann.sandbox.scaper['fade_out_len'] - # Generate audio and save to disk + # Pull generation parameters from annotation reverb = ann.sandbox.scaper['reverb'] + fix_clipping = ann.sandbox.scaper['fix_clipping'] + peak_normalization = ann.sandbox.scaper['peak_normalization'] # Cast ann.sandbox.scaper to a Sandbox object ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) # Generate audio soundscape_audio, event_audio_list, scale_factor, ref_db_change = \ - sc._generate_audio(audio_outfile, ann, reverb=reverb, + sc._generate_audio(audio_outfile, + ann, + reverb=reverb, + fix_clipping=fix_clipping, + peak_normalization=peak_normalization, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings) @@ -1688,22 +1694,22 @@ def _instantiate(self, allow_repeated_label=True, reverb=reverb, scaper_version=scaper_version, soundscape_audio_path=None, - isolated_events_audio_path=[]) - # # Adding generate parameters that aren't already stored: - # audio_path=None, - # jams_path=None, - # fix_clipping=None, - # peak_normalization=None, - # save_isolated_events=None, - # isolated_events_path=None, - # disable_sox_warnings=disable_instantiation_warnings, - # no_audio=None, - # txt_path=None, - # txt_sep=None, - # disable_instantiation_warnings=disable_instantiation_warnings, - # peak_normalization_scale_factor=None, - # ref_db_change=None, - # ref_db_generated=None) + isolated_events_audio_path=[], + # Initialize missing generate parameters + audio_path=None, + jams_path=None, + fix_clipping=None, + peak_normalization=None, + 
save_isolated_events=None, + isolated_events_path=None, + disable_sox_warnings=None, + no_audio=None, + txt_path=None, + txt_sep=None, + disable_instantiation_warnings=None, + peak_normalization_scale_factor=None, + ref_db_change=None, + ref_db_generated=None) # Add annotation to jams jam.annotations.append(ann) @@ -2228,23 +2234,24 @@ def generate(self, # TODO: Stick to heavy handed overwriting for now, in the future we # should consolidate this with what happens inside _instantiate(). - # ann.sandbox.scaper.audio_path = audio_path, - # ann.sandbox.scaper.jams_path = jams_path, - # ann.sandbox.scaper.allow_repeated_label = allow_repeated_label, - # ann.sandbox.scaper.allow_repeated_source = allow_repeated_source, - # ann.sandbox.scaper.reverb = reverb, - # ann.sandbox.scaper.fix_clipping = fix_clipping, - # ann.sandbox.scaper.peak_normalization = peak_normalization, - # ann.sandbox.scaper.save_isolated_events = save_isolated_events, - # ann.sandbox.scaper.isolated_events_path = isolated_events_path, - # ann.sandbox.scaper.disable_sox_warnings = disable_instantiation_warnings, - # ann.sandbox.scaper.no_audio = no_audio, - # ann.sandbox.scaper.txt_path = txt_path, - # ann.sandbox.scaper.txt_sep = txt_sep, - # ann.sandbox.scaper.disable_instantiation_warnings = disable_instantiation_warnings, - # ann.sandbox.scaper.peak_normalization_scale_factor = scale_factor, - # ann.sandbox.scaper.ref_db_change = ref_db_change - # ann.sandbox.scaper.ref_db_generated = self.ref_db + ref_db_change + # print("THIS!!!", allow_repeated_label, type(allow_repeated_label)) + ann.sandbox.scaper.audio_path = audio_path + ann.sandbox.scaper.jams_path = jams_path + ann.sandbox.scaper.allow_repeated_label = allow_repeated_label + ann.sandbox.scaper.allow_repeated_source = allow_repeated_source + ann.sandbox.scaper.reverb = reverb + ann.sandbox.scaper.fix_clipping = fix_clipping + ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.save_isolated_events = 
save_isolated_events + ann.sandbox.scaper.isolated_events_path = isolated_events_path + ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings + ann.sandbox.scaper.no_audio = no_audio + ann.sandbox.scaper.txt_path = txt_path + ann.sandbox.scaper.txt_sep = txt_sep + ann.sandbox.scaper.disable_instantiation_warnings = disable_instantiation_warnings + ann.sandbox.scaper.peak_normalization_scale_factor = scale_factor + ann.sandbox.scaper.ref_db_change = ref_db_change + ann.sandbox.scaper.ref_db_generated = self.ref_db + ref_db_change # Save JAMS to disk too if jams_path is not None: From c71be9581c6e490a0bfa84231a745be908192df6 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 19:38:15 -0700 Subject: [PATCH 11/25] Add ability to exclude sandbox keys from jams comparison --- tests/test_core.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 6561290..165dc92 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -59,7 +59,7 @@ def _get_test_paths(name): } -def _compare_scaper_jams(jam, regjam): +def _compare_scaper_jams(jam, regjam, exclude_additional_scaper_sandbox_keys=[]): """ Check whether two scaper jams objects are equal up to floating point precision, ignoring jams_version and scaper_version. 
@@ -113,6 +113,8 @@ def _compare_scaper_jams(jam, regjam): 'bg_spec', 'fg_spec', 'scaper_version', 'soundscape_audio_path', 'isolated_events_audio_path', ] + excluded_scaper_sandbox_keys.extend(exclude_additional_scaper_sandbox_keys) + # everything but the specs and version can be compared directly: for k in set(ann.sandbox.scaper.keys()) | set(regann.sandbox.scaper.keys()): if k not in excluded_scaper_sandbox_keys: @@ -1300,9 +1302,30 @@ def test_scaper_instantiate(): pitch_shift=None, time_stretch=('const', 1.2)) + # Instantiate jam = sc._instantiate(disable_instantiation_warnings=True) + + # Ignore all fields set by generate but not by instantiate + sandbox_exclude = [ + 'txt_path', + 'jams_path', + 'audio_path', + 'no_audio', + 'save_isolated_events', + 'fix_clipping', + 'peak_normalization', + 'peak_normalization_scale_factor', + 'ref_db_change', + 'ref_db_generated', + 'txt_sep', + 'disable_sox_warnings', + 'disable_instantiation_warnings' + ] + + # Load regression jam regjam = jams.load(REG_JAM_PATH) - _compare_scaper_jams(jam, regjam) + _compare_scaper_jams(jam, regjam, + exclude_additional_scaper_sandbox_keys=sandbox_exclude) def test_generate_with_seeding(atol=1e-4, rtol=1e-8): @@ -1372,9 +1395,12 @@ def _compare_generators(generators, atol=1e-4, rtol=1e-8): # load all the jams data # make sure they are all the same as the first one + exclude_sandbox = ['audio_path', 'jams_path', 'txt_path'] jams_data = [jams.load(jam_file.name) for jam_file in jam_files] for x in jams_data: - _compare_scaper_jams(x, jams_data[0]) + _compare_scaper_jams( + x, jams_data[0], + exclude_additional_scaper_sandbox_keys=exclude_sandbox) # load the txt files and compare them def _load_txt(txt_file): From 69878f47e384766737a4d0992d17b69e1396daaa Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 19:51:01 -0700 Subject: [PATCH 12/25] exclude sandbox keys not relevant to tests --- tests/test_core.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 
deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 165dc92..7ae5275 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1833,7 +1833,11 @@ def _test_generate(SR, REG_WAV_PATH, REG_JAM_PATH, REG_TXT_PATH, atol=1e-4, rtol # validate jams jam = jams.load(jam_file.name) regjam = jams.load(REG_JAM_PATH) - _compare_scaper_jams(jam, regjam) + + sandbox_exclude = ['audio_path', 'jams_path', 'txt_path'] + _compare_scaper_jams( + jam, regjam, + exclude_additional_scaper_sandbox_keys=sandbox_exclude) # validate txt _compare_txt_annotation(txt_file.name, REG_TXT_PATH) @@ -1892,8 +1896,15 @@ def _test_generate_return_api(SR, REG_WAV_PATH, REG_JAM_PATH, REG_TXT_PATH, # validate jams jam = jams.load(jam_file.name) regjam = jams.load(REG_JAM_PATH) + + sandbox_exclude = [ + 'isolated_events_path', 'save_isolated_events', 'jams_path', + 'txt_path', 'audio_path'] _compare_scaper_jams(soundscape_jam, jam) - _compare_scaper_jams(soundscape_jam, regjam) + _compare_scaper_jams( + soundscape_jam, + regjam, + exclude_additional_scaper_sandbox_keys=sandbox_exclude) # validate txt annotation txt_data = _load_txt_annotation(txt_file.name) From af3faef8a8b2d26b344c8db28d3f5e5a3b418ec7 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 19:52:59 -0700 Subject: [PATCH 13/25] add new sandbox fields to prevent unit test fail on load --- .../regression/scaper_133_off_by_one_regression_test.jams | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/data/regression/scaper_133_off_by_one_regression_test.jams b/tests/data/regression/scaper_133_off_by_one_regression_test.jams index 63e529b..740df25 100644 --- a/tests/data/regression/scaper_133_off_by_one_regression_test.jams +++ b/tests/data/regression/scaper_133_off_by_one_regression_test.jams @@ -391,7 +391,9 @@ "reverb": null, "scaper_version": "1.3.3", "soundscape_audio_path": "/var/folders/5h/6vxlzhys259350xz4nb3xbym0000gn/T/tmpw80q7jsa.wav", - "isolated_events_audio_path": [] + 
"isolated_events_audio_path": [], + "fix_clipping": false, + "peak_normalization": false } }, "time": 0, From b5bc632f2d5a7a192f657a1ed88293a071f3d007 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 20:41:20 -0700 Subject: [PATCH 14/25] add tests for clipping and normalization --- scaper/core.py | 1 + tests/test_core.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/scaper/core.py b/scaper/core.py index 85ce629..773d9ef 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -2004,6 +2004,7 @@ def _generate_audio(self, scale_factor, ref_db_change, self.ref_db), ScaperWarning) + # print(scale_factor) if scale_factor < 0.05: warnings.warn( 'Scale factor for peak normalization is extreme ' diff --git a/tests/test_core.py b/tests/test_core.py index 7ae5275..ffc4ea3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1988,6 +1988,7 @@ def _generate_soundscape_with_short_background(background_file, audio_path, jams sc.generate(audio_path, jams_path) + def test_scaper_generate_with_fade(): # Test scaper generate with different fade lengths # Works by using a fade of 0 at first then comparing @@ -2041,6 +2042,7 @@ def test_scaper_generate_with_fade(): mask = np.invert(np.isnan(ratio)) assert np.allclose(ratio[mask], fade_out_window[mask]) + def test_scaper_with_short_background(): SHORT_BG_FILE = os.path.join( SHORT_BG_PATH, 'noise', 'noise-free-sound-0145.wav') @@ -2083,3 +2085,92 @@ def test_scaper_with_short_background(): audio1, sr = soundfile.read(wav1_file.name) audio2, sr = soundfile.read(wav2_file.name) assert np.allclose(audio1, audio2) + + +def test_clipping_and_normalization(): + + for sr in [16000, 44100]: + sc = scaper.Scaper(10, FG_PATH, BG_PATH, random_state=0) + sc.sr = sr + sc.ref_db = -20 + + sc.add_event( + label=('const', 'siren'), + source_file=('choose', []), + source_time=('uniform', 0, 10), + event_time=('const', 0), + event_duration=('const', 10), + snr=('const', 20), + 
pitch_shift=None, + time_stretch=None) + + # extreme clipping + sc_extreme = scaper.Scaper(10, FG_PATH, BG_PATH, random_state=0) + sc_extreme.sr = 16000 + sc_extreme.ref_db = -20 + + sc_extreme.add_event( + label=('const', 'siren'), + source_file=('choose', []), + source_time=('uniform', 0, 10), + event_time=('const', 0), + event_duration=('const', 10), + snr=('const', 40), + pitch_shift=None, + time_stretch=None) + + tmpfiles = [] + with _close_temp_files(tmpfiles): + + # Make sure a warning is raised when there's clipping + audio_path = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path) + pytest.warns(ScaperWarning, sc.generate, audio_path, fix_clipping=False) + + # Make sure a second warning is raised if we're fixing the clipping + audio_path2 = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path2) + with pytest.warns(None) as record: + sc.generate(audio_path2, fix_clipping=True) + + assert len(record) == 2 + assert str(record[0].message) == 'Soundscape audio is clipping!' + assert 'Peak normalization applied to fix clipping' in str(record[1].message) + + # Make sure we get a third warning when the scaling factor is < 0.05 + audio_path3 = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path3) + with pytest.warns(None) as record: + sc_extreme.generate(audio_path3, fix_clipping=True) + + assert len(record) == 3 + assert str(record[0].message) == 'Soundscape audio is clipping!' 
+ assert 'Peak normalization applied to fix clipping' in str(record[1].message) + assert 'Scale factor for peak normalization is extreme' in str(record[2].message) + + # PEAK NORMALIZATION TESTS + # Make sure a warning is raised when there's clipping + audio_path4 = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path4) + pytest.warns(ScaperWarning, sc.generate, audio_path4, + fix_clipping=False, peak_normalization=True) + + # Make sure a second warning is NOT raised if we're peak normalizing by default + audio_path5 = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path5) + with pytest.warns(None) as record: + sc.generate(audio_path5, fix_clipping=False, peak_normalization=True) + + assert len(record) == 1 + assert str(record[0].message) == 'Soundscape audio is clipping!' + + # Make sure we get two warnings when we're normalizing but not fixing + # clipping explicitly and the scaling factor is < 0.05 + audio_path6 = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + tmpfiles.append(audio_path6) + with pytest.warns(None) as record: + sc_extreme.generate(audio_path6, fix_clipping=False, peak_normalization=True) + + assert len(record) == 2 + assert str(record[0].message) == 'Soundscape audio is clipping!' 
+ assert 'Scale factor for peak normalization is extreme' in str(record[1].message) From 836a9faf3412d836a8f61f108cc7432dfd6ae4de Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 20:43:11 -0700 Subject: [PATCH 15/25] Remove pyrsistent==0.15.4 dep since we've dropped 2.7 and 3.4 --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 258d0ff..0c7a9da 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ ], install_requires=[ 'sox==1.4.0', - 'pyrsistent==0.15.4', 'jams>=0.3.2', 'numpy>=1.13.3', "soxbindings>=1.2.2;platform_system!='Windows'", From b7079c4b130998e50ad9cc8ed649b9896446dede Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 20:48:19 -0700 Subject: [PATCH 16/25] Update changelog --- docs/changes.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/changes.rst b/docs/changes.rst index c70b908..0dd360e 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -2,6 +2,14 @@ Changelog --------- +v1.6.3 +~~~~~~ +- Scaper.generate now accepts two new optional arguments for controlling audio clipping and normalization: + - fix_clipping: if True and the soundscape audio is clipping, it will be peak normalized and all isolated events will be scaled accordingly. + - peak_normalization: if True, soundscape audio will be peak normalized regardless of whether it's clipping or not and all isolated events will be scaled accordingly. +- All generate arguments are now documented in the scaper sandbox inside the JAMS annotation. +- Furthermore, we also document in the JAMS: the scale factor used for peak normalization, the change in ref_db, and the actual ref_db of the generated audio. 
+ v1.6.2 ~~~~~~ - Switching from FFMpeg LUFS calculation to pyloudnorm for better performance: runtime is reduced by approximately 30% From 6db0f28a94645f5f0204e2d1543bfddf3fbacc2d Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 21:11:51 -0700 Subject: [PATCH 17/25] start working on more tests, commented out for now --- tests/test_core.py | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ffc4ea3..f35991c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -306,6 +306,43 @@ def _validate_soundscape_and_event_audio(orig_wav_file, # validate annotation txt _compare_txt_annotation(orig_txt_file.name, gen_txt_file.name) + # # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS + # for _ in range(5): + # (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ + # sc.generate(audio_path=orig_wav_file.name, + # jams_path=orig_jam_file.name, + # txt_path=orig_txt_file.name, + # no_audio=True, + # disable_instantiation_warnings=True) + # + # assert soundscape_audio is None + # + # (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + # scaper.generate_from_jams(orig_jam_file.name, + # audio_outfile=gen_wav_file.name, + # jams_outfile=gen_jam_file.name, + # txt_path=gen_txt_file.name) + # + # # validate return API + # assert np.allclose(soundscape_audio, fj_soundscape_audio) + # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) + # _compare_txt_annotation(annotation_list, fj_annotation_list) + # for event, fj_event in zip(event_audio_list, fj_event_audio_list): + # assert np.allclose(event, fj_event, atol=1e-8, rtol=rtol) + # + # # validate soundscape audio + # orig_wav, sr = soundfile.read(orig_wav_file.name) + # gen_wav, sr = soundfile.read(gen_wav_file.name) + # assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol) + # + # # validate jams + # 
orig_jam = jams.load(orig_jam_file.name) + # gen_jam = jams.load(gen_jam_file.name) + # _compare_scaper_jams(orig_jam, gen_jam) + # + # # validate annotation txt + # _compare_txt_annotation(orig_txt_file.name, gen_txt_file.name) + # Now add in trimming! for _ in range(5): with backports.tempfile.TemporaryDirectory() as isolated_events_path: @@ -422,10 +459,11 @@ def _validate_soundscape_and_event_audio(orig_wav_file, # validate return API assert np.allclose(soundscape_audio, fj_soundscape_audio) # TODO: can't compare jams due to change in FG/BG - # In the future update jam comparison such that any item can be - # excluded from the comparison on demand, which would allow for this - # test here. - # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) + # Need to be able to ignore specific keys in the event value dict + # exclude_sandbox = ['fg_path', 'bg_path'] + # _compare_scaper_jams( + # soundscape_jam, fj_soundscape_jam, + # exclude_additional_scaper_sandbox_keys=exclude_sandbox) _compare_txt_annotation(annotation_list, fj_annotation_list) for event, fj_event in zip(event_audio_list, fj_event_audio_list): assert np.allclose(event, fj_event, atol=1e-8, rtol=rtol) From bcd7f3fa7c590121b7399825dcfa174449a117e6 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 22:21:23 -0700 Subject: [PATCH 18/25] make generate from jams backward compatible with files that don't have fix_clipping and peak_normalization --- scaper/core.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 773d9ef..9cce34d 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -212,8 +212,16 @@ def generate_from_jams(jams_infile, # Pull generation parameters from annotation reverb = ann.sandbox.scaper['reverb'] - fix_clipping = ann.sandbox.scaper['fix_clipping'] - peak_normalization = ann.sandbox.scaper['peak_normalization'] + + if 'fix_clipping' in ann.sandbox.scaper.keys(): + fix_clipping = 
ann.sandbox.scaper['fix_clipping'] + else: + fix_clipping = False + + if 'peak_normalization' in ann.sandbox.scaper.keys(): + peak_normalization = ann.sandbox.scaper['peak_normalization'] + else: + peak_normalization = False # Cast ann.sandbox.scaper to a Sandbox object ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper) @@ -2235,7 +2243,6 @@ def generate(self, # TODO: Stick to heavy handed overwriting for now, in the future we # should consolidate this with what happens inside _instantiate(). - # print("THIS!!!", allow_repeated_label, type(allow_repeated_label)) ann.sandbox.scaper.audio_path = audio_path ann.sandbox.scaper.jams_path = jams_path ann.sandbox.scaper.allow_repeated_label = allow_repeated_label From 504aef9586f2b83bcd34b22600d7bb708ed482b4 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 22:21:50 -0700 Subject: [PATCH 19/25] test generating from file that doesn't have fix_clipping and peak_normalization --- tests/test_core.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index f35991c..99bf72b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -212,6 +212,50 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): pytest.raises(ScaperError, scaper.generate_from_jams, jam_file.name, gen_file.name) + # # Make sure we can load an old JAM file that doesn't have fix_clipping or peak_normalization + old_jam_file = 'tests/data/regression/soundscape_20200501_44100_no_clipping_normalization_fields.jams' + tmpfiles = [] + with _close_temp_files(tmpfiles): + gen_audio_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=True) + gen_jam_file = tempfile.NamedTemporaryFile(suffix='.jams', delete=True) + gen_txt_file = tempfile.NamedTemporaryFile(suffix='.txt', delete=True) + tmpfiles.extend([gen_audio_file, gen_jam_file, gen_txt_file]) + + (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + 
scaper.generate_from_jams(old_jam_file, + audio_outfile=gen_audio_file.name, + jams_outfile=gen_jam_file.name, + txt_path=gen_txt_file.name) + + # validate return API + orig_wav, sr = soundfile.read(TEST_PATHS[44100]['REG'].wav, always_2d=True) + assert np.allclose(orig_wav, fj_soundscape_audio) + + regjam = jams.load(TEST_PATHS[44100]['REG'].jams) + sandbox_exclude = ['fix_clipping', 'peak_normalization'] + _compare_scaper_jams( + regjam, fj_soundscape_jam, + exclude_additional_scaper_sandbox_keys=sandbox_exclude) + # _compare_txt_annotation(annotation_list, fj_annotation_list) # TODO + + # TODO: + # for event, fj_event in zip(event_audio_list, fj_event_audio_list): + # assert np.allclose(event, fj_event, atol=1e-8, rtol=rtol) + + # validate soundscape audio written to disk + gen_wav, sr = soundfile.read(gen_audio_file.name, always_2d=True) + assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol) + + # validate jams + sandbox_exclude = ['fix_clipping', 'peak_normalization'] + gen_jam = jams.load(gen_jam_file.name) + _compare_scaper_jams( + regjam, gen_jam, + exclude_additional_scaper_sandbox_keys=sandbox_exclude) + + # validate annotation txt + _compare_txt_annotation(TEST_PATHS[44100]['REG'].txt, gen_txt_file.name) + # Test for valid jams file tmpfiles = [] with _close_temp_files(tmpfiles): From 619c26df9a16ff1259a0f4560f00bd9c242d6033 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Thu, 24 Sep 2020 22:22:04 -0700 Subject: [PATCH 20/25] regression --- ...4100_no_clipping_normalization_fields.jams | 268 ++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 tests/data/regression/soundscape_20200501_44100_no_clipping_normalization_fields.jams diff --git a/tests/data/regression/soundscape_20200501_44100_no_clipping_normalization_fields.jams b/tests/data/regression/soundscape_20200501_44100_no_clipping_normalization_fields.jams new file mode 100644 index 0000000..76e9835 --- /dev/null +++ 
b/tests/data/regression/soundscape_20200501_44100_no_clipping_normalization_fields.jams @@ -0,0 +1,268 @@ +{ + "annotations": [ + { + "annotation_metadata": { + "curator": { + "name": "", + "email": "" + }, + "annotator": {}, + "version": "", + "corpus": "", + "annotation_tools": "", + "annotation_rules": "", + "validation": "", + "data_source": "" + }, + "namespace": "scaper", + "data": [ + { + "time": 0.0, + "duration": 10.0, + "value": { + "label": "park", + "source_file": "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav", + "source_time": 0, + "event_time": 0, + "event_duration": 10.0, + "snr": 0, + "role": "background", + "pitch_shift": null, + "time_stretch": null + }, + "confidence": 1.0 + }, + { + "time": 2.0, + "duration": 5.0, + "value": { + "label": "siren", + "source_file": "tests/data/audio/foreground/siren/69-Siren-1.wav", + "source_time": 5, + "event_time": 2, + "event_duration": 5, + "snr": 5, + "role": "foreground", + "pitch_shift": null, + "time_stretch": null + }, + "confidence": 1.0 + }, + { + "time": 5.0, + "duration": 0.6875056689342404, + "value": { + "label": "car_horn", + "source_file": "tests/data/audio/foreground/car_horn/17-CAR-Rolls-Royce-Horn.wav", + "source_time": 0, + "event_time": 5, + "event_duration": 0.6875056689342404, + "snr": 20, + "role": "foreground", + "pitch_shift": 1, + "time_stretch": null + }, + "confidence": 1.0 + }, + { + "time": 7.0, + "duration": 0.9674829931972788, + "value": { + "label": "human_voice", + "source_file": "tests/data/audio/foreground/human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav", + "source_time": 0, + "event_time": 7, + "event_duration": 0.806235827664399, + "snr": 10, + "role": "foreground", + "pitch_shift": null, + "time_stretch": 1.2 + }, + "confidence": 1.0 + } + ], + "sandbox": { + "scaper": { + "duration": 10.0, + "original_duration": 10.0, + "fg_path": "tests/data/audio/foreground", + "bg_path": "tests/data/audio/background", + "fg_spec": [ + [ + [ + "const", 
+ "siren" + ], + [ + "const", + "tests/data/audio/foreground/siren/69-Siren-1.wav" + ], + [ + "const", + 5 + ], + [ + "const", + 2 + ], + [ + "const", + 5 + ], + [ + "const", + 5 + ], + "foreground", + null, + null + ], + [ + [ + "const", + "car_horn" + ], + [ + "const", + "tests/data/audio/foreground/car_horn/17-CAR-Rolls-Royce-Horn.wav" + ], + [ + "const", + 0 + ], + [ + "const", + 5 + ], + [ + "const", + 2 + ], + [ + "const", + 20 + ], + "foreground", + [ + "const", + 1 + ], + null + ], + [ + [ + "const", + "human_voice" + ], + [ + "const", + "tests/data/audio/foreground/human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav" + ], + [ + "const", + 0 + ], + [ + "const", + 7 + ], + [ + "const", + 2 + ], + [ + "const", + 10 + ], + "foreground", + null, + [ + "const", + 1.2 + ] + ] + ], + "bg_spec": [ + [ + [ + "const", + "park" + ], + [ + "const", + "tests/data/audio/background/park/268903__yonts__city-park-tel-aviv-israel.wav" + ], + [ + "const", + 0 + ], + [ + "const", + 0 + ], + [ + "const", + 10.0 + ], + [ + "const", + 0 + ], + "background", + null, + null + ] + ], + "fg_labels": [ + "car_horn", + "human_voice", + "siren" + ], + "bg_labels": [ + "park", + "restaurant", + "street" + ], + "protected_labels": [], + "sr": 44100, + "ref_db": -50, + "n_channels": 1, + "fade_in_len": 0.01, + "fade_out_len": 0.01, + "n_events": 3, + "polyphony_max": 2, + "polyphony_gini": 0.5422979845375607, + "allow_repeated_label": true, + "allow_repeated_source": true, + "reverb": null, + "scaper_version": "1.6.3", + "soundscape_audio_path": "tests/data/regression/soundscape_20200501_44100.wav", + "isolated_events_audio_path": [], + "audio_path": "tests/data/regression/soundscape_20200501_44100.wav", + "jams_path": "tests/data/regression/soundscape_20200501_44100.jams", + "save_isolated_events": false, + "isolated_events_path": null, + "disable_sox_warnings": true, + "no_audio": false, + "txt_path": "tests/data/regression/soundscape_20200501_44100.txt", + "txt_sep": "\t", + 
"disable_instantiation_warnings": true, + "peak_normalization_scale_factor": 1.0, + "ref_db_change": 0, + "ref_db_generated": -50 + } + }, + "time": 0, + "duration": 10.0 + } + ], + "file_metadata": { + "title": "", + "artist": "", + "release": "", + "duration": 10.0, + "identifiers": {}, + "jams_version": "0.3.4" + }, + "sandbox": {} +} \ No newline at end of file From 2d72ad03d9cfbe098dbe683218761c539b90e08f Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Fri, 25 Sep 2020 14:36:45 -0700 Subject: [PATCH 21/25] Almost done with tests... --- tests/test_core.py | 164 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 128 insertions(+), 36 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 99bf72b..cfc94fa 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -294,6 +294,24 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): pitch_shift=('uniform', -1, 1), time_stretch=('uniform', 0.8, 1.2)) + # --- Define CLIPPING scaper --- * + sc_clipping = scaper.Scaper(10, FG_PATH, BG_PATH) + sc_clipping.protected_labels = [] + sc_clipping.ref_db = -20 + sc_clipping.add_background(label=('choose', []), + source_file=('choose', []), + source_time=('const', 0)) + # Add 5 events + for _ in range(5): + sc_clipping.add_event(label=('choose', []), + source_file=('choose', []), + source_time=('const', 0), + event_time=('uniform', 0, 9), + event_duration=('choose', [1, 2, 3]), + snr=('uniform', 20, 30), + pitch_shift=('uniform', -1, 1), + time_stretch=('uniform', 0.8, 1.2)) + def _validate_soundscape_and_event_audio(orig_wav_file, gen_wav_file, gen_events_path, @@ -350,42 +368,116 @@ def _validate_soundscape_and_event_audio(orig_wav_file, # validate annotation txt _compare_txt_annotation(orig_txt_file.name, gen_txt_file.name) - # # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS - # for _ in range(5): - # (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ - # 
sc.generate(audio_path=orig_wav_file.name, - # jams_path=orig_jam_file.name, - # txt_path=orig_txt_file.name, - # no_audio=True, - # disable_instantiation_warnings=True) - # - # assert soundscape_audio is None - # - # (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ - # scaper.generate_from_jams(orig_jam_file.name, - # audio_outfile=gen_wav_file.name, - # jams_outfile=gen_jam_file.name, - # txt_path=gen_txt_file.name) - # - # # validate return API - # assert np.allclose(soundscape_audio, fj_soundscape_audio) - # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) - # _compare_txt_annotation(annotation_list, fj_annotation_list) - # for event, fj_event in zip(event_audio_list, fj_event_audio_list): - # assert np.allclose(event, fj_event, atol=1e-8, rtol=rtol) - # - # # validate soundscape audio - # orig_wav, sr = soundfile.read(orig_wav_file.name) - # gen_wav, sr = soundfile.read(gen_wav_file.name) - # assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol) - # - # # validate jams - # orig_jam = jams.load(orig_jam_file.name) - # gen_jam = jams.load(gen_jam_file.name) - # _compare_scaper_jams(orig_jam, gen_jam) - # - # # validate annotation txt - # _compare_txt_annotation(orig_txt_file.name, gen_txt_file.name) + # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS + # Case 1: without clipping + for _ in range(5): + + (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ + sc.generate(audio_path=orig_wav_file.name, + jams_path=orig_jam_file.name, + txt_path=orig_txt_file.name, + no_audio=True, + disable_instantiation_warnings=True) + + assert soundscape_audio is None + assert event_audio_list is None + assert soundscape_jam is not None + assert annotation_list is not None + + ann = soundscape_jam.annotations.search(namespace='scaper')[0] + + assert ann.sandbox.scaper.audio_path == orig_wav_file.name + assert ann.sandbox.scaper.jams_path == orig_jam_file.name + assert 
ann.sandbox.scaper.fix_clipping is False + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True + assert ann.sandbox.scaper.txt_path == orig_txt_file.name + assert ann.sandbox.scaper.txt_sep is '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db + + (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + scaper.generate_from_jams(orig_jam_file.name, + audio_outfile=gen_wav_file.name, + jams_outfile=gen_jam_file.name, + txt_path=gen_txt_file.name) + + # validate return API + _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) + _compare_txt_annotation(annotation_list, fj_annotation_list) + + # Test when we generate ONLY a JAMS file, and then generate audio from the JAMS + # Case 2: WITH CLIPPING + for _ in range(5): + (soundscape_audio, soundscape_jam, annotation_list, event_audio_list) = \ + sc_clipping.generate(audio_path=orig_wav_file.name, + jams_path=orig_jam_file.name, + txt_path=orig_txt_file.name, + no_audio=True, + fix_clipping=True, + disable_instantiation_warnings=True) + + assert soundscape_audio is None + assert event_audio_list is None + assert soundscape_jam is not None + assert annotation_list is not None + + ann = soundscape_jam.annotations.search(namespace='scaper')[0] + + assert ann.sandbox.scaper.audio_path == orig_wav_file.name + assert ann.sandbox.scaper.jams_path == orig_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.save_isolated_events is False + assert 
ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True + assert ann.sandbox.scaper.txt_path == orig_txt_file.name + assert ann.sandbox.scaper.txt_sep is '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor == 1.0 + assert ann.sandbox.scaper.ref_db_change == 0 + assert ann.sandbox.scaper.ref_db_generated == \ + ann.sandbox.scaper.ref_db + + (fj_soundscape_audio, fj_soundscape_jam, fj_annotation_list, fj_event_audio_list) = \ + scaper.generate_from_jams(orig_jam_file.name, + audio_outfile=gen_wav_file.name, + jams_outfile=gen_jam_file.name, + txt_path=gen_txt_file.name) + + assert fj_soundscape_audio is not None + assert fj_event_audio_list is not None + assert fj_soundscape_jam is not None + assert fj_annotation_list is not None + + ann = fj_soundscape_jam.annotations.search(namespace='scaper')[0] + + # assert ann.sandbox.scaper.audio_path == gen_wav_file.name + # assert ann.sandbox.scaper.jams_path == gen_jam_file.name + assert ann.sandbox.scaper.fix_clipping is True + assert ann.sandbox.scaper.peak_normalization is False + assert ann.sandbox.scaper.save_isolated_events is False + assert ann.sandbox.scaper.isolated_events_path is None + assert ann.sandbox.scaper.disable_sox_warnings is True + assert ann.sandbox.scaper.no_audio is True # TODO + # assert ann.sandbox.scaper.txt_path == gen_txt_file.name + assert ann.sandbox.scaper.txt_sep is '\t' + assert ann.sandbox.scaper.disable_instantiation_warnings is True + assert ann.sandbox.scaper.peak_normalization_scale_factor != 1.0 + assert ann.sandbox.scaper.ref_db_change != 0 + assert ann.sandbox.scaper.ref_db_generated != \ + ann.sandbox.scaper.ref_db + + # validate return API + # _compare_scaper_jams(soundscape_jam, fj_soundscape_jam) + # _compare_txt_annotation(annotation_list, fj_annotation_list) # Now add in trimming! 
for _ in range(5): From e9646600763c36761a285de47688cb2c221ee7e5 Mon Sep 17 00:00:00 2001 From: pseeth Date: Sat, 26 Sep 2020 01:20:36 -0700 Subject: [PATCH 22/25] Fixing generate_from_jams so it saves to the ann. --- scaper/core.py | 18 +++++++++++++++--- tests/test_core.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 9cce34d..675c581 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -236,6 +236,19 @@ def generate_from_jams(jams_infile, save_isolated_events=save_isolated_events, isolated_events_path=isolated_events_path, disable_sox_warnings=disable_sox_warnings) + + # TODO: Stick to heavy handed overwriting for now, in the future we + # should consolidate this with what happens inside _instantiate(). + ann.sandbox.scaper.reverb = reverb + ann.sandbox.scaper.fix_clipping = fix_clipping + ann.sandbox.scaper.peak_normalization = peak_normalization + ann.sandbox.scaper.save_isolated_events = save_isolated_events + ann.sandbox.scaper.isolated_events_path = isolated_events_path + ann.sandbox.scaper.disable_sox_warnings = disable_sox_warnings + ann.sandbox.scaper.peak_normalization_scale_factor = scale_factor + ann.sandbox.scaper.ref_db_change = ref_db_change + ann.sandbox.scaper.ref_db_generated = sc.ref_db + ref_db_change + # If there are slice (trim) operations, need to perform them! # Need to add this logic for the isolated events too. @@ -2012,7 +2025,6 @@ def _generate_audio(self, scale_factor, ref_db_change, self.ref_db), ScaperWarning) - # print(scale_factor) if scale_factor < 0.05: warnings.warn( 'Scale factor for peak normalization is extreme ' @@ -2076,7 +2088,7 @@ def _generate_audio(self, # FUSS. Eventually we should remove this from here. 
ann.sandbox.scaper.soundscape_audio_path = audio_path ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path - + # Return audio for in-memory processing return soundscape_audio, event_audio_list, scale_factor, ref_db_change @@ -2260,7 +2272,7 @@ def generate(self, ann.sandbox.scaper.peak_normalization_scale_factor = scale_factor ann.sandbox.scaper.ref_db_change = ref_db_change ann.sandbox.scaper.ref_db_generated = self.ref_db + ref_db_change - + # Save JAMS to disk too if jams_path is not None: soundscape_jam.save(jams_path) diff --git a/tests/test_core.py b/tests/test_core.py index cfc94fa..0933b67 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -297,7 +297,7 @@ def test_generate_from_jams(atol=1e-5, rtol=1e-8): # --- Define CLIPPING scaper --- * sc_clipping = scaper.Scaper(10, FG_PATH, BG_PATH) sc_clipping.protected_labels = [] - sc_clipping.ref_db = -20 + sc_clipping.ref_db = 0 sc_clipping.add_background(label=('choose', []), source_file=('choose', []), source_time=('const', 0)) From 77779ca66b5b972ba5bfb7ffc715870964d85945 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Mon, 28 Sep 2020 12:09:19 -0700 Subject: [PATCH 23/25] move transformer creation into conditional reverb block. 
MUST apply reverb AFTER peak normalization, doesn't work otherwise (in sox) --- scaper/core.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index 675c581..c96371f 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -1993,9 +1993,6 @@ def _generate_audio(self, "No events to synthesize (silent soundscape), no audio " "generated.", ScaperWarning) else: - tfm = sox.Transformer() - if reverb is not None: - tfm.reverb(reverberance=reverb * 100) # Sum all events to get soundscape audio soundscape_audio = sum(event_audio_list) @@ -2033,11 +2030,19 @@ def _generate_audio(self, ScaperWarning ) - # Apply effects and reshape - soundscape_audio = tfm.build_array( - input_array=soundscape_audio, - sample_rate_in=self.sr, - ) + # Optionally apply reverb + # NOTE: must apply AFTER peak normalization: applying reverb + # to a clipping signal with sox and then normalizing doesn't + # work as one would hope. + if reverb is not None: + tfm = sox.Transformer() + tfm.reverb(reverberance=reverb * 100) + soundscape_audio = tfm.build_array( + input_array=soundscape_audio, + sample_rate_in=self.sr, + ) + + # Reshape to ensure data are 2d soundscape_audio = soundscape_audio.reshape(-1, self.n_channels) # Optionally save soundscape audio to disk From d779ef3d328e17d55239a893211774f9052d2168 Mon Sep 17 00:00:00 2001 From: Justin Salamon Date: Mon, 28 Sep 2020 12:25:23 -0700 Subject: [PATCH 24/25] Use os.makedirs(..., exist_ok=True), update some inline comments --- scaper/core.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/scaper/core.py b/scaper/core.py index c96371f..9b2721c 100644 --- a/scaper/core.py +++ b/scaper/core.py @@ -238,7 +238,7 @@ def generate_from_jams(jams_infile, disable_sox_warnings=disable_sox_warnings) # TODO: Stick to heavy handed overwriting for now, in the future we - # should consolidate this with what happens inside _instantiate(). 
+ # should consolidate this with what happens inside _instantiate(). ann.sandbox.scaper.reverb = reverb ann.sandbox.scaper.fix_clipping = fix_clipping ann.sandbox.scaper.peak_normalization = peak_normalization @@ -249,7 +249,6 @@ def generate_from_jams(jams_infile, ann.sandbox.scaper.ref_db_change = ref_db_change ann.sandbox.scaper.ref_db_generated = sc.ref_db + ref_db_change - # If there are slice (trim) operations, need to perform them! # Need to add this logic for the isolated events too. if 'slice' in ann.sandbox.keys(): @@ -2058,11 +2057,7 @@ def _generate_audio(self, else: event_folder = isolated_events_path - if not os.path.exists(event_folder): - # In Python 3.2 and above we could do - # os.makedirs(..., exist_ok=True) but we test back to - # Python 2.7. - os.makedirs(event_folder) + os.makedirs(event_folder, exist_ok=True) iso_idx = 0 role_counter = {'background': 0, 'foreground': 0} @@ -2088,9 +2083,11 @@ def _generate_audio(self, "mixture", ScaperWarning) # Document output paths - # TODO: this is redundant with data stored in ann.sandbox.scaper.generate, - # but we're keeping it here for now for backwards compatibility e.g. with - # FUSS. Eventually we should remove this from here. + # TODO: this is redundant with audio_path and isolated_events_path that + # are also stored in ann.sandbox.scaper. For now we're keeping these + # here for now for backwards compatibility e.g. with FUSS. Eventually + # we should remove these two lines and consolidate how/where JAMS + # metadata is stored (cf. generate() and generate_from_jams()). ann.sandbox.scaper.soundscape_audio_path = audio_path ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path From 3e10f956d5a981186a412fd94fcd62d7f1ce7e39 Mon Sep 17 00:00:00 2001 From: pseeth Date: Mon, 28 Sep 2020 12:40:20 -0700 Subject: [PATCH 25/25] Updating profile script. 
--- tests/profile_results.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/profile_results.csv b/tests/profile_results.csv index a4999a3..5e596ab 100644 --- a/tests/profile_results.csv +++ b/tests/profile_results.csv @@ -7,3 +7,4 @@ time_of_run,scaper_version,python_version,system,machine,processor,n_cpu,n_worke 2020-09-22 18:59:10.570372,1.6.0,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,98.4973,edfe1de5c6e46206f64d6b8218b490d074871d24 2020-09-23 12:50:07.721451,1.6.1,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,97.5089,8ee6a0ddfadde9b1b7cc1fd96a3a8e513c8256e5 2020-09-23 15:06:59.663871,1.6.2,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.1766,e0107122b3cadaa713e119ce68b843876024ee63 +2020-09-28 12:39:24.620412,1.6.3,3.8.3,Darwin,x86_64,i386,8,1,16.0 GB,100,62.6309,d779ef3d328e17d55239a893211774f9052d2168