Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add options to fix clipping and apply peak normalization to the generated soundscape #132

Merged
merged 25 commits into from
Sep 28, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6da1890
bump version to 1.6.3
justinsalamon Sep 24, 2020
05261f8
Remove unused imports and clean up formatting
justinsalamon Sep 24, 2020
2400ef7
Implement peak normalize
justinsalamon Sep 24, 2020
68be280
First pass at fix_clipping and peak_normalization, tests failing
justinsalamon Sep 24, 2020
c4b8f26
fix bug in code for saving isolated events audio
justinsalamon Sep 24, 2020
9ae18af
test peak_normalize
justinsalamon Sep 24, 2020
627f7cf
Warn user when ref_db changes due to clipping prevention
justinsalamon Sep 24, 2020
8de1713
Return ref_db_change
justinsalamon Sep 25, 2020
9de26b6
update regression jams with new generate fields
justinsalamon Sep 25, 2020
f912321
Store all generation params in the jams sandbox
justinsalamon Sep 25, 2020
c71be95
Add ability to exclude sandbox keys from jams comparison
justinsalamon Sep 25, 2020
69878f4
exclude sandbox keys not relevant to tests
justinsalamon Sep 25, 2020
af3faef
add new sandbox fields to prevent unit test fail on load
justinsalamon Sep 25, 2020
b5bc632
add tests for clipping and normalization
justinsalamon Sep 25, 2020
836a9fa
Remove pyrsistent==0.15.4 dep since we've dropped 2.7 and 3.4
justinsalamon Sep 25, 2020
b7079c4
Update changelog
justinsalamon Sep 25, 2020
6db0f28
start working on more tests, commented out for now
justinsalamon Sep 25, 2020
bcd7f3f
make generate from jams backward compatible with files that don't hav…
justinsalamon Sep 25, 2020
504aef9
test generating from file that doesn't have fix_clipping and peak_nor…
justinsalamon Sep 25, 2020
619c26d
regression
justinsalamon Sep 25, 2020
2d72ad0
Almost done with tests...
justinsalamon Sep 25, 2020
e964660
Fixing generate_from_jams so it saves to the ann.
Sep 26, 2020
77779ca
move transformer creation into conditional reverb block. MUST apply r…
justinsalamon Sep 28, 2020
d779ef3
Use os.makedirs(..., exist_ok=True), update some inline comments
justinsalamon Sep 28, 2020
3e10f95
Updating profile script.
Sep 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 42 additions & 8 deletions scaper/audio.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
# CREATED: 4/23/17 15:37 by Justin Salamon <[email protected]>

'''
"""
Utility functions for audio processing using FFMPEG (beyond sox). Based on:
pseeth marked this conversation as resolved.
Show resolved Hide resolved
https://github.com/mathos/neg23/
'''
"""

import subprocess
import sox
import numpy as np
import pyloudnorm
import soundfile
import tempfile
from .scaper_exceptions import ScaperError
from .util import _close_temp_files


def get_integrated_lufs(audio_array, samplerate, min_duration=0.5,
filter_class='K-weighting', block_size=0.400):
Expand Down Expand Up @@ -104,5 +101,42 @@ def match_sample_length(audio_path, duration_in_samples):

audio = np.pad(audio, pad_width, 'constant')

soundfile.write(audio_path, audio, sr,
subtype=audio_info.subtype, format=audio_info.format)
soundfile.write(audio_path, audio, sr,
subtype=audio_info.subtype, format=audio_info.format)


def peak_normalize(soundscape_audio, event_audio_list):
    """
    Peak normalize the soundscape audio and scale the isolated events to match.

    A single scale factor is computed from the soundscape so that
    max(abs(soundscape_audio)) is (up to a tiny epsilon) equal to 1. The same
    factor is applied to every isolated event so that the events keep their
    relative levels with respect to the scaled soundscape.

    Parameters
    ----------
    soundscape_audio : np.ndarray
        The soundscape audio.
    event_audio_list : list
        List of np.ndarrays containing the audio samples of each isolated
        foreground event.

    Returns
    -------
    scaled_soundscape_audio : np.ndarray
        The peak normalized soundscape audio.
    scaled_event_audio_list : list
        List of np.ndarrays containing the scaled audio samples of
        each isolated foreground event. All events are scaled by scale_factor.
    scale_factor : float
        The scale factor used to peak normalize the soundscape audio.
    """
    # Small offset added to the peak so an all-zero (silent) soundscape does
    # not cause a division by zero. As a consequence the normalized peak is
    # marginally below 1 rather than exactly 1.
    eps = 1e-10
    max_sample = np.max(np.abs(soundscape_audio))
    scale_factor = 1.0 / (max_sample + eps)

    # Scale the soundscape and every isolated event by the same factor. New
    # arrays are produced; the inputs are left untouched.
    scaled_soundscape_audio = soundscape_audio * scale_factor
    scaled_event_audio_list = [
        event_audio * scale_factor for event_audio in event_audio_list
    ]

    return scaled_soundscape_audio, scaled_event_audio_list, scale_factor
163 changes: 123 additions & 40 deletions scaper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .util import polyphony_gini
from .util import is_real_number, is_real_array
from .audio import get_integrated_lufs
from .audio import peak_normalize
from .version import version as scaper_version

SUPPORTED_DIST = {"const": _sample_const,
Expand Down Expand Up @@ -214,7 +215,7 @@ def generate_from_jams(jams_infile,

# Cast ann.sandbox.scaper to a Sandbox object
ann.sandbox.scaper = jams.Sandbox(**ann.sandbox.scaper)
soundscape_audio, event_audio_list = \
soundscape_audio, event_audio_list, scale_factor = \
justinsalamon marked this conversation as resolved.
Show resolved Hide resolved
sc._generate_audio(audio_outfile, ann, reverb=reverb,
save_isolated_events=save_isolated_events,
isolated_events_path=isolated_events_path,
Expand Down Expand Up @@ -1696,8 +1697,14 @@ def _instantiate(self, allow_repeated_label=True,
# Return
return jam

def _generate_audio(self, audio_path, ann, reverb=None,
save_isolated_events=False, isolated_events_path=None,
def _generate_audio(self,
audio_path,
ann,
reverb=None,
fix_clipping=False,
peak_normalization=False,
save_isolated_events=False,
isolated_events_path=None,
disable_sox_warnings=True):
'''
Generate audio based on a scaper annotation and save to disk.
Expand All @@ -1713,6 +1720,19 @@ def _generate_audio(self, audio_path, ann, reverb=None,
(no reverberation) and 1 (maximum reverberation). Use None
(default) to prevent the soundscape from going through the reverb
module at all.
fix_clipping : bool
When True (default=False), checks the soundscape audio for clipping
(abs(sample) > 1). If so, the soundscape waveform is peak normalized,
i.e., scaled such that max(abs(soundscape_audio)) = 1. The audio for
each isolated event is also scaled accordingly. Note: this will change
the actual value of `ref_db` in the generated audio. The scaling
factor that was used is returned.
peak_normalization : bool
When True (default=False), normalize the generated soundscape audio
such that max(abs(soundscape_audio)) = 1. The audio for
each isolated event is also scaled accordingly. Note: this will change
the actual value of `ref_db` in the generated audio. The scaling
factor that was used is returned.
save_isolated_events : bool
If True, this will save the isolated foreground events and
backgrounds in a directory adjacent to the generated soundscape
Expand Down Expand Up @@ -1743,6 +1763,11 @@ def _generate_audio(self, audio_path, ann, reverb=None,
in the same order in which they appear in the jams annotations data
list, and can be matched with:
`for obs, event_audio in zip(ann.data, event_audio_list): ...`.
scale_factor : float
If peak_normalization is True, or fix_clipping is True and the
soundscape audio needs to be scaled to avoid clipping, scale_factor
is the value used to scale the soundscape audio and the audio of the
isolated events. None otherwise.

Raises
------
Expand All @@ -1768,6 +1793,7 @@ def _generate_audio(self, audio_path, ann, reverb=None,
# List for storing all generated audio (one array for every event)
soundscape_audio = None
event_audio_list = []
scale_factor = None
justinsalamon marked this conversation as resolved.
Show resolved Hide resolved

with _set_temp_logging_level(temp_logging_level):

Expand Down Expand Up @@ -1913,29 +1939,86 @@ def _generate_audio(self, audio_path, ann, reverb=None,
'Unsupported event role: {:s}'.format(
e.value['role']))

# Finally combine all the files and optionally apply reverb.
# If there are no events, throw a warning.
if len(event_audio_list) == 0:
warnings.warn(
"No events to synthesize (silent soundscape), no audio "
"generated.", ScaperWarning)
else:
tfm = sox.Transformer()
if reverb is not None:
tfm.reverb(reverberance=reverb * 100)

# Sum all events to get soundscape audio
soundscape_audio = sum(event_audio_list)

# Check for clipping and fix [optional]
max_sample = np.max(np.abs(soundscape_audio))
clipping = max_sample > 1
if clipping:
warnings.warn('Soundscape audio is clipping!',
ScaperWarning)

if peak_normalization or (clipping and fix_clipping):

# normalize soundscape audio and scale event audio
soundscape_audio, event_audio_list, scale_factor = \
peak_normalize(soundscape_audio, event_audio_list)

warnings.warn(
pseeth marked this conversation as resolved.
Show resolved Hide resolved
'Peak normalization applied (scale factor = {})'.format(
justinsalamon marked this conversation as resolved.
Show resolved Hide resolved
scale_factor),
ScaperWarning)

if scale_factor < 0.05:
warnings.warn(
'Scale factor for peak normalization is extreme '
'(<0.05), actual event SNR values in the soundscape '
'audio may not match their specified values.',
ScaperWarning
)

# Apply effects and reshape
soundscape_audio = tfm.build_array(
input_array=soundscape_audio,
sample_rate_in=self.sr,
)
soundscape_audio = soundscape_audio.reshape(-1, self.n_channels)

# Optionally save soundscape audio to disk
if audio_path is not None:
soundfile.write(audio_path, soundscape_audio, self.sr,
subtype='PCM_32')

# Optionally save isolated events to disk
if save_isolated_events:
base, ext = os.path.splitext(audio_path)
if isolated_events_path is None:
event_folder = '{:s}_events'.format(base)
else:
event_folder = isolated_events_path

_role_count = role_counter[e.value['role']]
event_audio_path = os.path.join(
event_folder,
'{:s}{:d}_{:s}{:s}'.format(
e.value['role'], _role_count, e.value['label'], ext))
role_counter[e.value['role']] += 1

if not os.path.exists(event_folder):
# In Python 3.2 and above we could do
# In Python 3.2 and above we could do
# os.makedirs(..., exist_ok=True) but we test back to
# Python 2.7.
os.makedirs(event_folder)
justinsalamon marked this conversation as resolved.
Show resolved Hide resolved
soundfile.write(event_audio_path, event_audio_list[-1], self.sr, subtype='PCM_32')
isolated_events_audio_path.append(event_audio_path)

#TODO what do we do in this case? for now throw a warning
iso_idx = 0
for i, e in enumerate(ann.data):
_role_count = role_counter[e.value['role']]
event_audio_path = os.path.join(
event_folder,
'{:s}{:d}_{:s}{:s}'.format(
e.value['role'], _role_count, e.value['label'], ext))
role_counter[e.value['role']] += 1

soundfile.write(event_audio_path, event_audio_list[iso_idx], self.sr, subtype='PCM_32')
isolated_events_audio_path.append(event_audio_path)
iso_idx += 1

# TODO what do we do in this case? for now throw a warning
if reverb is not None:
warnings.warn(
"Reverb is on and save_isolated_events is True. Reverberation "
Expand All @@ -1944,41 +2027,21 @@ def _generate_audio(self, audio_path, ann, reverb=None,
"audio of the isolated events will not add up to the "
"mixture", ScaperWarning)

# Finally combine all the files and optionally apply reverb.
# If there are no events, throw a warning.
if len(event_audio_list) == 0:
warnings.warn(
"No events to synthesize (silent soundscape), no audio "
"generated.", ScaperWarning)
else:
tfm = sox.Transformer()
if reverb is not None:
tfm.reverb(reverberance=reverb * 100)
# TODO: do we want to normalize the final output?
soundscape_audio = sum(event_audio_list)
soundscape_audio = tfm.build_array(
input_array=soundscape_audio,
sample_rate_in=self.sr,
)
soundscape_audio = soundscape_audio.reshape(-1, self.n_channels)

# Save to disk if output path provided
if audio_path is not None:
soundfile.write(audio_path, soundscape_audio, self.sr, subtype='PCM_32')

# Document output paths
ann.sandbox.scaper.soundscape_audio_path = audio_path
ann.sandbox.scaper.isolated_events_audio_path = isolated_events_audio_path

# Return audio for in-memory processing
return soundscape_audio, event_audio_list
return soundscape_audio, event_audio_list, scale_factor

def generate(self,
audio_path=None,
jams_path=None,
allow_repeated_label=True,
allow_repeated_source=True,
reverb=None,
fix_clipping=False,
peak_normalization=False,
save_isolated_events=False,
isolated_events_path=None,
disable_sox_warnings=True,
Expand Down Expand Up @@ -2014,6 +2077,23 @@ def generate(self,
(no reverberation) and 1 (maximum reverberation). Use None
(default) to prevent the soundscape from going through the reverb
module at all.
fix_clipping : bool
When True (default=False), checks the soundscape audio for clipping
(abs(sample) > 1). If so, the soundscape waveform is peak normalized,
i.e., scaled such that max(abs(soundscape_audio)) = 1. The audio for
each isolated event is also scaled accordingly. Note: this will change
the actual value of `ref_db` in the generated audio. The updated
`ref_db` value will be stored in the JAMS annotation. The SNR of
foreground events with respect to the background is unaffected except
when extreme scaling is required to prevent clipping.
peak_normalization : bool
When True (default=False), normalize the generated soundscape audio
such that max(abs(soundscape_audio)) = 1. The audio for
each isolated event is also scaled accordingly. Note: this will change
the actual value of `ref_db` in the generated audio. The updated
`ref_db` value will be stored in the JAMS annotation. The SNR of
foreground events with respect to the background is unaffected except
when extreme scaling is required to achieve peak normalization.
save_isolated_events : bool
If True, this will save the isolated foreground events and
backgrounds in a directory adjacent to the generated soundscape
Expand Down Expand Up @@ -2104,11 +2184,14 @@ def generate(self,

# Generate the audio and save to disk
if not no_audio:
soundscape_audio, event_audio_list = \
self._generate_audio(audio_path, ann, reverb=reverb,
soundscape_audio, event_audio_list, scale_factor = \
self._generate_audio(audio_path, ann,
reverb=reverb,
save_isolated_events=save_isolated_events,
isolated_events_path=isolated_events_path,
disable_sox_warnings=disable_sox_warnings)
disable_sox_warnings=disable_sox_warnings,
fix_clipping=fix_clipping,
peak_normalization=peak_normalization)

# Save JAMS to disk too
if jams_path is not None:
Expand Down
2 changes: 1 addition & 1 deletion scaper/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
"""Version info"""

short_version = '1.6'
version = '1.6.2'
version = '1.6.3'