diff --git a/api/api/utils/waveform.py b/api/api/utils/waveform.py index 07b91429ef0..fbc8ffe5d30 100644 --- a/api/api/utils/waveform.py +++ b/api/api/utils/waveform.py @@ -89,7 +89,7 @@ def download_audio(url, identifier): return file_name -def generate_waveform(file_name, duration): +def generate_waveform(file_name: str, duration: int): """ Generate the waveform for the file by invoking the ``audiowaveform`` binary. @@ -102,7 +102,16 @@ def generate_waveform(file_name, duration): logger.debug("waveform_generation_started") - pps = math.ceil(1e6 / duration) # approx 1000 points in total + # Determine the width of the waveform based on the duration of the audio. + # The width varies to improve the appearance and "resolution" of the waveform. + # It also prevents requesting too many points from short audio files. + # See https://github.com/WordPress/openverse/issues/4676 + # + # For long audio files, we set the width to 1,000,000 pixels. + # For short audio files, we set the width to 100,000 pixels. + # This prevents the waveform from appearing "stretched out" and sparse. 
+ width = 1e6 if duration > 100 else 1e5 + pps = math.ceil(width / duration) # approx 1000 points in total args = [ "audiowaveform", "--input-filename", @@ -124,7 +133,8 @@ def generate_waveform(file_name, duration): raise WaveformGenerationFailure() logger.debug("waveform_generation_finished", returncode=proc.returncode) - return proc.stdout + json_out = json.loads(proc.stdout) + return json_out def process_waveform_output(json_out): @@ -141,8 +151,7 @@ def process_waveform_output(json_out): logger.info("Transforming points") - output = json.loads(json_out) - data = output["data"] + data = json_out["data"] logger.debug(f"initial points len(data)={len(data)}") transformed_data = [] diff --git a/api/test/factory/sample-short-audio.mp3 b/api/test/factory/sample-short-audio.mp3 new file mode 100644 index 00000000000..0925836f64f Binary files /dev/null and b/api/test/factory/sample-short-audio.mp3 differ diff --git a/api/test/unit/utils/test_waveform.py b/api/test/unit/utils/test_waveform.py index 6929884f2ea..1bb538829a8 100644 --- a/api/test/unit/utils/test_waveform.py +++ b/api/test/unit/utils/test_waveform.py @@ -4,7 +4,7 @@ import pook import pytest -from api.utils.waveform import UA_STRING, download_audio +from api.utils.waveform import UA_STRING, download_audio, generate_waveform _MOCK_AUDIO_PATH = Path(__file__).parent / ".." / ".." / "factory" @@ -30,3 +30,17 @@ def test_download_audio_sends_ua_header(mock_request): download_audio("http://example.org", "abcd-1234") # ``pook`` will only match if UA header is sent. assert mock_request.total_matches > 0 + + +@pytest.mark.parametrize( + "audio, duration", + [ + ("sample-audio.mp3", 26000), + ("sample-short-audio.mp3", 45), + ], +) +def test_generate_waveform(audio, duration): + file_name = str(_MOCK_AUDIO_PATH.joinpath(audio)) + + json_out = generate_waveform(file_name, duration) + assert len(json_out) > 0