Skip to content

Commit

Permalink
Fix waveform generation for short tracks (#4677)
Browse files Browse the repository at this point in the history
Co-authored-by: zack <[email protected]>
  • Loading branch information
krysal and zackkrida authored Aug 5, 2024
1 parent da3a5a0 commit e7f6934
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
19 changes: 14 additions & 5 deletions api/api/utils/waveform.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def download_audio(url, identifier):
return file_name


def generate_waveform(file_name, duration):
def generate_waveform(file_name: str, duration: int):
"""
Generate the waveform for the file by invoking the ``audiowaveform`` binary.
Expand All @@ -102,7 +102,16 @@ def generate_waveform(file_name, duration):

logger.debug("waveform_generation_started")

pps = math.ceil(1e6 / duration) # approx 1000 points in total
# Determine the width of the waveform based on the duration of the audio.
# The width varies to improve the appearance and "resolution" of the waveform.
# It also prevents requesting too many points from short audio files.
# See https://github.com/WordPress/openverse/issues/4676
#
# For long audio files, we set the width to 1,000,000 pixels.
# For short audio files, we set the width to 100,000 pixels.
# This prevents the waveform from appearing "stretched out" and sparse.
width = 1e6 if duration > 100 else 1e5
pps = math.ceil(width / duration) # approx 1000 points in total
args = [
"audiowaveform",
"--input-filename",
Expand All @@ -124,7 +133,8 @@ def generate_waveform(file_name, duration):
raise WaveformGenerationFailure()

logger.debug("waveform_generation_finished", returncode=proc.returncode)
return proc.stdout
json_out = json.loads(proc.stdout)
return json_out


def process_waveform_output(json_out):
Expand All @@ -141,8 +151,7 @@ def process_waveform_output(json_out):

logger.info("Transforming points")

output = json.loads(json_out)
data = output["data"]
data = json_out["data"]
logger.debug(f"initial points len(data)={len(data)}")

transformed_data = []
Expand Down
Binary file added api/test/factory/sample-short-audio.mp3
Binary file not shown.
16 changes: 15 additions & 1 deletion api/test/unit/utils/test_waveform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pook
import pytest

from api.utils.waveform import UA_STRING, download_audio
from api.utils.waveform import UA_STRING, download_audio, generate_waveform


_MOCK_AUDIO_PATH = Path(__file__).parent / ".." / ".." / "factory"
Expand All @@ -30,3 +30,17 @@ def test_download_audio_sends_ua_header(mock_request):
download_audio("http://example.org", "abcd-1234")
# ``pook`` will only match if UA header is sent.
assert mock_request.total_matches > 0


@pytest.mark.parametrize(
"audio, duration",
[
("sample-audio.mp3", 26000),
("sample-short-audio.mp3", 45),
],
)
def test_generate_waveform(audio, duration):
file_name = str(_MOCK_AUDIO_PATH.joinpath(audio))

json_out = generate_waveform(file_name, duration)
assert len(json_out) > 0

0 comments on commit e7f6934

Please sign in to comment.