From e7f69343b122ef7102dae8a509d6f81a3cf649c0 Mon Sep 17 00:00:00 2001 From: Krystle Salazar Date: Mon, 5 Aug 2024 12:42:06 -0400 Subject: [PATCH] Fix waveform generation for short tracks (#4677) Co-authored-by: zack <6351754+zackkrida@users.noreply.github.com> --- api/api/utils/waveform.py | 19 ++++++++++++++----- api/test/factory/sample-short-audio.mp3 | Bin 0 -> 2609 bytes api/test/unit/utils/test_waveform.py | 16 +++++++++++++++- 3 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 api/test/factory/sample-short-audio.mp3 diff --git a/api/api/utils/waveform.py b/api/api/utils/waveform.py index 07b91429ef0..fbc8ffe5d30 100644 --- a/api/api/utils/waveform.py +++ b/api/api/utils/waveform.py @@ -89,7 +89,7 @@ def download_audio(url, identifier): return file_name -def generate_waveform(file_name, duration): +def generate_waveform(file_name: str, duration: int): """ Generate the waveform for the file by invoking the ``audiowaveform`` binary. @@ -102,7 +102,16 @@ def generate_waveform(file_name, duration): logger.debug("waveform_generation_started") - pps = math.ceil(1e6 / duration) # approx 1000 points in total + # Determine the width of the waveform based on the duration of the audio. + # The width varies to improve the appearance and "resolution" of the waveform. + # It also prevents requesting too many points from short audio files. + # See https://github.com/WordPress/openverse/issues/4676 + # + # For long audio files, we set the width to 1,000,000 pixels. + # For short audio files, we set the width to 100,000 pixels. + # This prevents the waveform from appearing "stretched out" and sparse. 
+ width = 1e6 if duration > 100 else 1e5 + pps = math.ceil(width / duration) # approx 1000 points in total args = [ "audiowaveform", "--input-filename", @@ -124,7 +133,8 @@ def generate_waveform(file_name, duration): raise WaveformGenerationFailure() logger.debug("waveform_generation_finished", returncode=proc.returncode) - return proc.stdout + json_out = json.loads(proc.stdout) + return json_out def process_waveform_output(json_out): @@ -141,8 +151,7 @@ def process_waveform_output(json_out): logger.info("Transforming points") - output = json.loads(json_out) - data = output["data"] + data = json_out["data"] logger.debug(f"initial points len(data)={len(data)}") transformed_data = [] diff --git a/api/test/factory/sample-short-audio.mp3 b/api/test/factory/sample-short-audio.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..0925836f64f508f5e3813134f0f8e4bf514c0848 GIT binary patch literal 2609 zcmd^>X;4#H7RT>;AR?&a5J1JPeUK2s zYEXgTphiGS96?-K+y);8krG;i(zYn0(13-u8`3>$gPjRAGyP${ST$egS9R})d(Qda zQ+4W`d(Bh2PVnEvWo+AIIh$o!1OTl0K=CDvU&8z)EO!M)h*yXC`2+@J!QaiKnHLK#hLM2J2`&G0O=HoK6~)wa6CKOCr&rWySOqKsB4YM& zS*aTpJvvpH@7-*=F(AceZqc>L_l1h-!!22(Y=)T0eYLK*LL0*TaBSCu2M<2HTX_G5MtC*gN>B)Nb@tfqq$!XL` zL+=Hd-&uT$eXIz1_)unm3!MmIfG-PBN#EBZgs@y1;G1ComWB0PGy=VDbAdV(Mg$P1 z_e2&_Yf3~uNLap9&Ib~ai*FY?CQ2WlEy5kqc-z_Z$nB^AL^Jc0Xz8425B72IA>0;V zo%qjCiBL`kWIl)N%M2$Pb}RH=jX)G3k05&x*-|g6%`_fFsFDXzXi2wm<2Z{(S%%{@ zlm>vyjc@|s$!XA{x%870^I(N(HUa9xM7H`R>Wm_ct&btFir8X(8lfm(DzV3OBAZG} z6jX=Pi(6x>r*OkB?WqItP=*s z<*LTJ-F>dnwtFsJJNo)@ZpiObVK>Z*J101E@*wgJCKR33uuQ?`!Sy`pN9?yzrE{USRPes#8>YK9%zc+tSf2l z|22k5d9&bVX|*he9maj~qMsJ|uuyT}%+dvMIb6xD#apDh{I`?Dw33>q38fBgb-+Pf zZXm6x+UwG5FC-?SZJ$PO%&Tf|`gkLuW6yBsPkD7`Uox7*ZJ}<&`F!z?A&+ZpmKVI6zX};X+z2(k{OZS4Bf9{#zKXv_O z@1uRO?_bC96lV3s0#bBgUW?%2-(L*77v&*cIxG9f7avGUcyFgFjfzf z$wh~(E)1bUr)r0`MNM;MwbyyA$VzoVkbtO63PMXDkwcDw?wO$s#16|%1#Vgs6dS_nuVW{H#^06zeq**yt^{{x+ 
z^X3;1yN*hqynE}pbGO^1Z*1aVM%BNQ%@wCq#qJU>1!I=o%cCf9z-~FP#Ddyaiw?FTiu*L>0ah4JE6=AAY2I@h)0I}(2^JHMoHvW8Pg1y}lw8q+xrW5;$+$&6ei6?(UiSQ%gf z@C!X%9T^=Y4yRaGulFtLE6)(pAf5mmegzXXkbIdNoQm06n*TXTG$_CCp5`mwnlj2s z^W;XRV5{Uh4|FwbB;ZR>5*Wrvs10!gFu4JYf>Xa**G$F=XU2wp2W3FreHDo4*8h|i4ip!;;~!(Jdt$0Mv^gZ&(WoMPn>GNWxlkHUfZn3JBB_eq1%^vPBWRhL_kEt&z< zDI($rZvV^6vX^7_$sFxEvSGn>1jxax-wmz}=Z1Mk{Jv4*wQ5tPkUMWSNhI!D81tVm KmX9Fjf8(G1g6&WM literal 0 HcmV?d00001 diff --git a/api/test/unit/utils/test_waveform.py b/api/test/unit/utils/test_waveform.py index 6929884f2ea..1bb538829a8 100644 --- a/api/test/unit/utils/test_waveform.py +++ b/api/test/unit/utils/test_waveform.py @@ -4,7 +4,7 @@ import pook import pytest -from api.utils.waveform import UA_STRING, download_audio +from api.utils.waveform import UA_STRING, download_audio, generate_waveform _MOCK_AUDIO_PATH = Path(__file__).parent / ".." / ".." / "factory" @@ -30,3 +30,17 @@ def test_download_audio_sends_ua_header(mock_request): download_audio("http://example.org", "abcd-1234") # ``pook`` will only match if UA header is sent. assert mock_request.total_matches > 0 + + +@pytest.mark.parametrize( + "audio, duration", + [ + ("sample-audio.mp3", 26000), + ("sample-short-audio.mp3", 45), + ], +) +def test_generate_waveform(audio, duration): + file_name = str(_MOCK_AUDIO_PATH.joinpath(audio)) + + json_out = generate_waveform(file_name, duration) + assert len(json_out) > 0