From dbd4356759ad958d3317621b33da9be13e3fedae Mon Sep 17 00:00:00 2001 From: pajowu Date: Fri, 17 Nov 2023 17:05:59 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20crash=20in=20speaker=20ide?= =?UTF-8?q?ntification=20if=20segment=20starts=20less=20than=200.1s=20befo?= =?UTF-8?q?re=20audio=20end?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- worker/transcribee_worker/identify_speakers.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/worker/transcribee_worker/identify_speakers.py b/worker/transcribee_worker/identify_speakers.py index 610573b0..f1bf2a13 100644 --- a/worker/transcribee_worker/identify_speakers.py +++ b/worker/transcribee_worker/identify_speakers.py @@ -35,7 +35,14 @@ def time_to_sample(time: float | None): segments = [ ( - time_to_sample(child.children[0].start), + min( + time_to_sample(child.children[0].start), + # we always use at least 0.1s, + # otherwise the fingerprinting model explodes sometimes + # since the start of the segment might be less than 0.1s + # from end of the audio, we use this as a safety + len(audio) - time_to_sample(0.1), + ), max( time_to_sample(child.children[-1].end), # we always use at least 0.1s,