From dbd4356759ad958d3317621b33da9be13e3fedae Mon Sep 17 00:00:00 2001
From: pajowu <git@ca.pajowu.de>
Date: Fri, 17 Nov 2023 17:05:59 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20crash=20in=20speaker=20ide?=
 =?UTF-8?q?ntification=20if=20segment=20starts=20less=20than=200.1s=20befo?=
 =?UTF-8?q?re=20audio=20end?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 worker/transcribee_worker/identify_speakers.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/worker/transcribee_worker/identify_speakers.py b/worker/transcribee_worker/identify_speakers.py
index 610573b0..f1bf2a13 100644
--- a/worker/transcribee_worker/identify_speakers.py
+++ b/worker/transcribee_worker/identify_speakers.py
@@ -35,7 +35,14 @@ def time_to_sample(time: float | None):
 
         segments = [
             (
-                time_to_sample(child.children[0].start),
+                min(
+                    time_to_sample(child.children[0].start),
+                    # We always use at least 0.1s of audio; otherwise the
+                    # fingerprinting model sometimes crashes. Since a segment
+                    # may start less than 0.1s before the end of the audio,
+                    # clamp the start to 0.1s before the end as a safeguard.
+                    len(audio) - time_to_sample(0.1),
+                ),
                 max(
                     time_to_sample(child.children[-1].end),
                     # we always use at least 0.1s,