Skip to content

Commit

Permalink
Vad options. Check for last chunk.
Browse files (browse the repository at this point in the history)
  • Loading branch information
boocmp committed Aug 22, 2024
1 parent 2741d3a commit b8bdd8b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
28 changes: 16 additions & 12 deletions src/stream_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,15 @@ def __init__(self):
self._vad_detected_offset = 0
self._speech_audio_buffers = []
self._speech_timestamps = []
self._last_chunk_received = False

self._vad_options = VadOptions(
min_speech_duration_ms=60, min_silence_duration_ms=60
min_speech_duration_ms=125, min_silence_duration_ms=125, speech_pad_ms=125
)

def consume(self, stream_data: bytes):
self._last_chunk_received = len(stream_data) == 0

self._raw_stream_data += stream_data
try:
raw_audio_buffer = decode_audio(io.BytesIO(self._raw_stream_data))
Expand All @@ -64,16 +67,17 @@ def consume(self, stream_data: bytes):
if not speech_timestamps:
return

# remove the speech chunks which probably are not ended
while (
speech_timestamps
and speech_timestamps[-1]["end"]
> len(raw_audio_buffer) - self._vad_options.min_silence_duration_ms * 16
):
del speech_timestamps[-1]
if not self._last_chunk_received:
# remove the speech chunks which probably are not ended
while (
speech_timestamps
and speech_timestamps[-1]["end"]
> len(raw_audio_buffer) - self._vad_options.min_silence_duration_ms * 16
):
del speech_timestamps[-1]

if not speech_timestamps:
return
if not speech_timestamps:
return

self._vad_detected_offset += speech_timestamps[-1]["end"]

Expand Down Expand Up @@ -105,7 +109,7 @@ def consume(self, stream_data: bytes):

[print(buf2secs(x)) for x in self._speech_audio_buffers]

print(len2secs(self._raw_stream_data_duration), self._vad_detected_offset)
print(self._raw_stream_data_duration, len2secs(self._vad_detected_offset))

def should_transcribe(self):
if not self._speech_audio_buffers:
Expand All @@ -114,7 +118,7 @@ def should_transcribe(self):
return True
if self._raw_stream_data_duration > 3:
return True
return False
return self._last_chunk_received

def get_speech_audio(self) -> bytes:
assert self.should_transcribe()
Expand Down
2 changes: 0 additions & 2 deletions src/stt_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ async def handleUpstream(
async with ipc.client.Publisher(pair) as pipe:
try:
async for chunk in request.stream():
if len(chunk) == 0:
break
stream.consume(chunk)

while stream.should_transcribe():
Expand Down

0 comments on commit b8bdd8b

Please sign in to comment.