1
+ from utils_vad import *
2
+ import sys
3
+ import os
4
+ from pathlib import Path
5
+ sys .path .append ('/home/keras/notebook/nvme_raid/adamnsandle/silero_mono/pipelines/align/bin/' )
6
+ from align_utils import load_audio_norm
7
+ import torch
8
+ import pandas as pd
9
+ import numpy as np
10
+ sys .path .append ('/home/keras/notebook/nvme_raid/adamnsandle/silero_mono/utils/' )
11
+ from open_stt import soundfile_opus as sf
12
+
13
def split_save_audio_chunks(audio_path, model_path, save_path=None, device='cpu', absolute=True, max_duration=10, adaptive=False, **kwargs):
    """Detect speech in an audio file and save every speech span as OPUS chunks.

    The audio is loaded with ``load_audio_norm``, passed through the VAD
    (``get_speech_ts`` or ``get_speech_ts_adaptive``), and each detected span
    is cut into pieces of at most ``max_duration`` seconds. Every piece is
    written to ``<save_path>/<audio stem>/<audio stem>_<start>-<end>.opus``.

    Args:
        audio_path: Path to the source audio file.
        model_path: Either a path (``str`` or ``os.PathLike``) to a model that
            is loaded with ``init_jit_model``, or an already loaded model
            object, which is used as is.
        save_path: Root directory for the chunks. When falsy, a sibling
            directory of ``audio_path`` named ``after_vad`` is used.
        device: Device forwarded to the model loader and to the VAD call.
        absolute: When true, the reported chunk paths are made absolute.
        max_duration: Maximum chunk length in seconds; may be fractional.
        adaptive: Selects ``get_speech_ts_adaptive`` instead of
            ``get_speech_ts``.
        **kwargs: Forwarded unchanged to the selected VAD function.

    Returns:
        A ``pandas.DataFrame`` with one row per written chunk and the columns
        ``audio_path``, ``text``, ``duration`` (seconds, rounded to 2 digits)
        and ``domain``. An empty, column-less DataFrame is returned when no
        speech is detected.

    Raises:
        ValueError: If ``max_duration`` does not amount to at least one sample.
    """
    # NOTE(review): the audio is assumed to be 16 kHz after load_audio_norm;
    # the sample rate it returns is not checked here -- confirm with the loader.
    SAMPLE_RATE = 16000

    # range() needs a positive integer step; convert once so fractional
    # durations (e.g. 7.5 s) work and invalid values fail loudly.
    max_chunk_samples = int(max_duration * SAMPLE_RATE)
    if max_chunk_samples <= 0:
        raise ValueError(f'max_duration must be positive, got {max_duration!r}')

    if not save_path:
        save_path = str(Path(audio_path).with_name('after_vad'))
        print(f'No save path specified! Using {save_path} to save audio chunks!')

    # Accept pathlib paths as well as plain strings; anything else is treated
    # as an already loaded model.
    if isinstance(model_path, (str, os.PathLike)):
        model = init_jit_model(os.fspath(model_path), device)
    else:
        model = model_path

    save_name = Path(audio_path).stem
    audio, _ = load_audio_norm(audio_path)
    wav = torch.tensor(audio)

    detect_speech = get_speech_ts_adaptive if adaptive else get_speech_ts
    speech_timestamps = detect_speech(wav, model, device=device, **kwargs)

    # The output directory is created even when no speech is found, matching
    # the behaviour callers have seen so far.
    full_save_path = Path(save_path, save_name)
    full_save_path.mkdir(parents=True, exist_ok=True)

    if not speech_timestamps:
        return pd.DataFrame()

    chunks = []
    for ts in speech_timestamps:
        # 'start' / 'end' are used as sample offsets into the loaded audio.
        start_ts = int(ts['start'])
        end_ts = int(ts['end'])

        for new_start in range(start_ts, end_ts, max_chunk_samples):
            new_end = min(end_ts, new_start + max_chunk_samples)
            duration = round((new_end - new_start) / SAMPLE_RATE, 2)

            # The file name encodes the source stem and the sample range.
            chunk_path = Path(full_save_path, f'{save_name}_{new_start}-{new_end}.opus')
            if absolute:
                chunk_path = chunk_path.absolute()

            sf.write(str(chunk_path), audio[new_start:new_end], SAMPLE_RATE, format='OGG', subtype='OPUS')
            chunks.append({
                'audio_path': chunk_path,
                'text': '',
                'duration': duration,
                'domain': '',
            })
    return pd.DataFrame(chunks)
0 commit comments