-
Notifications
You must be signed in to change notification settings - Fork 0
/
diarize.py
62 lines (51 loc) · 2.25 KB
/
diarize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from pyannote.database.util import load_rttm
from pyannote.core import Segment, notebook
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from huggingface_hub import notebook_login
import numpy as np
timestamps = np.asarray([0, 3540, 4110, 4200, 6590, 7160, 7280, 7340, 7460, 7670, 7730, 7790, 7850, 7880, 7910, 8030, 8450, 9890, 9920, 10040, 10100, 10280, 10370, 10490, 10610, 10670, 10880, 15770, 15830, 16400, 16430])
timestamps = timestamps / 1000
transcripts = ['okay', 'okay', 'okay', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope', ' I hope this', ' I hope this is', ' I hope this is working', ' I hope this is', ' I hope this is working', ' I hope this is working', ' I hope this is working', ' I hope this is working', ' I hope this is working', ' I hope this is working good', ' I hope this is working good to', ' I hope this is working good to see', ' I hope this is working good to see that', ' exit', ' exit', 'exit']
timestamps_speaker = []
speakers = []
pipeline = Pipeline.from_pretrained('pyannote/speaker-diarization', use_auth_token="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")
diarization = pipeline("../Test-Sanya.wav", num_speakers=2)
print(diarization.labels())
for speaker in diarization.labels():
times = diarization.label_timeline(speaker)
for time in times:
print(speaker, time)
print(time.start)
timestamps_speaker.append(time.start)
speakers.append(speaker)
print(timestamps_speaker)
print(speakers)
index = np.argsort(timestamps_speaker)
print(index)
print(np.asarray(timestamps_speaker)[index])
print(np.asarray(speakers)[index])
i = 0
sporder = []
for time in timestamps:
if i < len(timestamps_speaker)-1:
if time < timestamps_speaker[i+1]:
print(speakers[i])
sporder.append(speakers[i])
else:
i += 1
print(speakers[i])
sporder.append(speakers[i])
else:
print(speakers[i])
sporder.append(speakers[i])
print(sporder)
order = {}
for i in range(len(sporder)-1):
if sporder[i] != sporder[i+1]:
order[sporder[i]] = transcripts[i-2]
print(i)
last = sporder[-1]
if last not in order:
order[last] = transcripts[-1]
print(order)