mix_audio.py
"""Synthesise training audio by overlaying "activate" and negative clips onto a
background recording, and build the matching label vector for each mix."""
import os

import numpy as np

from td_utils import graph_spectrogram


def get_random_time_segment(segment_ms, total_ms=10000.0):
    """Return a random (start, end) window of length segment_ms within the background."""
    # Make sure the segment doesn't run past the end of the background
    segment_start = np.random.randint(low=0, high=int(total_ms - segment_ms))
    segment_end = segment_start + segment_ms - 1
    return (segment_start, segment_end)


def is_overlapping(segment_time, previous_segments):
    """Return True if segment_time overlaps any segment already in previous_segments."""
    segment_start, segment_end = segment_time
    overlap = False
    for previous_start, previous_end in previous_segments:
        # Two segments overlap when neither one ends before the other starts
        if segment_start <= previous_end and segment_end >= previous_start:
            overlap = True
    return overlap


def insert_audio_clip(background, audio_clip, previous_segments):
    """Overlay audio_clip onto background at a random position that does not
    overlap any previously inserted segment.

    Returns the new background and the chosen (start, end) segment, or
    (background, None) if no free position was found after 50 retries.
    """
    total_ms = len(background)
    segment_ms = len(audio_clip)
    segment_time = get_random_time_segment(segment_ms, total_ms)
    count = 0
    # Redraw random positions until the clip no longer overlaps a previous segment
    while is_overlapping(segment_time, previous_segments):
        segment_time = get_random_time_segment(segment_ms, total_ms)
        count += 1
        if count > 50:
            return background, None
    previous_segments.append(segment_time)
    new_background = background.overlay(audio_clip, position=segment_time[0])
    return new_background, segment_time


def insert_ones(y, segment_time, total_ms=10000.0):
    """Set y to 1 for a stretch of output time steps (as long as the inserted clip)
    starting right after the clip's end."""
    Ty = y.shape[1]
    segment_end_y = int(segment_time[1] * Ty / total_ms)
    segment_len = int((segment_time[1] - segment_time[0]) * Ty / total_ms)
    for i in range(segment_end_y + 1, segment_end_y + segment_len + 1):
        if i < Ty:
            y[0, i] = 1
    return y


def create_training_data(background, activates, negatives, filename, kernel=15, stride=4):
    """Mix "activate" and negative clips into the background until roughly half of
    it is covered, export the result, and return its spectrograms and label vector."""
    background.export('./data/tmp.wav', format="wav")
    mel = graph_spectrogram('./data/tmp.wav', minus=False)
    # Number of output time steps, matching a conv layer with this kernel and stride
    Ty = int((mel.shape[1] - kernel) / stride + 1)
    y = np.zeros((1, Ty))
    total_ms = len(background)
    previous_segments = []
    input_ms = 0
    # Keep inserting clips until at least half of the background duration is covered
    while (input_ms / total_ms) < 0.5:
        # Insert 0-3 "activate" clips at random, non-overlapping positions
        number_of_activates = np.random.randint(0, 4)
        random_indices = np.random.randint(len(activates), size=number_of_activates)
        random_activates = [activates[i] for i in random_indices]
        for random_activate in random_activates:
            # Randomly adjust the clip's gain by -2 to +4 dB
            random_activate += np.random.randint(-2, 5)
            background, segment_time = insert_audio_clip(background, random_activate, previous_segments)
            if segment_time is not None:
                segment_start, segment_end = segment_time
                input_ms += (segment_end - segment_start)
                # Only "activate" insertions produce positive labels
                y = insert_ones(y, segment_time=segment_time, total_ms=total_ms)
        # Insert 0-3 negative clips the same way, without labelling them
        number_of_negatives = np.random.randint(0, 4)
        random_indices = np.random.randint(len(negatives), size=number_of_negatives)
        random_negatives = [negatives[i] for i in random_indices]
        for random_negative in random_negatives:
            random_negative += np.random.randint(-2, 5)
            background, segment_time = insert_audio_clip(background, random_negative, previous_segments)
            if segment_time is not None:
                segment_start, segment_end = segment_time
                input_ms += (segment_end - segment_start)
    background.export(filename, format="wav")
    x, x_minus = graph_spectrogram(filename)
    return x, x_minus, y
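

# --- Usage sketch (hypothetical) ---------------------------------------------
# A minimal example of how create_training_data might be called, assuming the
# clips are pydub AudioSegments. The WAV paths and output filename below are
# placeholder names for illustration, not files shipped with this code.
if __name__ == "__main__":
    from pydub import AudioSegment

    background = AudioSegment.from_wav("./data/backgrounds/background_0.wav")
    activates = [AudioSegment.from_wav("./data/activates/activate_0.wav")]
    negatives = [AudioSegment.from_wav("./data/negatives/negative_0.wav")]

    x, x_minus, y = create_training_data(
        background, activates, negatives, filename="./data/train_0.wav"
    )
    # y is a (1, Ty) numpy array; x and x_minus are the spectrograms from td_utils
    print(y.shape)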