-
Notifications
You must be signed in to change notification settings - Fork 0
/
training_data.py
44 lines (33 loc) · 1.16 KB
/
training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# For English Data Only
# Properties can be changed by manipulating the globals: _LANG, _DATA_DIR
import numpy as np
import scipy.signal as signal
from scipy.io import wavfile
# Language prefix for the data set. NOTE(review): no function visible in this
# file reads _LANG; callers pass the language explicitly - confirm intended use.
_LANG = 'english'
# Directory that get_data() prepends when building the path of a wav file.
_DATA_DIR = './data'
def main():
    '''Script entry point; currently a placeholder that does nothing.'''
    return None
## UTILITY ####################################################################
def normalize_data(x):
    '''Convert x to float64 and scale it down by a constant factor of 16.

    NOTE(review): the divisor 16 is kept exactly as found; whether it was
    meant to be a full-scale PCM constant cannot be told from this file.'''
    divisor = 16.0
    as_float = np.float64(x)
    return as_float / divisor
def resample(x, fs, rate=22050):
    '''Resample a MONO signal from sample rate fs to sample rate rate.

    x    -- 1-D signal (array-like) sampled at fs
    fs   -- current sample rate of x
    rate -- target sample rate (default 22050)

    returns (resampled_signal, rate)'''
    x = np.asarray(x)
    # float() forces true division even where int / int would truncate
    # (e.g. 22050 / 44100 == 0 under Python 2).
    ratio = float(rate) / fs
    # scipy.signal.resample takes the output length as an integer number of
    # samples, so round the scaled length instead of passing a float.
    num = int(round(x.size * ratio))
    return (signal.resample(x, num), rate)
## TRAINING ##################################################################
def gen_get_data(indices, lang, fs):
    '''Lazily yield (signal, fs) pairs for the recordings named by indices.

    Each recording is loaded with get_data(index, lang). When the rate
    reported for it differs from fs, the signal is passed through
    resample() first, so every yielded pair carries fs as its rate.'''
    for index in indices:
        samples, native_fs = get_data(index, lang)
        if native_fs != fs:
            # Only the resampled signal is needed; the returned rate is fs.
            samples = resample(samples, native_fs, fs)[0]
        yield samples, fs
def get_data(k, language, norm_fun=normalize_data):
    '''Read recording number k of the given language from _DATA_DIR.

    The file is <_DATA_DIR>/<language><int(k)>.mov.wav. The raw samples are
    passed through norm_fun before being returned.

    returns (norm_fun(samples), sample_rate)'''
    filename = language + str(int(k)) + '.mov.wav'
    path = _DATA_DIR + '/' + filename
    sample_rate, raw = wavfile.read(path)
    return norm_fun(raw), sample_rate
# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()