# full_system.py
import kaldiio
import sys, os
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from pickle import Unpickler
from tdnn import TDNN
from collections import defaultdict
import sklearn.discriminant_analysis as lda
import plda
import glob
from torchsummary import summary
from sklearn.decomposition import PCA
random.seed(1)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
in_set = ['ENG', 'GER', 'ICE', 'FRE', 'SPA', 'ARA', 'RUS', 'BEN', 'KAS', 'GRE', 'CAT', 'KOR', 'TUR', 'TAM', 'TEL', 'CHI', 'TIB', 'JAV', 'EWE', 'HAU', 'LIN', 'YOR', 'HUN', 'HAW', 'MAO', 'ITA', 'URD', 'SWE', 'PUS', 'GEO', 'HIN', 'THA']
out_of_set = ['DUT', 'HEB', 'UKR', 'BUL', 'PER', 'ALB', 'UIG', 'MAL', 'BUR', 'IBA', 'ASA', 'AKU', 'ARM', 'HRV', 'FIN', 'JPN', 'NOR', 'NEP', 'RUM']
print("\n===================================LOADING MFCC+PITCH FROM ARK==================================\n")
data = []
for i, lang in enumerate(in_set + out_of_set, 0):
    print(lang, "(In-set)" if lang in in_set else "(Out-of-set)")
    filepath = './feature-subset/' + lang + '/raw_mfcc_pitch_' + lang + '.1.ark'
    for key, numpy_array in kaldiio.load_ark(filepath):
        inputs = torch.from_numpy(np.expand_dims(numpy_array, axis=0))
        # In-set languages keep their enumeration index (0..31); out-of-set languages map to negative labels (-1, -2, ...)
        labels = torch.from_numpy(np.array([i if lang in in_set else (-i + len(in_set) - 1)]))
        data.append((inputs, labels))
print("\n===================================SPLITTING DATA INTO 3 SETS: train1, train2, test==================================\n")
'''
Train1 (tdnn): 95% of in-set
Train2 (lda/plda): 80% of out-of-set
Test: 5% of in-set + 20% out-of-set
'''
random.shuffle(data)
data_concatenated = dict()
# Concatenate all utterances of the same language into one long (frames x features) matrix per label
for inputs, labels in data:
    label = labels.numpy()[0]
    mfcc = np.squeeze(inputs.numpy(), axis=0)
    if label in data_concatenated:
        data_concatenated[label].append(mfcc)
    else:
        data_concatenated[label] = [mfcc]
for i in data_concatenated:
    data_concatenated[i] = np.vstack(data_concatenated[i])
del data
def chunkify_tensor(tensor, size=400):
    # Split along the time axis into fixed-size chunks; drop the trailing chunk, which is shorter than `size`
    return torch.split(tensor, size, dim=1)[:-1]
train1, train2, test = [], [], []
for i in data_concatenated:
    label = torch.from_numpy(np.array([i]))
    mfcc = torch.from_numpy(np.expand_dims(data_concatenated[i], axis=0))
    chunks = chunkify_tensor(mfcc)
    if i >= 0:
        # In-set language: 95% of chunks to train1 (TDNN), 5% to test
        cutoff = int(len(chunks) * 0.95)
        for chunk in chunks[:cutoff]:
            train1.append((chunk.to(device), label.to(device)))
        for chunk in chunks[cutoff:]:
            test.append((chunk.to(device), label.to(device)))
    else:
        # Out-of-set language: 80% of chunks to train2 (LDA/PLDA), 20% to test
        cutoff = int(len(chunks) * 0.8)
        for chunk in chunks[:cutoff]:
            train2.append((chunk.to(device), label.to(device)))
        for chunk in chunks[cutoff:]:
            test.append((chunk.to(device), label.to(device)))
del data_concatenated
random.shuffle(train1)
random.shuffle(train2)
random.shuffle(test)
for i in test:
    assert(test[0][0].size(dim=0) == i[0].size(dim=0))
    assert(test[0][0].size(dim=1) == i[0].size(dim=1))
    assert(test[0][0].size(dim=2) == i[0].size(dim=2))
random.shuffle(test)
test = test[:3000] # subset for demo purposes
print("\n===================================PREPARING TDNN MODELS==================================\n")
class Net(nn.Module):
    def __init__(self, in_size, num_classes):
        super().__init__()
        self.layer1 = TDNN(input_dim=in_size, output_dim=256, context_size=3)
        self.layer2 = TDNN(input_dim=256, output_dim=256, context_size=3, dilation=1)
        self.layer3 = TDNN(input_dim=256, output_dim=256, context_size=3, dilation=1)
        self.layer4 = TDNN(input_dim=256, output_dim=256, context_size=1)
        self.layer5 = TDNN(input_dim=256, output_dim=256, context_size=1)
        self.final_layer = TDNN(input_dim=256, output_dim=num_classes, context_size=1)

    def forward(self, x):
        forward_pass = nn.Sequential(
            self.layer1,
            nn.ReLU(),
            self.layer2,
            nn.ReLU(),
            self.layer3,
            nn.ReLU(),
            self.layer4,
            nn.ReLU(),
            self.layer5,
            nn.ReLU(),
            self.final_layer)
        return forward_pass(x)
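# Illustrative usage (a sketch only; this script loads a pre-trained network from a pickle
# below rather than instantiating Net directly, and the 16-dim feature size is an assumption):
#   net = Net(in_size=16, num_classes=len(in_set))  # e.g. 13 MFCC + 3 pitch coefficients (assumed)
#   x = torch.randn(1, 400, 16)                     # one (batch, frames, features) chunk
#   logits = net(x)                                 # (1, frames', num_classes); the TDNN context trims a few frames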
LOAD_PATH = './saved-models/tdnn-final-submission'
print('Loading model: first copy for softmax + threshold')
with open(LOAD_PATH + ".pickle", "rb") as infile:
    net = Unpickler(infile).load()
print('Loading model: second copy for LDA + PLDA')
with open(LOAD_PATH + ".pickle", "rb") as infile2:
    net2 = Unpickler(infile2).load()
# Replace the classification layer with Identity so net2 emits frame-level embeddings for LDA/PLDA
net2.final_layer = nn.Identity()
print("\n===================================TEST TEST TEST==================================\n")
'''
Measure 4 things:
- IS detection (threshold): out of all IS samples, how many were labeled as IS
- OOS detection (threshold): out of all OOS samples, how many were labeled as OOS
- IS ID (softmax): out of all IS samples, how many were labeled as IS and given the correct language ID
- OOS ID (plda): out of the correctly detected OOS samples, how many were correctly labeled by PLDA
'''
lda_load_dir = './saved-lda/'
plda_load_dir = './saved-plda/'
def print_accuracies(y_correct, y_pred):
    assert(len(y_correct) == len(y_pred))
    accuracies = {
        'IS_DET': {'correct': 0, 'total': 0},
        'OOS_DET': {'correct': 0, 'total': 0},
        'IS_ID': {'correct': 0, 'total': 0},
        'OOS_ID': {'correct': 0, 'total': 0}
    }
    for i in range(len(y_correct)):
        if y_correct[i] >= 0:
            # In-set sample: count detection, then identification over all in-set samples
            accuracies['IS_DET']['total'] += 1
            if y_pred[i] >= 0:
                accuracies['IS_DET']['correct'] += 1
            accuracies['IS_ID']['total'] += 1
            if y_pred[i] == y_correct[i]:
                accuracies['IS_ID']['correct'] += 1
        else:
            # Out-of-set sample: count detection, then identification over the correctly detected ones
            accuracies['OOS_DET']['total'] += 1
            if y_pred[i] < 0:
                accuracies['OOS_DET']['correct'] += 1
                accuracies['OOS_ID']['total'] += 1
                if y_pred[i] == y_correct[i]:
                    accuracies['OOS_ID']['correct'] += 1
    for key in sorted(accuracies.keys()):
        try:
            print('\t', key, ':', accuracies[key]['correct'] / accuracies[key]['total'])
        except ZeroDivisionError:
            # A category with no samples at this threshold is simply skipped
            pass
    return accuracies
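# A tiny worked example (illustrative only, values made up): with
#   y_correct = [0, 3, -1, -2] and y_pred = [0, -1, -1, -5]
# the counts above give IS_DET = 1/2, IS_ID = 1/2, OOS_DET = 2/2, OOS_ID = 1/2.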
def get_mode(lst):
    # Most frequent element of lst; ties are broken uniformly at random
    counter = defaultdict(int)
    for i in lst:
        counter[i] += 1
    maxes = [k for k, v in counter.items() if v == max(counter.values())]
    return random.choice(maxes)

def get_mode2(lst):
    # Element-wise mode across a list of prediction lists,
    # e.g. get_mode2([[1, 2, 3], [1, 5, 3], [4, 2, 3]]) -> [1, 2, 3]
    astranspose = np.transpose(np.array(lst)).tolist()
    return [get_mode(i) for i in astranspose]
def get_lda_plda_layers(lda_load_directory, plda_load_directory):
    lda_layers = []
    for lda_layer in sorted(glob.glob(lda_load_directory + '*.pk')):
        with open(lda_layer, 'rb') as f:
            lda_layers.append(pickle.load(f))
    print(len(lda_layers), "lda layers loaded")
    plda_layers = []
    for plda_layer in sorted(glob.glob(plda_load_directory + '*.pk')):
        with open(plda_layer, 'rb') as f:
            plda_layers.append(pickle.load(f))
    print(len(plda_layers), "plda layers loaded")
    print()
    return lda_layers, plda_layers
do_testing = True
if do_testing:
    lda_layers, plda_layers = get_lda_plda_layers(lda_load_dir, plda_load_dir)
    net = net.to(device)
    net2 = net2.to(device)
    random.shuffle(test)
    all_paths = []
    with torch.no_grad():
        save_predicted1, save_conf, save_flattened, save_predicted2, save_labels = [], [], [], [], []
        for i, data in enumerate(test, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            # Softmax branch: average the frame-level outputs over time, then take the most confident class
            outputs = net(inputs)
            outputs = torch.mean(outputs, 1)
            outputs = F.softmax(outputs, dim=1)
            predicted1 = outputs.argmax(1, keepdim=True)[0]
            conf = outputs.amax(1, keepdim=True).item()
            # Embedding branch: net2 (final layer replaced by Identity) yields frame-level embeddings, flattened for LDA/PLDA
            output_net2 = net2(inputs)
            flattened = np.squeeze(torch.flatten(output_net2)).detach().cpu().numpy()
            save_predicted1.append(predicted1)
            save_conf.append(conf)
            save_flattened.append(flattened)
            save_labels.append(labels.to('cpu').numpy()[0])
            if i % 1000 == 0 and i != 0:
                print("iter", i, 'of', len(test))
                # Every 1000 samples, run the accumulated embeddings through each LDA+PLDA pair and take the vote
                ensemble_predictions = []
                for layer_i in range(len(lda_layers)):
                    lda_output = lda_layers[layer_i].transform(save_flattened)
                    plda_output, _ = plda_layers[layer_i].predict(lda_output)
                    ensemble_predictions.append(plda_output)
                predicted2 = get_mode2(ensemble_predictions)
                save_predicted2 = predicted2
                for j in range(len(save_predicted1)):
                    all_paths.append((save_predicted1[j], save_conf[j], save_predicted2[j], save_labels[j]))
                save_predicted1, save_conf, save_flattened, save_predicted2, save_labels = [], [], [], [], []
        # Same as above: flush the final, partial batch left over after the loop
        ensemble_predictions = []
        for layer_i in range(len(lda_layers)):
            lda_output = lda_layers[layer_i].transform(save_flattened)
            plda_output, _ = plda_layers[layer_i].predict(lda_output)
            ensemble_predictions.append(plda_output)
        predicted2 = get_mode2(ensemble_predictions)
        save_predicted2 = predicted2
        for j in range(len(save_predicted1)):
            all_paths.append((save_predicted1[j], save_conf[j], save_predicted2[j], save_labels[j]))
        save_predicted1, save_conf, save_flattened, save_predicted2, save_labels = [], [], [], [], []
    print()
    assert(len(all_paths) == len(test))
    accuracies_plot = []
    y_correct = [path[3] for path in all_paths]
    for threshold in [i / 100 for i in range(0, 105, 5)]:  # 0, 0.05, 0.1, 0.15, ..., 0.95, 1.0
        y_pred = []
        for path in all_paths:
            # Keep the softmax prediction when its confidence clears the threshold; otherwise fall back to PLDA
            predicted = path[0]
            if path[1] < threshold:
                predicted = path[2]
            y_pred.append(predicted)
        print(f'Threshold: {threshold}')
        accuracies = print_accuracies(y_correct, y_pred)
        accuracies_plot.append((threshold, accuracies))
        print()
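# A minimal plotting sketch (not part of the original pipeline): accuracies_plot gathers one
# accuracy dict per threshold but is never visualised above, so the lines below show one
# possible way to chart the four measures against the confidence threshold. The output
# filename is an arbitrary choice for this sketch.
if do_testing:
    thresholds = [t for t, _ in accuracies_plot]
    for measure in ['IS_DET', 'OOS_DET', 'IS_ID', 'OOS_ID']:
        # Guard against empty categories so a zero total does not crash the plot
        rates = [acc[measure]['correct'] / max(acc[measure]['total'], 1) for _, acc in accuracies_plot]
        plt.plot(thresholds, rates, label=measure)
    plt.xlabel('softmax confidence threshold')
    plt.ylabel('accuracy')
    plt.legend()
    plt.savefig('threshold_accuracies.png')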