a3_gmm.py
from sklearn.model_selection import train_test_split
import numpy as np
import os, fnmatch
import random

# NOTE: logsumexp is used by the sketch implementations below; it is a standard
# scipy utility, not part of the original starter imports.
from scipy.special import logsumexp

dataDir = '/u/cs401/A3/data/'


class theta:
    def __init__(self, name, M=8, d=13):
        self.name = name
        self.omega = np.zeros((M, 1))   # mixture weights, shape (M, 1)
        self.mu = np.zeros((M, d))      # component means, shape (M, d)
        self.Sigma = np.zeros((M, d))   # diagonal covariances, shape (M, d)


def log_b_m_x(m, x, myTheta, preComputedForM=[]):
    ''' Returns the log probability of d-dimensional vector x using only component m of model myTheta

        See equation 1 of the handout.

        As you'll see in tutorial, for efficiency, you can precompute something for 'm'
        that applies to all x outside of this function. If you do this, you pass that
        precomputed component in preComputedForM.
    '''
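    # A minimal sketch, not the official solution: equation 1 in the log domain
    # for a Gaussian with diagonal covariance, assuming myTheta.Sigma[m] holds
    # the covariance diagonal (as its (M, d) shape suggests). The
    # preComputedForM optimization is deliberately left out for clarity.
    mu = myTheta.mu[m]
    sigma = myTheta.Sigma[m]
    d = mu.shape[0]
    return (-0.5 * np.sum((x - mu) ** 2 / sigma)
            - 0.5 * d * np.log(2.0 * np.pi)
            - 0.5 * np.sum(np.log(sigma)))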


def log_p_m_x(m, x, myTheta):
    ''' Returns the log probability of the m^{th} component given d-dimensional vector x, and model myTheta

        See equation 2 of the handout.
    '''
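    # A minimal sketch, not the official solution: equation 2 via Bayes' rule in
    # the log domain, normalizing over all M components with logsumexp.
    M = myTheta.omega.shape[0]
    log_ws = np.log(myTheta.omega).flatten()
    log_bs = np.array([log_b_m_x(k, x, myTheta) for k in range(M)])
    return log_ws[m] + log_bs[m] - logsumexp(log_ws + log_bs)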


def logLik(log_Bs, myTheta):
    ''' Return the log likelihood of 'X' using model 'myTheta' and precomputed MxT matrix, 'log_Bs', of log_b_m_x

        X can be training data, when used in train( ... ), and
        X can be testing data, when used in test( ... ).

        We don't pass X itself: instead we pass log_Bs, where log_Bs[m, t] is the
        log probability of vector x_t in component m, computed and stored outside
        this function for efficiency.

        See equation 3 of the handout.
    '''
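    # A minimal sketch, not the official solution: equation 3 in the log domain.
    # myTheta.omega has shape (M, 1), so adding its log to the (M, T) matrix
    # log_Bs broadcasts the log weights across all T frames.
    return np.sum(logsumexp(log_Bs + np.log(myTheta.omega), axis=0))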


def train(speaker, X, M=8, epsilon=0.0, maxIter=20):
    ''' Train a model for the given speaker. Returns the theta (omega, mu, sigma). '''
    myTheta = theta(speaker, M, X.shape[1])
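    # A minimal EM sketch, not the official solution. The initialization is an
    # assumption: uniform weights, means copied from M random frames of X, and
    # unit variances.
    T = X.shape[0]
    myTheta.omega[:] = 1.0 / M
    myTheta.mu = X[random.sample(range(T), M)]
    myTheta.Sigma[:] = 1.0

    prevL = float('-inf')
    for _ in range(maxIter):
        # E-step: log_Bs[m, t] = log b_m(x_t); responsibilities via equation 2
        log_Bs = np.array([[log_b_m_x(m, X[t], myTheta) for t in range(T)]
                           for m in range(M)])
        L = logLik(log_Bs, myTheta)
        log_weighted = log_Bs + np.log(myTheta.omega)
        P = np.exp(log_weighted - logsumexp(log_weighted, axis=0))  # (M, T)

        # M-step: re-estimate omega, mu, Sigma from the responsibilities,
        # flooring the variances to keep them strictly positive
        denom = P.sum(axis=1, keepdims=True)                        # (M, 1)
        myTheta.omega = denom / T
        myTheta.mu = (P @ X) / denom
        myTheta.Sigma = np.maximum((P @ (X ** 2)) / denom - myTheta.mu ** 2, 1e-6)

        if L - prevL < epsilon:
            break
        prevL = L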
    return myTheta


def test(mfcc, correctID, models, k=5):
    ''' Computes the likelihood of 'mfcc' in each model in 'models', where the correct model is 'correctID'

        If k > 0, print to stdout the actual speaker and the k best likelihoods in this format:
               [ACTUAL_ID]
               [SNAME1] [LOGLIK1]
               [SNAME2] [LOGLIK2]
               ...
               [SNAMEK] [LOGLIKK]

        e.g.,
               S-5A -9.21034037197
        the format of the log likelihood (number of decimal places, or exponent) does not matter
    '''
    bestModel = -1
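    # A minimal sketch, not the official solution: score 'mfcc' under every
    # model with logLik, take the argmax, and print the top k if requested.
    logLiks = np.array([
        logLik(np.array([[log_b_m_x(m, frame, model) for frame in mfcc]
                         for m in range(model.omega.shape[0])]),
               model)
        for model in models])
    bestModel = int(np.argmax(logLiks))
    if k > 0:
        print(models[correctID].name)
        for idx in np.argsort(logLiks)[::-1][:k]:
            print(models[idx].name, logLiks[idx])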
    return 1 if (bestModel == correctID) else 0


if __name__ == "__main__":

    trainThetas = []
    testMFCCs = []
    print('TODO: you will need to modify this main block for Sec 2.3')
    d = 13
    k = 5            # number of top speakers to display, <= 0 if none
    M = 8
    epsilon = 0.0
    maxIter = 20

    # train a model for each speaker, and reserve data for testing
    for subdir, dirs, files in os.walk(dataDir):
        for speaker in dirs:
            print(speaker)

            files = fnmatch.filter(os.listdir(os.path.join(dataDir, speaker)), '*npy')
            random.shuffle(files)

            # hold out one random utterance per speaker for testing
            testMFCC = np.load(os.path.join(dataDir, speaker, files.pop()))
            testMFCCs.append(testMFCC)

            # stack the remaining utterances into one (T, d) training matrix
            X = np.empty((0, d))
            for file in files:
                myMFCC = np.load(os.path.join(dataDir, speaker, file))
                X = np.append(X, myMFCC, axis=0)

            trainThetas.append(train(speaker, X, M, epsilon, maxIter))

    # evaluate: model i was trained on speaker i, so i is the correct ID
    numCorrect = 0
    for i in range(0, len(testMFCCs)):
        numCorrect += test(testMFCCs[i], i, trainThetas, k)
    accuracy = 1.0 * numCorrect / len(testMFCCs)
    print('Accuracy: {}'.format(accuracy))