import sys
import numpy as np
from matplotlib import pyplot
sys.path.append('..')


def displayData(X, example_width=None, figsize=(10, 10)):
    """
    Displays 2D data stored in X in a nice grid.
    """
    # Compute rows, cols
    if X.ndim == 2:
        m, n = X.shape
    elif X.ndim == 1:
        n = X.size
        m = 1
        X = X[None]  # Promote to a 2 dimensional array
    else:
        raise IndexError('Input X should be 1 or 2 dimensional.')

    example_width = example_width or int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)

    # Compute number of items to display
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    fig, ax_array = pyplot.subplots(display_rows, display_cols, figsize=figsize)
    fig.subplots_adjust(wspace=0.025, hspace=0.025)

    ax_array = [ax_array] if m == 1 else ax_array.ravel()

    for i, ax in enumerate(ax_array):
        # Display each example as an example_height x example_width image
        ax.imshow(X[i].reshape(example_height, example_width, order='F'),
                  cmap='Greys', extent=[0, 1, 0, 1])
        ax.axis('off')
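

# Hypothetical usage sketch (not part of the original file): assuming `X` is an
# (m, 400) array of flattened 20x20-pixel grayscale digits, as in the course
# data set, the first 100 examples can be shown in a 10 x 10 grid with:
#
#     displayData(X[:100])
#     pyplot.show()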


def predict(Theta1, Theta2, X):
    """
    Predict the label of an input given a trained neural network.

    Outputs the predicted label of X given the trained weights of a neural
    network (Theta1, Theta2).
    """
    # Useful values
    m = X.shape[0]
    num_labels = Theta2.shape[0]

    # You need to return the following variables correctly
    p = np.zeros(m)

    # Feedforward propagation: prepend the bias column before each layer
    h1 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), X], axis=1), Theta1.T))
    h2 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), h1], axis=1), Theta2.T))
    p = np.argmax(h2, axis=1)
    return p
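

# Shape sketch (illustrative; the 400/25/10 layer sizes follow the course's
# ex3/ex4 networks and are not fixed by this file):
#
#     Theta1.shape == (25, 401)    # hidden_layer_size x (input_layer_size + 1)
#     Theta2.shape == (10, 26)     # num_labels x (hidden_layer_size + 1)
#     X.shape      == (m, 400)
#     predict(Theta1, Theta2, X)   # -> (m,) array of predicted labels in 0..9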


def debugInitializeWeights(fan_out, fan_in):
    """
    Initialize the weights of a layer with fan_in incoming connections and
    fan_out outgoing connections using a fixed strategy. This will help you
    later in debugging.

    Note that W is a matrix of size (fan_out, 1 + fan_in), where the first
    column of W handles the "bias" terms.

    Parameters
    ----------
    fan_out : int
        The number of outgoing connections.

    fan_in : int
        The number of incoming connections.

    Returns
    -------
    W : array_like (fan_out, 1 + fan_in)
        The initialized weights array given the dimensions.
    """
    # Initialize W using "sin". This ensures that W always contains the same
    # values, which is useful for debugging.
    W = np.sin(np.arange(1, 1 + (1+fan_in)*fan_out))/10.0
    W = W.reshape(fan_out, 1+fan_in, order='F')
    return W
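

# Worked example (illustrative, not from the original file):
# debugInitializeWeights(5, 3) always returns the same (5, 4) array,
# np.sin(np.arange(1, 21)).reshape(5, 4, order='F') / 10, so gradient checking
# runs against reproducible "random" weights.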


def computeNumericalGradient(J, theta, e=1e-4):
    """
    Computes the gradient using "finite differences" and gives us a numerical
    estimate of the gradient.

    Parameters
    ----------
    J : func
        The cost function which will be used to estimate its numerical gradient.

    theta : array_like
        The one dimensional unrolled network parameters. The numerical gradient
        is computed at those given parameters.

    e : float (optional)
        The value to use for epsilon for computing the finite difference.

    Notes
    -----
    The following code implements numerical gradient checking, and returns the
    numerical gradient. It sets `numgrad[i]` to (a numerical approximation of)
    the partial derivative of J with respect to the i-th input argument,
    evaluated at theta (i.e., `numgrad[i]` should be approximately the partial
    derivative of J with respect to theta[i]).
    """
    numgrad = np.zeros(theta.shape)
    perturb = np.diag(e * np.ones(theta.shape))
    for i in range(theta.size):
        loss1, _ = J(theta - perturb[:, i])
        loss2, _ = J(theta + perturb[:, i])
        numgrad[i] = (loss2 - loss1)/(2*e)
    return numgrad
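

# The loop above implements the central (two-sided) finite difference
#
#     numgrad[i] = (J(theta + e * e_i) - J(theta - e * e_i)) / (2 * e)
#
# where e_i is the i-th standard basis vector and J returns a (cost, gradient)
# tuple, as in the course exercises. A small self-contained check appears in
# the __main__ block at the bottom of this file.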


def checkNNGradients(nnCostFunction, lambda_=0):
    """
    Creates a small neural network to check the backpropagation gradients. It
    will output the analytical gradients produced by your backprop code and the
    numerical gradients (computed using computeNumericalGradient). These two
    gradient computations should result in very similar values.

    Parameters
    ----------
    nnCostFunction : func
        A reference to the cost function implemented by the student.

    lambda_ : float (optional)
        The regularization parameter value.
    """
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.arange(1, 1+m) % num_labels

    # Unroll parameters
    nn_params = np.concatenate([Theta1.ravel(), Theta2.ravel()])

    # short hand for cost function
    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
                                        num_labels, X, y, lambda_)

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations. The two columns you get
    # should be very similar.
    print(np.stack([numgrad, grad], axis=1))
    print('The above two columns you get should be very similar.')
    print('(Left-Your Numerical Gradient, Right-Analytical Gradient)\n')

    # Evaluate the norm of the difference between the two solutions. If you
    # have a correct implementation, and assuming you used e = 0.0001 in
    # computeNumericalGradient, then diff should be less than 1e-9.
    diff = np.linalg.norm(numgrad - grad)/np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then \n'
          'the relative difference will be small (less than 1e-9). \n'
          'Relative Difference: %g' % diff)
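

# Hypothetical usage sketch (nnCostFunction is implemented in the exercise
# notebook, not in this file):
#
#     checkNNGradients(nnCostFunction)              # unregularized check
#     checkNNGradients(nnCostFunction, lambda_=3)   # regularized check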


def sigmoid(z):
    """
    Computes the sigmoid of z.
    """
    return 1.0 / (1.0 + np.exp(-z))
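

# ---------------------------------------------------------------------------
# Minimal self-contained sanity check (an added sketch, not part of the course
# exercise): verifies computeNumericalGradient against a cost with a known
# analytical gradient. `quadraticCost` is a hypothetical stand-in for
# nnCostFunction, which lives in the exercise notebook rather than in this file.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    def quadraticCost(theta):
        # J(theta) = 0.5 * theta . theta, whose gradient is simply theta;
        # returns the (cost, gradient) tuple expected by computeNumericalGradient
        return 0.5 * np.dot(theta, theta), theta

    theta = np.array([1.0, -2.0, 0.5])
    numgrad = computeNumericalGradient(quadraticCost, theta)
    print('Numerical gradient :', numgrad)
    print('Analytical gradient:', theta)
    print('Relative difference:',
          np.linalg.norm(numgrad - theta) / np.linalg.norm(numgrad + theta))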