batch.py
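# Fully connected feed-forward neural network trained with full-batch gradient
# descent and L2 weight regularization; the architecture (layer sizes,
# activation functions and their derivatives) is supplied through the
# `parameters` argument of ANN.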
import numpy as np
import matplotlib.pyplot as plt
from data import *


class ANN():
    def __init__(self, X, y, parameters, learning_rate, n_iter, lamda):
        # data set
        self.X = X
        # labels
        self.y = y
        # learning rate
        self.learning_rate = learning_rate
        # number of iterations (epochs)
        self.epoch = n_iter
        # regularization coefficient
        self.lamda = lamda
        # number of layers in the network
        self.n_layers = len(parameters)
        # number of nodes in each layer
        self.sizes = [layer[0] for layer in parameters]
        # activation function of each layer
        self.fs = [layer[1] for layer in parameters]
        # derivative of each activation function
        self.fprimes = [layer[2] for layer in parameters]
        # builds the network
        self.setup_network()
        # hold the train, validation and test errors, respectively
        self.train_error = []
        self.valid_error = []
        self.test_error = []

    def setup_network(self):
        # weights of the layers
        self.weights = []
        # gradients of the weights
        self.w_grad = []
        # gradients of the biases
        self.b_grad = []
        # bias, input and output vectors of the layers
        self.biases = []
        self.inputs = []
        self.outputs = []
        # error term of each layer
        self.delta = []
        # weights and biases are randomly initialized with small normal values
        for layer in range(self.n_layers - 1):
            n = self.sizes[layer]
            m = self.sizes[layer + 1]
            np.random.seed(0)
            self.weights.append(np.random.normal(0, 0.01, (m, n)))
            self.biases.append(np.random.normal(0, 0.01, (m, 1)))
            # partial derivatives for the weights and biases, initialized to zero
            self.w_grad.append(np.zeros((m, n)))
            self.b_grad.append(np.zeros((m, 1)))
            self.inputs.append(np.zeros((n, 1)))
            self.outputs.append(np.zeros((n, 1)))
            # delta is the error term
            self.delta.append(np.zeros((n, 1)))
        # the vectors of the last layer are initialized separately
        n = self.sizes[-1]
        self.inputs.append(np.zeros((n, 1)))
        self.outputs.append(np.zeros((n, 1)))
        self.delta.append(np.zeros((n, 1)))

    # takes one sample and propagates it from the input to the output layer
    def feedforward(self, x):
        # reshapes a 1-D sample into a column vector
        if x.ndim == 1:
            x = x.reshape(len(x), 1)
        self.inputs[0] = x  # just for clarity
        self.outputs[0] = x
        # multiplies the previous layer's output by the weights and adds the biases
        for i in range(1, self.n_layers):
            self.inputs[i] = np.dot(self.weights[i - 1], self.outputs[i - 1]) + self.biases[i - 1]
            self.outputs[i] = self.fs[i](self.inputs[i])
        # returns the output of the last layer
        return self.outputs[-1]

    # accumulates the gradients of the weights and biases for one sample
    def update_gradient(self, x, y):
        output = self.feedforward(x)
        self.delta[-1] = self.fprimes[-1](self.outputs[-1]) * (output - y)
        n = self.n_layers - 2
        for i in range(n, 0, -1):
            # computes the error term
            self.delta[i] = self.fprimes[i](self.inputs[i]) * np.dot(self.weights[i].T, self.delta[i + 1])
            # accumulates the partial derivatives for the weights and biases
            self.w_grad[i] += np.outer(self.delta[i + 1], self.outputs[i])
            self.b_grad[i] += self.delta[i + 1]
        # accumulates the partial derivatives for the weights and biases of the first layer
        self.w_grad[0] += np.outer(self.delta[1], self.outputs[0])
        self.b_grad[0] += self.delta[1]

    # since batch processing is used, the accumulated gradients
    # are reset to zero after every iteration
    def set_w_to_zero(self):
        for i in range(self.n_layers - 1):
            self.w_grad[i] = 0
            self.b_grad[i] = 0

    # updates the weights and biases
    def update_weights(self):
        n = self.n_layers - 2
        k = 1.0 / len(self.y)
        lr = self.learning_rate
        for i in range(n, 0, -1):
            # updates the weights (with L2 regularization) and the biases
            self.weights[i] = self.weights[i] - k * lr * self.w_grad[i] - lr * self.lamda * self.weights[i]
            self.biases[i] = self.biases[i] - k * lr * self.b_grad[i]
        # updates the first layer
        self.weights[0] = self.weights[0] - k * lr * self.w_grad[0] - lr * self.lamda * self.weights[0]
        self.biases[0] = self.biases[0] - k * lr * self.b_grad[0]

    # trains the model
    def train(self, data):
        for repeat in range(self.epoch):
            for row in range(len(self.X)):
                x = self.X[row]
                y = self.y[row]
                self.update_gradient(x, y)
            # updates the weights
            self.update_weights()
            # sets the gradients back to zero
            self.set_w_to_zero()
            # calculates the errors on the training, validation and test sets, respectively
            self.train_error.append(cost(self.predict(data.tr_set), data.tr_labels))
            self.valid_error.append(cost(self.predict(data.val_set), data.val_labels))
            self.test_error.append(cost(self.predict(data.ts_set), data.ts_labels))
            print(repeat, np.column_stack((self.train_error[-1], self.valid_error[-1], self.test_error[-1])))
        return self.train_error, self.valid_error, self.test_error, self.predict(data.tr_set), self.predict(data.val_set), self.predict(data.ts_set)

    # predicts the targets for a data set
    def predict(self, X):
        n = len(X)
        m = self.sizes[-1]
        prediction = np.empty((n, m))
        for i in range(n):
            prediction[i] = self.feedforward(X[i])
        return prediction


# sigmoid function
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# gradient of the sigmoid function
def sigmoid_grad(x):
    return sigmoid(x) * (1 - sigmoid(x))

# identity function
def identity(x):
    return x

# gradient of the identity function, which equals 1
def identity_grad(x):
    return 1

# mean squared error between the predicted and the actual labels
def cost(predicted, labels):
    return np.sum((labels - predicted) ** 2) / len(labels)
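

# Illustrative usage sketch (added for clarity, not part of the original script):
# it builds a tiny synthetic regression set so the class can be exercised end to
# end without the project's `data` module. The attribute names tr_set/tr_labels,
# val_set/val_labels and ts_set/ts_labels mirror what train() expects; the layer
# sizes and hyper-parameters below are assumed example values, not the author's.
if __name__ == "__main__":
    from types import SimpleNamespace

    rng = np.random.RandomState(1)
    X_demo = rng.rand(50, 3)                    # 50 samples, 3 features
    y_demo = X_demo.sum(axis=1, keepdims=True)  # simple linear target
    demo = SimpleNamespace(tr_set=X_demo, tr_labels=y_demo,
                           val_set=X_demo, val_labels=y_demo,
                           ts_set=X_demo, ts_labels=y_demo)

    # each entry of `parameters`: (number of nodes, activation, its derivative)
    parameters = [(3, identity, identity_grad),   # input layer
                  (5, sigmoid, sigmoid_grad),     # hidden layer
                  (1, identity, identity_grad)]   # output layer (regression)

    net = ANN(X_demo, y_demo, parameters, learning_rate=0.1, n_iter=20, lamda=0.0001)
    tr_err, val_err, ts_err, _, _, _ = net.train(demo)

    # the error lists returned by train() can be plotted with matplotlib
    plt.plot(tr_err, label="train")
    plt.plot(val_err, label="validation")
    plt.plot(ts_err, label="test")
    plt.legend()
    plt.show()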