# -*- coding: utf-8 -*-
"""Implementations of the mandatory functions (gradient descent, SGD, least squares, ridge regression and logistic regression)."""
import numpy as np
from costs import compute_loss_mse
def batch_iter(y, tx, batch_size, num_batches=None, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.

    Takes as input two iterables (here the desired output values 'y' and the input data 'tx')
    and outputs an iterator which yields mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled so that the ordering of the original data does not affect
    the randomness of the minibatches.

    Example of use:
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
            <DO-SOMETHING>
    """
    data_size = len(y)
    num_batches_max = int(np.ceil(data_size / batch_size))
    if num_batches is None:
        num_batches = num_batches_max
    else:
        num_batches = min(num_batches, num_batches_max)
    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]
def compute_gradient(y, tx, w):
    """Compute the gradient of the MSE loss: grad = -tx^T (y - tx w) / N."""
    e = y - np.dot(tx, w)
    grad = -np.dot(tx.T, e) / len(y)
    return grad
def least_square_GD(y, tx, initial_w, gamma, max_iters):
    """Gradient descent algorithm for the least squares (MSE) loss."""
    # Define initial w
    w = initial_w
    for n_iter in range(max_iters):
        # compute gradient and loss
        grad = compute_gradient(y, tx, w)
        loss = compute_loss_mse(y, tx, w)
        # update w by a gradient step
        w = w - gamma * grad
        # print loss every 10000 iterations
        if n_iter % 10000 == 0:
            print("Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return w
def least_square_SGD(y, tx, batch_size, initial_w, max_epochs, gamma):
    """Stochastic gradient descent algorithm for the least squares (MSE) loss."""
    # Define initial w
    w = initial_w
    for n_iter in range(max_epochs):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size):
            grad = compute_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
        # loss on the full dataset after each epoch
        loss = compute_loss_mse(y, tx, w)
        # print("SGD({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_epochs - 1, l=loss))
    return w
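
# Illustrative usage sketch (an addition, not part of the original project code):
# runs both least_square_GD and least_square_SGD on the same synthetic linear
# data; note their different argument orders. Defined but not called on import.
def _demo_least_square_gd_sgd():
    rng = np.random.default_rng(3)
    x = rng.uniform(-1, 1, size=200)
    tx = np.c_[np.ones(200), x]                     # design matrix with an intercept column
    y = 1.0 + 2.0 * x + 0.1 * rng.standard_normal(200)
    initial_w = np.zeros(2)
    w_gd = least_square_GD(y, tx, initial_w, gamma=0.1, max_iters=200)
    w_sgd = least_square_SGD(y, tx, batch_size=32, initial_w=initial_w,
                             max_epochs=50, gamma=0.05)
    print("GD  weights:", w_gd)                     # both should be close to [1., 2.]
    print("SGD weights:", w_sgd)
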
def least_squares(y, tx):
    """Calculate the least squares solution via the normal equations."""
    # Computing optimal weights: solve (tx^T tx) w = tx^T y
    gram = np.dot(tx.T, tx)
    rhs = np.dot(tx.T, y)
    opt_w = np.linalg.solve(gram, rhs)
    # Computing mean square error
    mse = compute_loss_mse(y, tx, opt_w)
    return opt_w, mse
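
# Illustrative usage sketch (an addition, not part of the original project code):
# a tiny check that least_squares recovers the generating weights on noise-free
# linear data. Defined but not called on import.
def _demo_least_squares():
    rng = np.random.default_rng(0)
    x = rng.uniform(-1, 1, size=100)
    tx = np.c_[np.ones(100), x]
    true_w = np.array([1.0, 2.0])
    y = np.dot(tx, true_w)
    w, mse = least_squares(y, tx)
    print("recovered w:", w, "mse:", mse)           # w should be close to [1., 2.], mse close to 0
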
def ridge_regression(y, tx, lambda_):
    """Implement ridge regression (L2-regularized least squares)."""
    # Initializing useful parameters
    num_samples = len(y)
    num_features = tx.shape[1] if len(tx.shape) > 1 else 1
    # Regularization matrix with lambda' = 2 * N * lambda_; the intercept (first column) is not penalized
    LA = lambda_ * 2 * num_samples * np.eye(num_features)
    LA[0, 0] = 0
    # Computing optimal weights: solve (tx^T tx + LA) w = tx^T y
    gram = np.dot(tx.T, tx) + LA
    mat = np.dot(tx.T, y)
    weight = np.linalg.solve(gram, mat)
    # Computing mean square error
    mse = compute_loss_mse(y, tx, weight)
    return weight, mse
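
# Illustrative usage sketch (an addition; it assumes the first column of tx is
# the intercept, which ridge_regression leaves unpenalized): a larger lambda_
# shrinks the slope towards zero. Defined but not called on import.
def _demo_ridge_regression():
    rng = np.random.default_rng(1)
    x = rng.uniform(-1, 1, size=50)
    tx = np.c_[np.ones(50), x]
    y = 1.0 + 2.0 * x + 0.1 * rng.standard_normal(50)
    w_small, _ = ridge_regression(y, tx, lambda_=1e-6)
    w_large, _ = ridge_regression(y, tx, lambda_=1.0)
    print("slope with small lambda:", w_small[1])   # close to 2
    print("slope with large lambda:", w_large[1])   # shrunk towards 0
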
# Logistic regression helpers
def sigmoid(t):
    """Apply the sigmoid function on t."""
    # equivalent to exp(t) / (1 + exp(t)), but does not overflow for large positive t
    return 1.0 / (1.0 + np.exp(-t))
def calculate_logistic_loss(y, tx, w):
    """Compute the cost by negative log likelihood."""
    xw = np.dot(tx, w)
    # reshape y so that it broadcasts element-wise against tx @ w
    z = y.reshape(xw.shape)
    logistic_loss = np.log(1 + np.exp(xw)) - z * xw
    return np.sum(logistic_loss)
def calculate_logistic_gradient(y, tx, w):
    """Compute the gradient of the logistic loss."""
    err = sigmoid(np.dot(tx, w)) - y
    return np.dot(tx.T, err)
def calculate_logistic_hessian(tx, w):
    """Return the Hessian of the logistic loss function."""
    sig = sigmoid(np.dot(tx, w))
    diag = sig * (1 - sig)
    H = np.dot(tx.T, diag * tx)
    return H
def logistic_regression(y, tx, initial_w, alpha, max_iters, threshold, method):
    """
    Do gradient descent (method=True) or Newton's method (method=False)
    using logistic regression.
    Return the loss and the updated w.
    """
    w = initial_w
    losses = []
    for n_iter in range(max_iters):
        # get loss, gradient and hessian
        loss, grad, hess = logistic_regression_aid(y, tx, w)
        # print the loss every 100 iterations
        if n_iter % 100 == 0:
            print("Current iteration={i}, the loss={l}".format(i=n_iter, l=loss))
        # update w
        if method:
            w = w - alpha * grad
        else:
            w = w - alpha * np.linalg.solve(hess, grad)
        # store loss
        losses.append(loss)
        # convergence criterion
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    # visualization: not implemented yet
    # visualization(y, x, mean_x, std_x, w, "classification_by_logistic_regression_gradient_descent")
    print("The loss={l}".format(l=loss))
    return loss, w
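
# Illustrative usage sketch (an addition; it assumes y and initial_w are passed
# as column vectors, the orientation the logistic helpers above expect, and
# labels in {0, 1}): fits a logistic model on separable synthetic data with
# plain gradient descent (method=True). Defined but not called on import.
def _demo_logistic_regression():
    rng = np.random.default_rng(2)
    x = rng.uniform(-2, 2, size=(200, 1))
    tx = np.c_[np.ones((200, 1)), x]
    y = (x > 0).astype(float)                       # labels in {0, 1}, shape (200, 1)
    initial_w = np.zeros((2, 1))
    loss, w = logistic_regression(y, tx, initial_w, alpha=0.01,
                                  max_iters=500, threshold=1e-8, method=True)
    print("final loss:", loss, "weights:", w.ravel())
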
def reg_logistic_regression(y, tx, initial_w, lambda_, alpha, max_iters):
    """
    Do gradient descent using regularized logistic regression.
    Return the loss and the updated w.
    """
    w = initial_w
    for n_iter in range(max_iters):
        # get loss and gradient
        loss, grad = reg_logistic_regression_aid(y, tx, w, lambda_)
        # print the loss every 300 iterations
        if n_iter % 300 == 0:
            print("Current iteration={i}, the loss={l}".format(i=n_iter, l=loss))
        # update w
        w = w - alpha * grad
    # visualization: not implemented yet
    # visualization(y, x, mean_x, std_x, w, "classification_by_logistic_regression_gradient_descent")
    print("The loss={l}".format(l=loss))
    return loss, w
def logistic_regression_aid(y, tx, w):
    """Return the loss, gradient and hessian of the logistic loss."""
    return calculate_logistic_loss(y, tx, w), calculate_logistic_gradient(y, tx, w), calculate_logistic_hessian(tx, w)
def reg_logistic_regression_aid(y, tx, w, lambda_):
    """Return the loss and gradient for regularized logistic regression."""
    # new loss: plain logistic loss plus the L2 penalty
    loss = calculate_logistic_loss(y, tx, w) + lambda_ * (np.linalg.norm(w) ** 2)
    # new gradient: plain logistic gradient plus the penalty gradient
    new_grad = calculate_logistic_gradient(y, tx, w) + 2 * lambda_ * w
    # The regularized Hessian is not implemented yet; it would be computed from
    # the plain Hessian, e.g.:
    # new_hess = calculate_logistic_hessian(tx, w) + 2*lambda_*np.eye(len(w))
    return loss, new_grad
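
# Illustrative usage sketch (an addition; same column-vector convention as
# _demo_logistic_regression): regularized logistic regression with a small
# L2 penalty. Defined but not called on import.
def _demo_reg_logistic_regression():
    rng = np.random.default_rng(4)
    x = rng.uniform(-2, 2, size=(200, 1))
    tx = np.c_[np.ones((200, 1)), x]
    y = (x > 0).astype(float)
    initial_w = np.zeros((2, 1))
    loss, w = reg_logistic_regression(y, tx, initial_w, lambda_=0.1,
                                      alpha=0.01, max_iters=300)
    print("regularized loss:", loss, "weights:", w.ravel())
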
# ***************************************************
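
# Running this module as a script executes the illustrative demos defined above
# (every name starting with _demo_ is an addition for illustration only and can
# be removed without affecting the mandatory functions).
if __name__ == "__main__":
    _demo_least_square_gd_sgd()
    _demo_least_squares()
    _demo_ridge_regression()
    _demo_logistic_regression()
    _demo_reg_logistic_regression()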