# linear_regression.py
import math

import numpy as np


class Regression:
    '''
    Implements Linear Regression with Weight Decay Regularization.

    Fields:
        int      dim      Dimensionality of the data
        float    lam      Regularization factor (weight decay strength)
        ndarray  weights  numpy array of length dim+1 holding the weights (bias term first)
        list     data     List of N tuples (x, y), with x a list of length dim and y = f(x)

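    Usage sketch (illustrative toy 1-D data, not taken from the original module):
        data = [([0.0], 1.0), ([1.0], 3.0), ([2.0], 5.0)]
        reg = Regression(dim=1, data=data, lam=0.0)
        reg.solve()
        reg.hypothesis([3.0])   # close to 7.0 for this noiseless line y = 2x + 1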
    '''

    def __init__(self, dim, data=None, lam=0):
        self.dim = dim
        self.reset(data if data is not None else [], lam)

    def reset(self, data, lam=0):
        '''
        Reset the weights and lambda and feed a data sample
        '''
        self.lam = lam
        self.weights = np.zeros(self.dim + 1)
        for t in data:
            if len(t[0]) != self.dim:
                raise ValueError('Wrong data dimensionality')
        self.data = data

    def hypothesis(self, x):
        '''
        Takes a d-dimensional data vector x and computes h(x) using the current weights
        '''
        x_adj = [1.0] + x  # prepend a constant 1 for the bias weight
        return np.dot(self.weights, x_adj)  # dot product of w and the adjusted x

    def quadratic_error(self, point):
        '''
        Takes as "point" a tuple (x, y) with x a vector and y = f(x)
        and returns the quadratic error (h(x) - f(x))**2
        '''
        h = self.hypothesis(point[0])
        return (h - point[1]) ** 2

    def sample_error(self, data):
        '''
        Computes the in-sample error Ein when given self.data as data,
        and the out-of-sample error Eout when given a dataset generated by f(x)
        '''
        total_error = 0.0
        for point in data:
            total_error += self.quadratic_error(point)
        return total_error / len(data)

    def solve(self):
        '''
        Computes the weights with the regularized least-squares closed form
        w = (X^T X + lam * I)^(-1) X^T y
        '''
        # Input data matrix X with one row [1, x_1, ..., x_dim] per data point
        data_matrix = np.array([[1.0] + point[0] for point in self.data])
        # Target vector y of f(x) values for the x inputs
        target_vector = np.array([point[1] for point in self.data])
        # Regularized pseudo-inverse (X^T X + lam * I)^(-1) X^T
        reg_pinv = np.linalg.inv(data_matrix.T.dot(data_matrix)
                                 + self.lam * np.identity(self.dim + 1)).dot(data_matrix.T)
        self.weights = reg_pinv.dot(target_vector)  # 1-D weight vector of length dim + 1

    def classify(self, point):
        '''
        For classification problems:
        Takes as "point" a tuple (x, y) with x a vector and y = f(x)
        and returns True if sign(h(x)) = f(x) and False if not
        '''
        h = self.hypothesis(point[0])
        return math.copysign(1.0, h) == point[1]

    def classification_error(self, data):
        '''
        For classification problems:
        Computes the in-sample error Ein when given self.data as data,
        and the out-of-sample error Eout when given a dataset generated by f(x)
        '''
        g_misclass_points = 0  # counter of data points misclassified by the hypothesis g
        for point in data:
            if not self.classify(point):
                g_misclass_points += 1
        # Return the fraction of misclassified points
        return g_misclass_points / len(data)
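

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): fit the model on
    # made-up, noiseless 1-D data drawn from y = 2x + 1. The data values and
    # lam = 0.0 below are illustrative assumptions, not taken from the source.
    toy_data = [([float(x)], 2.0 * x + 1.0) for x in range(10)]
    model = Regression(dim=1, data=toy_data, lam=0.0)
    model.solve()
    print('weights:', model.weights)                            # roughly [1.0, 2.0]
    print('in-sample error:', model.sample_error(model.data))   # roughly 0 for noiseless data
    print('h(11):', model.hypothesis([11.0]))                   # roughly 23.0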