-
Notifications
You must be signed in to change notification settings - Fork 3
/
basic-linear-regression.py
44 lines (33 loc) · 1012 Bytes
/
basic-linear-regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import matplotlib.pyplot as plt
# load the data
X = []
Y = []
for line in open('data_1d.csv'):
x, y = line.split(',')
X.append(float(x))
Y.append(float(y))
# let's turn X and Y into numpy arrays since that will be useful later
X = np.array(X)
Y = np.array(Y)
# let's plot the data to see what it looks like
plt.scatter(X, Y)
plt.show()
# apply the equations we learned to calculate a and b
# denominator is common
# note: this could be more efficient if
# we only computed the sums and means once
denominator = X.dot(X) - X.mean() * X.sum()
a = ( X.dot(Y) - Y.mean()*X.sum() ) / denominator
b = ( Y.mean() * X.dot(X) - X.mean() * X.dot(Y) ) / denominator
# let's calculate the predicted Y
Yhat = a*X + b
# let's plot everything together to make sure it worked
plt.scatter(X, Y)
plt.plot(X, Yhat)
plt.show()
# determine how good the model is by computing the r-squared
d1 = Y - Yhat
d2 = Y - Y.mean()
r2 = 1 - d1.dot(d1) / d2.dot(d2)
print("the r-squared is:", r2)