-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathex.py
111 lines (101 loc) · 3.91 KB
/
ex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/python
#-*- coding: utf-8 -*-
from numpy import *
import matplotlib.pyplot as plt #导入matplotlib包
import numpy as np #导入numpy包
import time
def loadData(): #读入文件
f=open('ex1data1.txt')
data = f.readlines() #一次读入文件
l=len(data)
mat=zeros((l,2)) #l*2的矩阵,初始化0
index=0
xdata = ones((l,2)) #l*2的矩阵,初始化为1
ydata = []
for line in data:
line = line.strip() #取出多余字符,
linedata = line.split(',') #分割数据
mat[index, :] = linedata[0:2] #暂存在mat矩阵中
index +=1
xdata[:,1] = mat[:,0] #将mat的第0列存入xdata的第一列
ydata = mat[:,1] #将mat的第1列存入ydata中
return xdata,ydata
def model(theta,x): #直线方程
theta = np.array(theta)
return x.dot(theta) #返回直线的y值
def cost(theta,xdata,ydata,l): #代价函数
SUM = 0
idex = 0
ydata = mat (ydata)
ydata = ydata.T
for line in ydata:
yp = model(theta,xdata[idex ,:])
yp = yp - ydata[idex ,:]
yp = yp**2
SUM = SUM + yp
idex =idex +1
return SUM/2/l #返回耗费
def grad(theta,idex1,xdata,ydata,sigmal,l): #梯度计算
idex = 0
SUM = 0
for line in ydata:
yp = model(theta,xdata[idex ,:]) - ydata[idex] #计算梯度
yp =yp * xdata[idex][idex1]
idex =idex +1
SUM = SUM + yp
return SUM/l
def gradlient(theta,xdata,ydata,sigmal,l): #参数更新
idex1 = 0
for line1 in theta:
theta[idex1] = theta[idex1] - sigmal * grad(theta,idex1,xdata,ydata,sigmal,l) #更新参数
theta[idex1] = theta[idex1]
idex1 = idex1+1
return theta #返回参数
def OLS(xdata,ydata): #梯度下降求解
start = time.time()
theta = [0,0]
iters = 0
iters = int(iters)
l = len(ydata)
l = int(l)
cost_record = []
it = []
sigmal = 0.01
cost_val = cost(theta,xdata,ydata,l) #计算代价
cost_record.append(cost_val)
it.append(iters)
while iters <1500:
theta = gradlient(theta,xdata,ydata,sigmal,l) #更新参数
cost_updata = cost(theta,xdata,ydata,l) #计算代价
iters = iters + 1
cost_val = cost_updata
cost_record.append(cost_val) #记录代价
it.append(iters) #记录迭代次数
end = time.time()
return mat(theta).T,cost_record,it
def show(xArr,yArr): #显示回归直线
plt.xlabel('Population',size=25) # 横坐标
plt.ylabel('Profit' ,size=25) # 纵坐标
plt.title('Linear Regression',size=30) # 标题
xCopy =xArr.copy()
xCopy.sort(0)
yHat = xCopy * ws
plt.plot(xArr[:,1],yArr,'go',label='y1')
plt.plot(xCopy[:,1],yHat,label='y2')
plt.show()
def show1(xArr,yArr): #显示代价
plt.xlabel('iters',size=25) # 横坐标
plt.ylabel('cost' ,size=25) # 纵坐标
plt.title('Linear Regression',size=30) # 标题
plt.plot(xArr[0:,],yArr[0:,],'go',label='y1')
plt.show()
xArr,yArr= loadData()
ws,cost_rec,iters= OLS(xArr,yArr)
test = [1,3.5]
print('人口为35000时,利润为:')
print(ws[0][0]*test[0]+ws[1][0]*test[1])
test1 = [1,7]
print('人口为70000时,利润为:')
print(ws[0][0]*test1[0]+ws[1][0]*test1[1])
show(xArr,yArr)
show1(mat(np.array(iters)),mat(np.array(cost_rec)))