-
Notifications
You must be signed in to change notification settings - Fork 0
/
adaboost.py
88 lines (83 loc) · 2.89 KB
/
adaboost.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
'''
@Description:
@Version: 1.0
@Author: Troy Wu
@Date: 2020-07-11 21:09:27
@LastEditors: Troy Wu
@LastEditTime: 2020-07-14 18:44:05
'''
import numpy as np
import pandas as pd
def get_Mat(path):
    '''
    Load a header-less, tab-separated data file into feature and label matrices.

    Args:
        path: Anything accepted by ``pandas.read_table`` (a file path or a
            file-like buffer).

    Returns:
        xMat: ``np.matrix`` holding every column except the last.
        yMat: ``np.matrix`` of shape (m, 1) holding the last column.
    '''
    dataSet = pd.read_table(path, header=None)
    # np.asmatrix is used instead of the np.mat alias: the alias was removed
    # from the main namespace in NumPy 2.0, while asmatrix works on all versions.
    xMat = np.asmatrix(dataSet.iloc[:, :-1].values)
    # A 1-D array becomes a (1, m) matrix, so transpose to get a column.
    yMat = np.asmatrix(dataSet.iloc[:, -1].values).T
    return xMat, yMat
def Classify0(xMat, i, Q, S):
    '''
    Label every sample +1 or -1 by thresholding a single feature column.

    Args:
        xMat: 2-D sample data; column ``i`` is the feature that is tested.
        i: Index of the feature column.
        Q: Threshold value.
        S: 'lt' marks samples with value <= Q as -1; any other value marks
            samples with value > Q as -1.

    Returns:
        ndarray of shape (m, 1) containing 1.0 / -1.0.
    '''
    labels = np.ones((xMat.shape[0], 1))
    column = xMat[:, i]
    # Select the side of the threshold that is assigned to the -1 class.
    negative = (column <= Q) if S == 'lt' else (column > Q)
    labels[negative] = -1
    return labels
def get_Stump(xMat, yMat, D):
    '''
    Find the single-feature threshold classifier (decision stump) with the
    lowest weighted error.

    For every feature column the range [min, max] is cut into ``Steps`` equal
    steps; each threshold (including one step below the minimum) is tried in
    both directions ('lt' and 'gt') via ``Classify0``.

    Args:
        xMat: (m, n) sample matrix.
        yMat: (m, 1) label matrix compared element-wise with the predictions.
        D: (m, 1) matrix of sample weights (``D.T * err`` must be a matrix
            product, so this is expected to be an ``np.matrix``).

    Returns:
        bestStump: dict with the keys '特征列' (feature column index),
            '阈值' (threshold) and '标志' (direction flag) of the best stump.
        minE: weighted error of the best stump, as the 1x1 matrix produced by
            ``D.T * err`` (``np.inf`` if no candidate was evaluated).
        bestClas: (m, 1) predictions of the best stump.
    '''
    m, n = xMat.shape
    Steps = 10
    bestStump = {}
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    bestClas = np.asmatrix(np.zeros((m, 1)))
    minE = np.inf  # lowest weighted error seen so far
    for i in range(n):
        Min = xMat[:, i].min()
        Max = xMat[:, i].max()
        stepSize = (Max - Min) / Steps
        for j in range(-1, Steps + 1):
            # The threshold does not depend on the direction, so compute it once.
            Q = Min + j * stepSize
            for S in ['lt', 'gt']:
                predicted = Classify0(xMat, i, Q, S)
                err = np.asmatrix(np.ones((m, 1)))
                err[predicted == yMat] = 0  # correctly classified samples cost 0
                eca = D.T * err  # weighted error (1x1 matrix)
                if eca < minE:
                    minE = eca
                    bestClas = predicted.copy()
                    bestStump['特征列'] = i
                    bestStump['阈值'] = Q
                    bestStump['标志'] = S
    return bestStump, minE, bestClas
def Ada_train(xMat, yMat, maxC = 40):
    '''
    Train an AdaBoost ensemble of decision stumps.

    Args:
        xMat: (m, n) sample matrix.
        yMat: (m, 1) label matrix; compared against ``np.sign`` of the
            aggregated estimate, so labels are expected to be +1 / -1.
        maxC: Maximum number of boosting iterations.

    Returns:
        weakClass: list of stump dicts as returned by ``get_Stump``, each
            extended with an 'alpha' entry (the stump weight, rounded to two
            decimals).
        aggClass: (m, 1) matrix with the alpha-weighted sum of the stump
            predictions on the training samples.
    '''
    weakClass = []
    m = xMat.shape[0]
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    D = np.asmatrix(np.ones((m, 1)) / m)  # start with uniform sample weights
    aggClass = np.asmatrix(np.zeros((m, 1)))
    for _ in range(maxC):
        Stump, error, bestClas = get_Stump(xMat, yMat, D)  # best stump for current weights
        # get_Stump returns the error as a 1x1 matrix; extract the scalar
        # explicitly, because float() on an ndim > 0 array is deprecated
        # since NumPy 1.25.
        error = np.asarray(error).item()
        # The lower bound on the denominator avoids division by zero when
        # the stump classifies every sample correctly.
        alpha = float(0.5 * np.log((1 - error) / max(error, 1e-16)))
        # NOTE(review): the stored alpha is rounded while training continues
        # with the exact value, so AdaClassify can differ slightly from the
        # aggregate computed here - confirm that the rounding is intended.
        Stump['alpha'] = np.round(alpha, 2)
        weakClass.append(Stump)
        # Re-weight: misclassified samples gain weight, correct ones lose it.
        expon = np.multiply(-1 * alpha * yMat, bestClas)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()
        aggClass += alpha * bestClas
        # Count the training samples the current ensemble still gets wrong.
        aggErr = np.multiply(np.sign(aggClass) != yMat, np.ones((m, 1)))
        if aggErr.sum() == 0:
            break
    return weakClass, aggClass
def AdaClassify(data, weakClass):
    '''
    Classify samples with a trained ensemble of decision stumps.

    Args:
        data: 2-D sample data; converted to an ``np.matrix``.
        weakClass: iterable of stump dicts providing the keys '特征列'
            (feature column), '阈值' (threshold), '标志' (direction flag)
            and 'alpha' (stump weight).

    Returns:
        (m, 1) matrix with the sign of the alpha-weighted vote per sample
        (0 where the vote sums to exactly zero).
    '''
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    dataMat = np.asmatrix(data)
    m = dataMat.shape[0]
    aggClass = np.asmatrix(np.zeros((m, 1)))
    for stump in weakClass:
        classEst = Classify0(
            dataMat,
            stump['特征列'],
            stump['阈值'],
            stump['标志'],
        )
        aggClass += stump['alpha'] * classEst
    return np.sign(aggClass)