-
Notifications
You must be signed in to change notification settings - Fork 66
/
sk-逻辑分类有b偏量.py
100 lines (85 loc) · 4.56 KB
/
sk-逻辑分类有b偏量.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: UTF-8 -*-
import numpy as np # 快速操作结构数组的工具
import pandas as pd # 数据分析处理工具
# Sample data set (three classes). Each row is [x1, x2, class label].
data = [
    # class 0
    [-2.68420713, 0.32660731, 0],
    [-2.71539062, -0.16955685, 0],
    [-2.88981954, -0.13734561, 0],
    [-2.7464372, -0.31112432, 0],
    [-2.72859298, 0.33392456, 0],
    [-2.27989736, 0.74778271, 0],
    [-2.82089068, -0.08210451, 0],
    [-2.62648199, 0.17040535, 0],
    [-2.88795857, -0.57079803, 0],
    [-2.67384469, -0.1066917, 0],
    [-2.50652679, 0.65193501, 0],
    [-2.61314272, 0.02152063, 0],
    [-2.78743398, -0.22774019, 0],
    [-3.22520045, -0.50327991, 0],
    [-2.64354322, 1.1861949, 0],
    [-2.38386932, 1.34475434, 0],
    [-2.6225262, 0.81808967, 0],
    [-2.64832273, 0.31913667, 0],
    [-2.19907796, 0.87924409, 0],
    [-2.58734619, 0.52047364, 0],
    # class 1
    [1.28479459, 0.68543919, 1],
    [0.93241075, 0.31919809, 1],
    [1.46406132, 0.50418983, 1],
    [0.18096721, -0.82560394, 1],
    [1.08713449, 0.07539039, 1],
    [0.64043675, -0.41732348, 1],
    [1.09522371, 0.28389121, 1],
    [-0.75146714, -1.00110751, 1],
    [1.04329778, 0.22895691, 1],
    [-0.01019007, -0.72057487, 1],
    [-0.5110862, -1.26249195, 1],
    [0.51109806, -0.10228411, 1],
    [0.26233576, -0.5478933, 1],
    [0.98404455, -0.12436042, 1],
    [-0.174864, -0.25181557, 1],
    [0.92757294, 0.46823621, 1],
    [0.65959279, -0.35197629, 1],
    [0.23454059, -0.33192183, 1],
    [0.94236171, -0.54182226, 1],
    [0.0432464, -0.58148945, 1],
    # class 2
    [2.53172698, -0.01184224, 2],
    [1.41407223, -0.57492506, 2],
    [2.61648461, 0.34193529, 2],
    [1.97081495, -0.18112569, 2],
    [2.34975798, -0.04188255, 2],
    [3.39687992, 0.54716805, 2],
    [0.51938325, -1.19135169, 2],
    [2.9320051, 0.35237701, 2],
    [2.31967279, -0.24554817, 2],
    [2.91813423, 0.78038063, 2],
    [1.66193495, 0.2420384, 2],
    [1.80234045, -0.21615461, 2],
    [2.16537886, 0.21528028, 2],
    [1.34459422, -0.77641543, 2],
    [1.5852673, -0.53930705, 2],
    [1.90474358, 0.11881899, 2],
    [1.94924878, 0.04073026, 2],
    [3.48876538, 1.17154454, 2],
    [3.79468686, 0.25326557, 2],
    [1.29832982, -0.76101394, 2],
]
# Alternative sample data set (two classes). Each row is [x1, x2, class label].
# NOTE(review): nothing in the visible script reads data1; it appears to be
# kept as a drop-in replacement for `data` -- confirm before removing.
data1 = [
    [-0.017612, 14.053064, 0],
    [-1.395634, 4.662541, 1],
    [-0.752157, 6.538620, 0],
    [-1.322371, 7.152853, 0],
    [0.423363, 11.054677, 0],
    [0.406704, 7.067335, 1],
    [0.667394, 12.741452, 0],
    [-2.460150, 6.866805, 1],
    [0.569411, 9.548755, 0],
    [-0.026632, 10.427743, 0],
    [0.850433, 6.920334, 1],
    [1.347183, 13.175500, 0],
    [1.176813, 3.167020, 1],
    [-1.781871, 9.097953, 0],
    [-0.566606, 5.749003, 1],
    [0.931635, 1.589505, 1],
    [-0.024205, 6.151823, 1],
    [-0.036453, 2.690988, 1],
    [-0.196949, 0.444165, 1],
    [1.014459, 5.754399, 1],
]
# Build the feature matrix X and the label column y from the sample rows.
# np.asmatrix is used instead of the np.mat alias, which was removed in
# NumPy 2.0; both return an np.matrix, so the later `.A` accesses still work.
dataMat = np.asmatrix(data)
y = dataMat[:, 2]  # class labels, kept as an (n, 1) column matrix
b = np.ones(y.shape)  # all-ones column that stands in for the bias term b
# Columns of X are [bias, x1, x2].
X = np.asmatrix(np.column_stack((b, dataMat[:, 0:2])))
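# Feature standardization (disabled)
# import sklearn.preprocessing as preprocessing  # scikit-learn mean removal and scaling
# scaler=preprocessing.StandardScaler()
# X = scaler.fit_transform(X)  # standardize the feature set (zero mean, unit variance); this can speed up training
# X = np.mat(X)
# # print(X)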
# ======== Logistic regression ========
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Recent scikit-learn releases reject np.matrix input and expect the target as
# a 1-D array, so fit and predict on plain ndarray conversions. X and y are
# deliberately not rebound, so code further down that relies on them being
# matrices keeps working.
X_arr = np.asarray(X)
y_arr = np.asarray(y).ravel()

# NOTE: LogisticRegression fits its own intercept by default
# (fit_intercept=True); the all-ones first column of X is therefore an extra
# constant feature on top of that intercept.
model = LogisticRegression()
model.fit(X_arr, y_arr)
print('逻辑回归模型:\n', model)

# Apply the fitted model back to the training samples.
predicted = model.predict(X_arr)  # predicted class label per sample
answer = model.predict_proba(X_arr)  # per-class probabilities per sample
print(answer)
import matplotlib.pyplot as plt

# Draw the decision regions and the sample points.
# Work on plain ndarrays: np.mat no longer exists in NumPy 2.0 and recent
# scikit-learn releases reject np.matrix input to predict(). np.asarray
# accepts X / y whether they are matrices or already ndarrays.
features = np.asarray(X)  # columns: [bias, x1, x2]
labels = np.asarray(y).ravel()  # true class of every sample, 1-D
x1_vals = features[:, 1]
x2_vals = features[:, 2]

# Lay a grid over the x1/x2 range (padded by 0.5 on each side) and predict
# the class at every grid node.
h = 0.02  # grid step
x1_min, x1_max = x1_vals.min() - .5, x1_vals.max() + .5
x2_min, x2_max = x2_vals.min() - .5, x2_vals.max() + .5
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, h), np.arange(x2_min, x2_max, h))
grid_points = np.c_[xx1.ravel(), xx2.ravel()]  # one (x1, x2) row per grid node
# Prepend the all-ones bias column so the grid matches the layout of X.
testMat = np.column_stack((np.ones((grid_points.shape[0], 1)), grid_points))
Z = model.predict(testMat)

# Colour each grid cell by its predicted class.
Z = Z.reshape(xx1.shape)
plt.pcolormesh(xx1, xx2, Z, cmap=plt.cm.Paired)

# Scatter the samples: 'x' markers are coloured by the true class,
# '.' markers by the class the model predicted for the same point.
plt.scatter(x1_vals, x2_vals, c=labels, marker='x')
plt.scatter(x1_vals, x2_vals, c=predicted.tolist(), marker='.')

# Axis labels.
plt.xlabel("x")
plt.ylabel("y")

# Show the figure.
plt.show()