-
Notifications
You must be signed in to change notification settings - Fork 21
/
kmeans_iris.py
63 lines (49 loc) · 2.01 KB
/
kmeans_iris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.cluster import KMeans
# Loads the iris dataset
iris = datasets.load_iris()
iris_data = pd.DataFrame(iris.data, columns=iris['feature_names'])
iris_target = pd.DataFrame(iris.target, columns=['target'])
# Map encoded target values to target names
def map_target(target_num):
return iris.target_names[int(target_num)]
iris_target_name = iris_target.apply(map_target, 1)
# Fit a K-Means model to the data
kmeans_3 = KMeans(n_clusters=3)
kmeans_iris = kmeans_3.fit(iris_data[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)']])
fig = plt.figure(1, figsize=(4, 3))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
labels = kmeans_iris.labels_
ax.scatter(iris_data.ix[:, 3], iris_data.ix[:, 0], iris_data.ix[:, 2], c=labels.astype(np.float))
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
plt.show()
# Plot the ground truth
fig = plt.figure(2, figsize=(4, 3))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
for name, label in [('Setosa', 0),
('Versicolour', 1),
('Virginica', 2)]:
ax.text3D(iris_data.ix[iris_target.target == label, 3].mean(),
iris_data.ix[iris_target.target == label, 0].mean() + 1.5,
iris_data.ix[iris_target.target == label, 2].mean(), name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = np.choose(iris_target.target, [1, 2, 0]).astype(np.float)
ax.scatter(iris_data.ix[:, 3], iris_data.ix[:, 0], iris_data.ix[:, 2], c=y)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Petal width')
ax.set_ylabel('Sepal length')
ax.set_zlabel('Petal length')
plt.show()