-
Notifications
You must be signed in to change notification settings - Fork 16
/
biplot.py
56 lines (34 loc) · 1.52 KB
/
biplot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt
## import data
my_csv = 'data.csv' ## path to your dataset
# The first CSV column becomes the row index and the header row becomes the
# column names; both are used further down as text labels on the biplot.
dat = pd.read_csv(my_csv, index_col=0)
# if no row or column titles in your csv, pass 'header=None' into read_csv
# and delete 'index_col=0' -- but your biplot will be clearer with row/col names
## perform PCA
# scikit-learn rejects n_components larger than min(n_samples, n_features),
# so cap at the smaller of the two dimensions; using the column count alone
# raises ValueError for datasets with fewer rows than columns.
n = min(dat.shape)
pca = PCA(n_components=n)
# n defaults to the maximum number of PCs the data supports, but can be set to
# any smaller integer -- keep it at 2 or more, since the biplot below uses
# two components.
pca.fit(dat)
## project data into PC space
# 0,1 denote PC1 and PC2; change these two values to plot other PCs
pc_x, pc_y = 0, 1
# feature loadings on the chosen components
xvector = pca.components_[pc_x] # see 'prcomp(my_data)$rotation' in R
yvector = pca.components_[pc_y]
# project the rows once and slice out both axes, rather than running
# transform() separately for each coordinate
scores = pca.transform(dat) # see 'prcomp(my_data)$x' in R
xs = scores[:, pc_x]
ys = scores[:, pc_y]
## visualize projections
## Note: scale values for arrows and text are a bit inelegant as of now,
## so feel free to play around with them
# Loadings are stretched by the largest score on each axis so the arrows are
# drawn on the same scale as the projected points; computed once, not per arrow.
x_scale = max(xs)
y_scale = max(ys)
# arrows project features (ie columns from csv) as vectors onto PC axes
for feature, load_x, load_y in zip(dat.columns, xvector, yvector):
    tip_x = load_x * x_scale
    tip_y = load_y * y_scale
    plt.arrow(0, 0, tip_x, tip_y,
              color='r', width=0.0005, head_width=0.0025)
    # place the label 20% beyond the arrow tip so it does not sit on the head
    plt.text(tip_x * 1.2, tip_y * 1.2, feature, color='r')
# circles project documents (ie rows from csv) as points onto PC axes
plt.plot(xs, ys, 'bo')
for row_label, point_x, point_y in zip(dat.index, xs, ys):
    # label offset scales with distance from the origin (same 1.2 factor)
    plt.text(point_x * 1.2, point_y * 1.2, row_label, color='b')
plt.show()