-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdistribution_analysis.py
75 lines (48 loc) · 1.49 KB
/
distribution_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
import random
from sklearn import decomposition
import matplotlib.pyplot as plt
from tflib.gmm import GMM, shownormal
from tflib.normal import Normal
# Directory prefix of the input data; used by plain string concatenation,
# so it must keep its trailing slash.
file_path = "./data/"

# Load the category -> item-name-list table. dict() accepts either a JSON
# object or a JSON list of [category, items] pairs.
with open(file_path + 'cate_item.json') as f:
    train_bycate = dict(json.load(f))

# Pick up to three categories at random. random.sample() needs a real
# sequence (a dict keys view is rejected on recent Python 3), and asking for
# more elements than exist raises ValueError, hence the list() and min().
catelist = random.sample(list(train_bycate), min(3, len(train_bycate)))
def generate_all_vector(itemlist, vectorpath='/home/cuizeyu/image_vector/'):
    """Load the stored vector of every item named in *itemlist*.

    Each item name is mapped to the file ``vectorpath + name + '.json'`` and
    the JSON content of that file is parsed.

    Args:
        itemlist: Iterable of item names (strings) used as file base names.
        vectorpath: Prefix prepended verbatim to every file name, so it must
            include its trailing path separator. Defaults to the directory
            that was previously hard-coded in this function.

    Returns:
        A list with one parsed JSON value per item, in the order of
        *itemlist*. An empty *itemlist* yields an empty list.

    Raises:
        IOError: If a vector file cannot be opened (``OSError`` on Python 3).
        ValueError: If a vector file does not contain valid JSON.
    """
    vect_batch = []
    for name in itemlist:
        # One file per item; the context manager closes it even if parsing fails.
        with open(vectorpath + name + '.json') as f:
            vect_batch.append(json.load(f))
    return vect_batch
def pca_2(vector_list):
    """Reduce *vector_list* to two dimensions with PCA.

    A new ``decomposition.PCA`` with ``n_components=2`` and ``copy=True`` is
    fitted on the input, and the result of ``fit_transform`` is returned.
    """
    reducer = decomposition.PCA(copy=True, n_components=2)
    return reducer.fit_transform(vector_list)
def plot_pca(vector_2):
    """Display a scatter plot of the first two columns of *vector_2*.

    *vector_2* must support ``[:, 0]`` / ``[:, 1]`` indexing. Points are
    drawn as red circles on a single-subplot figure titled 'Scatter Plot'
    with axes labelled 'X' and 'Y', then ``plt.show()`` is called.
    Returns None.
    """
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title('Scatter Plot')
    axes.set_xlabel('X')
    axes.set_ylabel('Y')
    xs, ys = vector_2[:, 0], vector_2[:, 1]
    axes.scatter(xs, ys, c='r', marker='o')
    plt.show()
# For every sampled category: load its item vectors, project them to 2-D,
# fit a 4-component GMM with EM, and print/plot the model before and after
# fitting.
# NOTE(review): the original indentation of this script was lost; every
# statement below is assumed to run once per category - confirm.
for cate in catelist:
    itemlist = train_bycate[cate]
    vector_list = generate_all_vector(itemlist)
    vector_2 = pca_2(vector_list)
    # plot_pca(vector_2) can be called here to inspect the raw projection.

    # Model as initialised (method="random"), before any EM step.
    gmm = GMM(dim=2, ncomps=4, data=vector_2, method="random")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(gmm)
    shownormal(vector_2, gmm)

    # Model after running EM for 1000 steps.
    gmm.em(vector_2, nsteps=1000)
    shownormal(vector_2, gmm)
    print(gmm)

    # presumably conditions the mixture on dimension 0 taking the value -3;
    # verify against tflib.gmm.GMM.condition
    ngmm = gmm.condition([0], [-3])
    print(ngmm.mean())
    print(ngmm.covariance())