forked from e-lab/clustering-learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclclassifier.lua
84 lines (75 loc) · 2.85 KB
/
clclassifier.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
-- Trains a CL classifier based on clustering of the train data.
--
-- The first `fracDataSet` fraction of the global `trainData` is split by label
-- and k-means (`okmeans`) is run separately on the samples of every class.
--
-- Parameters:
--   fracDataSet : fraction of the dataset to use, in [0, 1]
--   nclusters   : number of k-means centroids to learn per class
-- Returns:
--   a (#classes x nclusters x trainData.data:size(2)) tensor; slice [c] holds
--   the first value returned by okmeans for class c.
-- Raises:
--   an error if a label is not a valid index into `classes`, or if a class
--   has no sample among the selected data (k-means has nothing to cluster).
--
-- Reads the globals trainData, classes, opt, okmeans, xlua and verbose.
function trainCLClassifier(fracDataSet,nclusters) -- param = fraction of dataset [0 to 1]
   -- the number of samples used is rounded down to an even count
   -- (testCLnet applies the same rule to its own limits)
   local limitData = math.ceil(trainData:size()*fracDataSet)
   if limitData%2 ~= 0 then limitData = limitData-1 end
   local nclasses = #classes
   local dim = trainData.data:size(2)

   -- first pass: count the samples of every class so each class gets a buffer
   -- of exactly the right size (no unwritten rows are ever handed to k-means)
   local counts = {}
   for c = 1,nclasses do counts[c] = 0 end
   for i = 1,limitData do
      local label = trainData.labels[i]
      if counts[label] == nil then
         error('trainCLClassifier: label ' .. tostring(label) .. ' of sample ' .. i
               .. ' is not a valid class index (1..' .. nclasses .. ')')
      end
      counts[label] = counts[label] + 1
   end

   -- allocate one buffer per class
   local splitdata = {}
   local cursor = {}
   for c = 1,nclasses do
      if counts[c] == 0 then
         error('trainCLClassifier: class ' .. c .. ' has no samples in the selected data')
      end
      splitdata[c] = torch.Tensor(counts[c], dim)
      cursor[c] = 0
   end

   -- second pass: copy every sample into the next free row of its class.
   -- A per-class cursor is used instead of ceil(i/2), which was only correct
   -- for two classes whose labels strictly alternate.
   for i = 1,limitData do
      local label = trainData.labels[i]
      cursor[label] = cursor[label] + 1
      splitdata[label][cursor[label]] = trainData.data[i]
      xlua.progress(i, limitData)
   end

   -- now run kmeans on each class:
   local clusteredClasses = torch.Tensor(nclasses, nclusters, dim)
   for c = 1,nclasses do
      clusteredClasses[c] = okmeans(splitdata[c], nclusters, nil,
                                    opt.initstd, opt.niter, opt.kmbatchsize, nil, verbose)
   end
   return clusteredClasses
end
-- Percentage of the first `limit` samples of `dataset` whose label equals the
-- class owning the closest centroid (closeness measured with torch.dist).
-- `dist` is a (#classes x nclusters) scratch tensor that is overwritten for
-- every sample. With limit == 0 the result is 0/0.
local function clAccuracy(dataset, limit, clusteredClasses, nclusters, dist)
   local correct = 0
   for i = 1,limit do
      local sample = dataset.data[i]
      --sample = sample - sample:mean() -- remove mean from input data
      --sample = sample / sample:std()
      for j = 1,#classes do
         for k = 1,nclusters do
            --dist[j][k] = SMRmatch(sample:reshape((#sample)[1]), clusteredClasses[j][k], 0.75)
            dist[j][k] = torch.dist(sample, clusteredClasses[j][k])
         end
      end
      -- best distance of every class first (min over clusters), then the
      -- class with the smallest of those; only the index is needed
      local perClassMin = torch.min(dist, 2)
      local _, idx = torch.min(perClassMin, 1)
      if dataset.labels[i] == idx[1][1] then
         correct = correct+1
      end
      --xlua.progress(i, limit)
   end
   return correct/limit*100
end

-- Evaluates the cluster-based classifier on the train and test sets.
--
-- Every sample is assigned to the class that owns the nearest centroid in
-- `clusteredClasses`; the share of correct assignments is printed and returned.
--
-- Parameters:
--   fracDataSet      : fraction of each dataset to evaluate, in [0, 1]
--   clusteredClasses : centroids indexed as [class][cluster], e.g. the value
--                      returned by trainCLClassifier
--   nclusters        : number of centroids per class
-- Returns:
--   percentage correct on trainData, percentage correct on testData
--
-- Reads the globals trainData, testData and classes. The number of samples
-- evaluated per set is rounded down to an even count.
function testCLnet(fracDataSet, clusteredClasses, nclusters)
   local dist = torch.Tensor(#classes, nclusters)

   -- test on trainData:
   local limitDataTr = math.ceil(trainData:size()*fracDataSet)
   if limitDataTr%2 ~= 0 then limitDataTr = limitDataTr-1 end
   local percentTr = clAccuracy(trainData, limitDataTr, clusteredClasses, nclusters, dist)
   print('Final correct percentage on trainData: '.. percentTr)

   -- test on testData:
   local limitDataTe = math.ceil(testData:size()*fracDataSet)
   if limitDataTe%2 ~= 0 then limitDataTe = limitDataTe-1 end
   local percentTe = clAccuracy(testData, limitDataTe, clusteredClasses, nclusters, dist)
   print('Final correct percentage on testData: '.. percentTe)

   return percentTr, percentTe
end
-- this function does not work great...
-- Partial L1 distance between a sample and a template.
--   in1   : sample
--   in2   : template (e.g. an averaged sample)
--   ratio : fraction of the length of in1 that is compared
-- Positions are visited in order of decreasing template value and only the
-- first ceil(ratio * length of in1) of them contribute to the result.
-- Returns the sum of |in1[p] - in2[p]| over those positions.
function SMRmatch(in1, in2, ratio) -- only compares top ratio of highest values -- in2=template!
   -- descending sort of the template; only the permutation is used
   local _, order = torch.sort(in2, true)
   local keep = torch.ceil(ratio * (#in1)[1])
   local total = 0
   local rank = 1
   while rank <= keep do
      local pos = order[rank]
      total = total + torch.abs(in1[pos] - in2[pos])
      rank = rank + 1
   end
   return total
end