-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSVM.py
96 lines (85 loc) · 3.8 KB
/
SVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 8 10:59:09 2017
@author: shikhar
"""
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_score
import scipy as sp
from math import inf
def NormalizingFeatures(File):
"""
Transforming the file into feature vectors. Normalizing them into the range [-1,1].
"""
FileNormalizing = open(File)
Label = []
Features = []
for line in FileNormalizing:
line = line.strip().split(',')
Label.append(int(line[0]))
Features.append([((2*int(i) / 255)-1) for i in line[1:]])
Features = sp.array(Features)
Label = sp.array(Label)
FileNormalizing.close()
return Label, Features
def InitializingSVC(User_Gamma = inf, User_C = inf):
"""
Initializes SVC depending on the given value of Gamma and C.
"""
if User_C != inf and User_Gamma != inf:
Classifier = svm.SVC(C = User_C, gamma = User_Gamma)
elif User_C == inf and User_Gamma != inf:
Classifier = svm.SVC(gamma = User_Gamma)
elif User_C != inf and User_Gamma == inf:
Classifier = svm.SVC(C = User_C)
else:
Classifier = svm.SVC()
return Classifier
def Predicting(TrainingFeatures, TrainingLabel, Test_Features, Test_Label, Gamma = inf, C = inf):
"""
Uses given settings to predict the test error rate.
"""
Errors = 0
Classifier = InitializingSVC(Gamma, C)
Classifier.fit(Training_Features, Training_Label)
Expected = Test_Label
Predicted = Classifier.predict(Test_Features)
print("Classification report for classifier %s:\n%s\n"
% (Classifier, metrics.classification_report(Expected, Predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(Expected, Predicted))
for i in range(len(Predicted)):
if Predicted[i] != Expected[i]:
Errors += 1
return (Errors/len(Predicted))*100
def CrossValidation(TrainingFeatures, TrainingLabel, Test_Features=[], Test_Label=[], Gamma = inf, C = inf):
"""
Uses given settings to predict the cross validation score.
"""
if (Test_Features != [] and Test_Label != []):
Label = sp.ndarray.tolist(Training_Label) + sp.ndarray.tolist(Test_Label)
Features = sp.ndarray.tolist(Training_Features) + sp.ndarray.tolist(Test_Features)
Label = sp.array(Label)
Features = sp.array(Features)
else:
Features = sp.array(TrainingFeatures)
Label = sp.array(TrainingLabel)
Classifier = InitializingSVC(Gamma, C)
scores = cross_val_score(Classifier, Features, Label, cv=10)
return scores
if __name__ == '__main__':
TrainingFile = "mnist_train.txt"
TestFile = "mnist_test.txt"
Training_Label, Training_Features = NormalizingFeatures(TrainingFile)
Test_Label, Test_Features = NormalizingFeatures(TestFile)
print("\n\n\nDefault Settings of Gamma and C: \n\n")
ErrorRate = Predicting(Training_Features, Training_Label, Test_Features, Test_Label)
CrossValidation_ErrorRate = CrossValidation(Training_Features, Training_Label, Test_Features, Test_Label)
print("\nTest Error Rate : {} ".format(ErrorRate))
print("Cross Validation Score : {} ".format(CrossValidation_ErrorRate))
print("Cross Validation Error Rate : {} ".format((1 - CrossValidation_ErrorRate.mean()) * 100))
print("\n\n\nBetter Settings of Gamma and C, with Gamma = 1/150 and C = 3: \n\n")
ErrorRate = Predicting(Training_Features, Training_Label, Test_Features, Test_Label, C = 3, Gamma = 1/150)
CrossValidation_ErrorRate = CrossValidation(Training_Features, Training_Label, Test_Features, Test_Label, C = 3, Gamma = 1/150)
print("\nTest Error Rate : {} ".format(ErrorRate))
print("Cross Validation Score : {} ".format(CrossValidation_ErrorRate))
print("Cross Validation Error Rate : {} ".format((1 - CrossValidation_ErrorRate.mean()) * 100))