-
Notifications
You must be signed in to change notification settings - Fork 1
/
kNN.py
95 lines (85 loc) · 2.83 KB
/
kNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import os
from scipy import signal
# Experiment parameters
k = 1  # number of neighbours passed to the classifier
resampling_length = 40  # number of samples every sequence is resampled to
DATA_FOLDER = "/preprocessed_sequences"  # appended to the current working directory

# Containers filled while the sample files are read: features (X) and labels (Y)
XTrain, YTrain = [], []
XTest, YTest = [], []
# PREPROCESSING & FEATURE EXTRACTION


def _load_sequence(path, target_length):
    """Read one input-data file and return it as a flat feature vector.

    Every line of the file is split on whitespace and parsed as floats. The
    first row is discarded, the remaining rows are resampled along the row
    axis to ``target_length`` rows, and the result is flattened row by row.
    """
    with open(path, 'r') as handle:
        rows = [[float(token) for token in line.split()] for line in handle]
    samples = np.array(rows)[1:]
    # Resampling along axis 0 is equivalent to transposing, resampling each
    # row separately and transposing back.
    resampled = signal.resample(samples, target_length, axis=0)
    return resampled.reshape(-1)


def _load_label(path):
    """Read one target-data file and return its class index.

    The first line is split on whitespace; the result is the position of the
    first of its ten leading fields equal to 1, or 0 when none of them is.
    """
    with open(path, 'r') as handle:
        fields = handle.readline().split()
    return next((i for i in range(10) if int(fields[i]) == 1), 0)


# Build the data directory once so listing and opening use the same path.
data_dir = os.getcwd() + DATA_FOLDER

# Iterate over all sample files in sorted order, so that samples and labels
# are appended in the same order.
for filename in sorted(os.listdir(data_dir)):
    path = os.path.join(data_dir, filename)
    for tag, features, labels in (("trainimg", XTrain, YTrain),
                                  ("testimg", XTest, YTest)):
        if tag not in filename:
            continue
        # Only matching files are opened, and each one is closed again as
        # soon as it has been read.
        if "inputdata" in filename:
            features.append(_load_sequence(path, resampling_length))
        elif "targetdata" in filename:
            labels.append(_load_label(path))
# Turn the collected samples and labels into NumPy arrays
XTrain, YTrain = np.array(XTrain), np.array(YTrain)
XTest, YTest = np.array(XTest), np.array(YTest)

# CLASSIFICATION: fit the k-nearest-neighbours model on the training set,
# then predict a class for every test sample
kNN = KNeighborsClassifier(n_neighbors=k).fit(XTrain, YTrain)
YHat = kNN.predict(XTest)
# POSTPROCESSING: compare predictions with the true labels and report the error rate
different_classes = YHat - YTest  # zero wherever prediction and label agree
# Count the mismatches in one vectorized call instead of a Python-level loop;
# kept as a float, as before.
errors = float(np.count_nonzero(different_classes))
error_rate = errors / len(YHat)
print("kNN Result: k = %d and Resampling Length = %d ==> Error Rate = %.2f%%" % (k, resampling_length, error_rate * 100))