-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
47 lines (43 loc) · 1.81 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from numpy import divide
class Dataset:
    """Wrap a 2-D array whose last column is returned as the classes.

    Every column except the last one is treated as a feature column.
    """

    def __init__(self, data):
        """Store ``data`` and cache its size as ``[rows, columns]``.

        Args:
            data: 2-D array exposing ``shape`` and NumPy-style slicing.
        """
        self.data = data
        self.size = [data.shape[0], data.shape[1]]

    def get_features(self):
        """Return all columns of the data except the last one."""
        return self.data[:, 0:self.size[1] - 1]

    def get_classes(self):
        """Return the last column of the data."""
        return self.data[:, self.size[1] - 1]

    def normalize(self, method="standarization"):
        """Normalize the feature columns of ``self.data`` in place.

        The last column is left untouched.

        Args:
            method: ``"rescaling"`` maps each feature column to
                ``(x - min) / (max - min)``; any other value (including
                the default) applies ``(x - mean) / std``.

        A constant feature column has a zero denominator; it is treated
        as 1 so the column becomes all zeros instead of NaN.
        """
        feature_count = self.size[1] - 1
        features = self.data[:, :feature_count]

        if method == "rescaling":
            # Rescaling method
            offset = features.min(axis=0)
            scale = features.max(axis=0) - offset
        else:
            # Standardization method
            offset = features.mean(axis=0)
            scale = features.std(axis=0)

        # Avoid 0/0 on constant columns: (x - offset) is already 0 there.
        scale[scale == 0] = 1
        self.data[:, :feature_count] = (features - offset) / scale
class Normalizator():
    """Normalize datasets using statistics taken from a training dataset.

    The per-column mean, standard deviation, maximum and minimum are
    computed from the training data at construction time. The training
    dataset is then normalized in place, and ``normalize`` can be called
    on further datasets to apply the same statistics to them.
    """

    def __init__(self, dataset, method="standarization"):
        """Capture training statistics and normalize ``dataset`` in place.

        Args:
            dataset: object whose ``data`` attribute is a 2-D array; the
                last column is excluded from normalization.
            method: ``"rescaling"`` for min-max rescaling; any other
                value (including the default) selects standardization.
        """
        # NOTE: train_data aliases dataset.data, so it holds the
        # normalized values once the constructor returns.
        self.train_data = dataset.data
        self.size = self.train_data.shape
        # Statistics are taken before the in-place normalization below.
        self.mean = self.train_data.mean(axis=0)
        self.std = self.train_data.std(axis=0)
        self.max = self.train_data.max(axis=0)
        self.min = self.train_data.min(axis=0)
        self.method = method
        self.normalize(dataset)

    def normalize(self, dataset):
        """Normalize the feature columns of ``dataset.data`` in place.

        Uses the statistics stored at construction time. The number of
        columns processed is taken from the training data (all but the
        last one).

        A feature that was constant in the training data has a zero
        denominator; it is treated as 1 so the result is ``x - offset``
        rather than NaN or infinity. The stored ``std``/``max``/``min``
        attributes are not modified.
        """
        data = dataset.data
        feature_count = self.size[1] - 1

        if self.method == "rescaling":
            # Rescaling method
            offset = self.min[:feature_count]
            scale = self.max[:feature_count] - offset
        else:
            # Standardization method
            offset = self.mean[:feature_count]
            # Copy so the guard below does not alter self.std.
            scale = self.std[:feature_count].copy()

        # Guard against division by zero for constant training features.
        scale[scale == 0] = 1
        data[:, :feature_count] = (data[:, :feature_count] - offset) / scale