dataset.py (forked from Kashu7100/Qualia2.0)

# -*- coding: utf-8 -*-
from ..core import *
from ..util import download_progress
from ..autograd import Tensor
from .transforms import Compose
import os
import random
import sys
from logging import getLogger
logger = getLogger('QualiaLogger').getChild('data')

class Dataset(object):
    ''' Base class for datasets.
    Args:
        train (bool): if True, prepare the training split; otherwise the test split
        transforms (Compose): transforms applied to each data sample
        target_transforms (Compose): transforms applied to each label
    '''
    def __init__(self, train=True, transforms=None, target_transforms=None):
        logger.info('[*] preparing data...')
        logger.info('    this might take a few minutes.')
        self.train = train
        self.transforms = transforms if transforms is not None else Compose()
        self.target_transforms = target_transforms if target_transforms is not None else Compose()
        # downloaded files are cached under <home_dir>/data/download/<dataset name>
        self.root = home_dir + '/data/download/{}'.format(self.__class__.__name__.lower())
        self.data = None
        self.label = None
        self.prepare()
        logger.info('[*] done.')

    def __repr__(self):
        return '{}(train={}, transforms={}, target_transforms={})'.format(
            self.__class__.__name__, self.train, self.transforms, self.target_transforms)

    def __str__(self):
        return self.__class__.__name__

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        # labels pass through target_transforms only when the dataset provides them
        if self.label is None:
            return self.transforms(self.data[key]), None
        else:
            return self.transforms(self.data[key]), self.target_transforms(self.label[key])

    def show(self):
        raise NotImplementedError

    def state_dict(self):
        raise NotImplementedError

    def prepare(self):
        ''' loads the data into self.data / self.label; overridden by subclasses '''
        pass

    def _download(self, url, filename=None):
        ''' downloads data from the url and caches it under self.root
        Args:
            url (str): url of the data
            filename (str): filename to save the data as (defaults to the basename of the url)
        '''
        if not os.path.exists(self.root+'/'):
            os.makedirs(self.root+'/')
        data_dir = self.root
        if filename is None:
            from urllib.parse import urlparse
            parts = urlparse(url)
            filename = os.path.basename(parts.path)
        cache = os.path.join(data_dir, filename)
        if not os.path.exists(cache):
            from urllib.request import urlretrieve
            urlretrieve(url, cache, reporthook=download_progress)
            sys.stdout.flush()
            sys.stdout.write('\r[*] downloading 100.00%')
            print('\r')
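
    # For illustration (not in the original file): a concrete subclass would
    # typically call _download() from its prepare() override, e.g.
    #
    #     def prepare(self):
    #         self._download('https://example.com/toy/train.npz')  # URL is hypothetical
    #         # ...then parse the cached file in self.root into self.data / self.label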

    @staticmethod
    def to_one_hot(label, num_class):
        ''' convert the label to one hot representation
        Args:
            label (ndarray | Tensor): class indices
            num_class (int): number of classes in the dataset
        '''
        # unwrap a Tensor so that the boolean indexing below works on the raw ndarray
        if isinstance(label, Tensor):
            label = label.data
        one_hot = np.zeros((len(label), num_class), dtype=np.int32)
        for c in range(num_class):
            one_hot[:, c][label == c] = 1
        return one_hot
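
    # For illustration (not in the original file): with label = np.array([0, 2, 1])
    # and num_class = 3, to_one_hot() returns
    #
    #     [[1, 0, 0],
    #      [0, 0, 1],
    #      [0, 1, 0]]
    #
    # and to_vector() below maps such an array back to [0, 2, 1].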

    @staticmethod
    def to_vector(label):
        ''' convert a one hot label back to a vector of class indices
        Args:
            label (ndarray | Tensor): one hot label data
        '''
        if isinstance(label, Tensor):
            return np.argmax(label.data, axis=1)
        else:
            return np.argmax(label, axis=1)
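
# -----------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how a concrete dataset
# would subclass Dataset. The class name, URL, and array keys are hypothetical.
#
#     class ToyDataset(Dataset):
#         def prepare(self):
#             self._download('https://example.com/toy/train.npz')
#             arrays = np.load(os.path.join(self.root, 'train.npz'))
#             self.data = arrays['x']
#             self.label = Dataset.to_one_hot(arrays['y'], num_class=10)
#
#     train = ToyDataset(train=True)
#     x, t = train[0]                           # transformed sample and its one-hot target
#     indices = Dataset.to_vector(train.label)  # back to class indices
# -----------------------------------------------------------------------------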