-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_nlvr.py
64 lines (47 loc) · 1.82 KB
/
preprocess_nlvr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
from preprocess.utils import get_image
import scipy.misc
import numpy as np
from sklearn.externals import joblib
# Edit this list to specify which files are created: save_data_list writes
# one '<size>images.pickle' file per entry, holding square size x size images.
IMG_SIZES = [400]
# Size argument handed to get_image when each PNG is loaded; an image is only
# resized afterwards when the requested output size differs from this value.
LOAD_SIZE = 400
# Dataset root passed to convert_nlvr_dataset_pickle when run as a script.
NLVR_DIR = './data/nlvr/'
def load_filenames(data_dir):
    """Read the image keys stored in ``filenames.pickle`` under ``data_dir``.

    ``data_dir`` is joined to the file name by plain string concatenation, so
    it is expected to already end with a path separator.

    Returns:
        A new list containing every item yielded by the loaded object.
    """
    pickle_path = data_dir + 'filenames.pickle'
    names = list(joblib.load(pickle_path))
    print('Load filenames from: %s (%d)' % (pickle_path, len(names)))
    return names
def save_data_list(inpath, outpath, filenames):
    """Pack the PNG images named by ``filenames`` into one pickle per size.

    For every entry of the module-level ``IMG_SIZES`` the images are loaded
    from ``<inpath>/<key>.png`` via ``get_image``, cast to uint8, resized when
    the target size differs from ``LOAD_SIZE``, stacked into one array of
    shape ``(len(filenames), size, size, 3)`` and dumped with joblib to
    ``outpath + str(size) + 'images.pickle'``.

    Args:
        inpath: Directory that contains the ``.png`` files.
        outpath: Prefix of the output file name. It is concatenated as-is, so
            it is expected to end with a path separator.
        filenames: Sequence of image keys (file names without extension).
    """
    total = len(filenames)
    for size in IMG_SIZES:
        print('Processing images of size %d' % size)
        # Every slot is overwritten in the loop below, so an uninitialised
        # buffer is sufficient.
        images = np.empty((total, size, size, 3), dtype=np.uint8)
        for idx, key in enumerate(filenames):
            f_name = '%s/%s.png' % (inpath, key)
            img = get_image(f_name, LOAD_SIZE, is_crop=False).astype('uint8')
            if size != LOAD_SIZE:
                # NOTE(review): scipy.misc.imresize is deprecated and absent
                # from recent SciPy releases; this branch only runs when
                # IMG_SIZES contains a value other than LOAD_SIZE. Replacing
                # it needs an image library this file does not import yet.
                img = scipy.misc.imresize(img, [size, size], 'bicubic')
            images[idx] = np.array(img)
            loaded = idx + 1
            if loaded % 100 == 0:
                print('\rLoad %d......' % loaded, end="", flush=True)
        # The count must be applied with %; passing it as a second print()
        # argument emitted the literal '%d' followed by the number.
        print('Images processed: %d' % total)
        outfile = outpath + str(size) + 'images.pickle'
        joblib.dump(images, outfile)
        print('save to: ', outfile)
def convert_nlvr_dataset_pickle(inpath):
    """Build the image pickles for the train and test splits under ``inpath``.

    For each split the key list is read with ``load_filenames`` from the
    split directory and passed to ``save_data_list``, which reads the images
    from ``inpath`` and writes its output into the split directory.
    """
    # Training split.
    train_root = os.path.join(inpath, 'train/')
    save_data_list(inpath, train_root, load_filenames(train_root))

    # Test split; its directory is echoed before processing.
    test_root = os.path.join(inpath, 'test/')
    print(test_root)
    save_data_list(inpath, test_root, load_filenames(test_root))
# Script entry point: convert the dataset found under the default NLVR_DIR.
if __name__ == '__main__':
    convert_nlvr_dataset_pickle(NLVR_DIR)