-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_dataset.py
99 lines (82 loc) · 2.79 KB
/
create_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Create dataset to train siamese network
import numpy as np
# Shared seed value: it seeds NumPy's global RNG here and is also the seed
# handed to datagen.random_transform() by the pair-building functions below.
# NOTE(review): the stdlib `random` module used for pair sampling is not
# seeded anywhere in the visible code, so pair selection is not reproducible.
seed = 1
np.random.seed(seed) # for reproducibility
import random
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import RMSprop
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Input, Conv2D, BatchNormalization, MaxPool2D, Activation, Flatten, Dense, Dropout
import numpy as np
import pandas as pd
import cv2
import math
import glob
import os
# Side length, in pixels, of the square images produced by this script:
# every loaded image is resized to input_dim x input_dim.
input_dim = 64

# Augmentation pipeline used to create the second image of each pair.
# Units of the individual ranges follow the Keras ImageDataGenerator
# documentation (NOTE(review): shear_range units differ between Keras
# versions -- confirm against the installed version).
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=10,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
def create_similar_image_data(augmentations_per_image=5):
    """Build the positive ("similar") pairs: an image and an augmented copy.

    Every file matching ``images/*/*.png`` is read with OpenCV, resized to
    ``input_dim`` x ``input_dim``, converted to float32 and divided by 255.0.
    Each image is then paired ``augmentations_per_image`` times with a
    randomly transformed copy of itself; every pair gets label 1.

    Args:
        augmentations_per_image: Number of augmented pairs produced per
            source image. Defaults to 5, the previously hard-coded value.

    Returns:
        A tuple ``(pairs, labels)`` where ``pairs`` is
        ``np.array([originals, augmented])`` (first axis selects the side of
        the pair) and ``labels`` is an array containing one ``1`` per pair.
    """
    originals = []
    augmented = []
    labels = []
    for path in glob.iglob('images/*/*.png'):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; skip it instead of crashing in cv2.resize.
            print("skipping unreadable image: " + path)
            continue
        img = cv2.resize(img, (input_dim, input_dim)).astype('float32') / 255.0
        for _ in range(augmentations_per_image):
            # random_transform re-seeds NumPy's RNG whenever a seed is given,
            # so passing the same seed on every call yields the very same
            # transform each time (i.e. identical duplicate pairs). Derive a
            # distinct, still deterministic, seed for every pair instead.
            pair_seed = seed + len(labels)
            originals.append(img)
            augmented.append(datagen.random_transform(img, seed=pair_seed))
            labels.append(1)
            # Progress indicator: one line per 1000 generated pairs.
            if len(labels) % 1000 == 0:
                print(len(labels))
    return np.array([originals, augmented]), np.array(labels)
def create_dissimilar_image_data(num_pairs=50000):
    """Build the negative ("dissimilar") pairs from two different images.

    Each sub-folder of ``images/`` is treated as one "show". All of its PNG
    files are read with OpenCV, resized to ``input_dim`` x ``input_dim``,
    converted to float32 and divided by 255.0. ``num_pairs`` pairs are then
    sampled: a show and an image within it are drawn uniformly at random for
    each side of the pair, the second image is randomly transformed, and the
    pair gets label 0.

    NOTE(review): as in the original code, both images may come from the
    same show; only drawing the *identical* image twice is rejected. Confirm
    whether pairs should be restricted to different shows.

    Args:
        num_pairs: Number of pairs to sample. Defaults to 50000, the
            previously hard-coded value.

    Returns:
        A tuple ``(pairs, labels)`` where ``pairs`` is
        ``np.array([img0, img1])`` (first axis selects the side of the pair)
        and ``labels`` is an array containing one ``0`` per pair.

    Raises:
        ValueError: If fewer than two readable images were found, since no
            pair of distinct images can be formed.
    """
    shows = []
    loaded = 0
    for fldr in glob.iglob('images/*'):
        print("folder is: " + fldr)
        frames = []
        for path in glob.iglob(fldr + '/*.png'):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of crashing in cv2.resize.
                print("skipping unreadable image: " + path)
                continue
            frames.append(
                cv2.resize(img, (input_dim, input_dim)).astype('float32') / 255.0)
            loaded += 1
            if loaded % 1000 == 0:
                print(loaded)
        # Drop empty shows: sampling an image index from an empty list would
        # raise inside the pair loop below.
        if frames:
            shows.append(frames)
    # Formatted so the output matches the Python 2 print statement it replaces.
    print("%s %d" % ("show size ", len(shows)))

    if loaded < 2:
        raise ValueError(
            "need at least two readable images under images/*/ to build "
            "dissimilar pairs, found %d" % loaded)

    def draw():
        """Return a random (show index, image index) position."""
        show_idx = random.randrange(len(shows))
        return show_idx, random.randrange(len(shows[show_idx]))

    print("creating_pairs")
    img0 = []
    img1 = []
    labels = []
    for i in range(num_pairs):
        first = draw()
        second = draw()
        # Pairing an image with an augmented copy of itself is exactly what
        # the "similar" set contains, so it must not be labelled 0; redraw.
        while second == first:
            second = draw()
        img0.append(shows[first[0]][first[1]])
        # A fixed seed would make random_transform apply the same transform
        # to every image (it re-seeds NumPy's RNG on each call), so use a
        # distinct, deterministic seed per pair.
        img1.append(
            datagen.random_transform(shows[second[0]][second[1]], seed=seed + i))
        labels.append(0)
        if i % 1000 == 0:
            print(i)
    return np.array([img0, img1]), np.array(labels)
def create_pairs():
    """Assemble the full training set from similar and dissimilar pairs.

    Returns:
        A tuple ``(pairs, labels)``: the similar pairs followed by the
        dissimilar pairs, joined along axis 1 (the sample axis of the
        ``[img0, img1]`` arrays), and the matching labels concatenated in
        the same order.
    """
    positive_pairs, positive_labels = create_similar_image_data()
    negative_pairs, negative_labels = create_dissimilar_image_data()
    all_pairs = np.concatenate([positive_pairs, negative_pairs], axis=1)
    all_labels = np.concatenate([positive_labels, negative_labels])
    return all_pairs, all_labels
# NOTE: per the original author, creating 112490 samples needs around 12 GB
# of memory.
# Build the pairs once and persist both arrays in NumPy's .npy format.
train_X, train_Y = create_pairs()
np.save(file='train_X.npy', arr=train_X)
np.save(file='train_Y.npy', arr=train_Y)