forked from tokee/juxta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimagenet_tsne_rasterfairy.py
executable file
·395 lines (339 loc) · 18.9 KB
/
imagenet_tsne_rasterfairy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
#!/usr/bin/env python3
#
# Requirements: keras tensorflow sklearn "numpy<1.17" (to avoid warnings from tensorflow)
# pip3 install --prefer-binary -r Requirements.txt
#
# Or:
#
# python3 -m venv tsne
# source tsne/bin/activate
# pip install --upgrade pip
# pip install --prefer-binary -r Requirements.txt
#
# TODO:
# - Add support for other models
# Disable tensorflow warning about missing GPU support
# https://stackoverflow.com/questions/47068709/your-cpu-supports-instructions-that-this-tensorflow-binary-was-not-compiled-to-u
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import argparse
import sys
import glob
import os.path
import math
import rasterfairy
import numpy as np
import keras
from keras.models import Model
from keras.applications.imagenet_utils import decode_predictions, preprocess_input
from keras.preprocessing import image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from PIL import Image as PILImage
def process_arguments(args):
    """Parse command line arguments into a dict of parameter values.

    Numeric options are declared with an explicit ``type`` so values supplied
    on the command line arrive as numbers instead of strings (previously a
    user-supplied --scale_factor stayed a str, which crashed the coordinate
    normalisation that multiplies floats by it).

    :param args: raw argument list, typically sys.argv[1:].
    :return: dict mapping option names to parsed values.
    """
    parser = argparse.ArgumentParser(description='ML network analysis of images')
    parser.add_argument('--images', nargs='+', action='store', required=True, help='images to analyze (image paths, file with list of images or glob)')
    parser.add_argument('--perplexity', action='store', type=int, default=30, help='perplexity of t-SNE (default 30)')
    parser.add_argument('--learning_rate', action='store', type=int, default=150, help='learning rate of t-SNE (default 150)')
    parser.add_argument('--components', action='store', type=int, default=300, help='components for PCA fit (default 300)')
    parser.add_argument('--output', action='store', default='ml_out.json', help='output file for vectors and classifications (default ml_out.json)')
    parser.add_argument('--grid_width', action='store', type=int, default=0, help='grid width measured in images. If not defined, it will be calculated towards having a (default) 2:1 aspect ratio')
    parser.add_argument('--grid_height', action='store', type=int, default=0, help='grid height measured in images. If not defined, it will be calculated towards having a (default) 2:1 aspect ratio')
    parser.add_argument('--aspect_ratio', action='store', type=float, default=2, help='if neither grid_width nor grid_height is specified, aim for this aspect ratio')
    parser.add_argument('--scale_factor', action='store', type=int, default=100000, help='coordinates are multiplied with this before RasterFairy processing (do not change this unless you know what you are doing)')
    parser.add_argument('--render_tsne', action='store', default='', help='if defined, a collage of the images positioned by their t-SNE calculated coordinates is rendered to the given file')
    parser.add_argument('--render_width', action='store', type=int, default=5000, help='the width of the t-SNE render')
    parser.add_argument('--render_height', action='store', type=int, default=5000, help='the height of the t-SNE render')
    parser.add_argument('--render_part_width', action='store', type=int, default=100, help='the width of a single image on the full t-SNE render')
    parser.add_argument('--render_part_height', action='store', type=int, default=100, help='the height of a single image on the full t-SNE render')
    return vars(parser.parse_args(args))
#
# Load an image and prepare it for use with keras.
#
# https://stackoverflow.com/questions/47555829/preprocess-input-method-in-keras
def load_image(path, input_shape):
    """Read the image at *path*, resize it to *input_shape* and return it as a
    preprocessed single-image batch ready for the keras model."""
    loaded = image.load_img(path, target_size=input_shape)
    # Add a leading batch axis, then apply the model family's preprocessing.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)
#
# Perform keras ML-analysis of the given images and return both the penultimate layer and the
# predictions.
# The penultimate layer can be used for finding distances between images as well as generating
# a visual representation of all images relative to each other, if the dimensionality is reduced
# (this is what happens in the reduce method).
#
# The predictions can be used directly for labelling images.
#
# https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b
def analyze(image_paths, output, penultimate_layer):
    """Run every image through VGG16 and collect features plus top-10 predictions.

    :param image_paths: iterable of image file paths to analyse.
    :param output: accepted but not used inside this function.
    :param penultimate_layer: name of the VGG16 layer whose activations are
                              extracted alongside the final predictions (main uses "fc2").
    :return: tuple (acceptable_image_paths, penultimate_features,
             prediction_features, predictionss) — four parallel lists, one
             entry per successfully analysed image.
    """
    model = keras.applications.VGG16(weights='imagenet', include_top=True)
    penultimate = model.get_layer(penultimate_layer).output
    predictions = model.get_layer("predictions").output
    # One extractor returning both outputs in a single forward pass.
    feat_extractor = Model(inputs=model.input, outputs=[penultimate, predictions])
    input_shape = model.input_shape[1:3] # 224, 224?
    acceptable_image_paths = []
    penultimate_features = []
    prediction_features = []
    predictionss = []
    for index, path in enumerate(image_paths):
        img = load_image(path, input_shape);
        # NOTE(review): load_image never returns None (keras raises on failure),
        # so the else branch below appears unreachable — TODO confirm intent.
        if img is not None:
            print(" - Analyzing %d/%d: %s " % ((index+1),len(image_paths), path), flush=True)
            features = feat_extractor.predict(img)
            penultimate_features.append(features[0][0]) # 4096 dimensional
            prediction_features.append(features[1][0]) # 1000 dimensional
            acceptable_image_paths.append(path)
            # NOTE(review): rebinds 'predictions', shadowing the layer tensor above;
            # harmless here since the tensor is no longer used past this point.
            predictions = decode_predictions(features[1], top=10)[0]
            predictionss.append(predictions)
        else:
            print(" - Image not available %d/%d: %s" % ((index+1),len(image_paths), path), flush=True)
    return acceptable_image_paths, penultimate_features, prediction_features, predictionss
#
# Takes an array of t-SNE derived 2D coordinates and performs a linear normalisation to the unit
# space, meaning all coordinates will be in the space defined by (0, 0), (1, 1) in floating point
# numbers. The method also produces an array with the coordinates linearly normalised to the
# integer space defined by (0, 0), (scale_factor, scale_factor).
#
# Normalised coordinates are used to render presentations of the images relative to each other.
# The integer version is needed by RasterFairy to produce a grid of non-overlapping images.
#
def normalise_tsne(tsne_raws, scale_factor=100000):
    """Linearly normalise raw 2D t-SNE coordinates.

    Bug fix: this function previously read a module-global ``scale_factor``
    that is only defined when the script runs as __main__, so importing and
    calling it raised NameError. It is now an explicit parameter with the
    script's default value.

    :param tsne_raws: numpy array of shape (n, 2) with raw t-SNE coordinates.
    :param scale_factor: multiplier applied to the unit coordinates to form
                         the integer coordinates RasterFairy consumes.
    :return: tuple (tsne_norm, tsne_norm_int) of per-point [x, y] lists, the
             first in unit floats, the second in scaled ints.
    """
    tsne_min = [ np.min(tsne_raws[:,d]) for d in range(2) ]
    tsne_span = [ np.max(tsne_raws[:,d]) - tsne_min[d] for d in range(2) ]
    tsne_norm = []
    tsne_norm_int = []
    for raw_point in tsne_raws:
        norm = [float((raw_point[d] - tsne_min[d])/tsne_span[d]) for d in range(2) ]
        tsne_norm.append(norm)
        norm_int = [int(norm[d] * scale_factor) for d in range(2) ]
        tsne_norm_int.append(norm_int)
    return tsne_norm, tsne_norm_int
#
# Reduces the dimensionality of the given penultimate vectors to 2. This is a multi-step
# process with the first step being PCA (Principal Component Analysis), which is fast with
# fair quality, and the second step being t-SNE, which is slow with high quality.
#
# Reducing to 2 dimensions is used for visualisations where similar images are close to
# each other.
#
def reduce(penultimate_features, perplexity, learning_rate, pca_components, scale_factor):
    """Reduce high-dimensional feature vectors to normalised 2D coordinates.

    :param penultimate_features: list of equal-length numeric feature vectors.
    :param perplexity: t-SNE perplexity.
    :param learning_rate: t-SNE learning rate.
    :param pca_components: upper bound on PCA components (capped at image count).
    :param scale_factor: forwarded to normalise_tsne for the integer coordinates.
    :return: the (tsne_norm, tsne_norm_int) pair from normalise_tsne.
    """
    # t-SNE is too costly to run on 4096-dimensional space, so we reduce with PCA first.
    image_count = len(penultimate_features)
    # TODO: Shouldn't we just skip the PCA-step if there are less images than pca_components?
    components = min(pca_components, image_count)
    print(" - Running PCA on %d images with %d components..." % (image_count, components), flush=True)
    features = np.array(penultimate_features)
    pca = PCA(n_components=components)
    pca_result = pca.fit_transform(features)
    print(" - Running t-SNE on %d images with perplexity=%d, learning_rate=%d, n_iter=%d..." % (image_count, perplexity, learning_rate, 300), flush=True)
    tsne = TSNE(n_components=2, verbose=1, perplexity=perplexity, learning_rate=learning_rate, n_iter=300)
    tsne_raws = tsne.fit_transform(np.array(pca_result))
    # Bug fix: scale_factor is now passed explicitly instead of being read
    # from a global that only exists when run as a script.
    return normalise_tsne(tsne_raws, scale_factor)
#
# Given a number of images, the width and height of a grid, capable of holding all the images,
# is returned. It is possible to affect the layout of the grid by specifying either width, height
# or aspect ratio.
#
# A grid layout is needed by RasterFairy to produce a grid of non-overlapping images.
#
def calculate_grid(image_count, grid_width, grid_height, aspect_ratio):
    """Return a (grid_width, grid_height) pair that can hold image_count images.

    A dimension passed as 0 is derived from the other one, or — when both are
    0 — from the requested aspect ratio. Exits with an error if an explicit
    width*height grid is too small; warns if it is large enough to leave a
    whole row or column empty.
    """
    print(f" - Checking grid for {image_count} images with parameters grid_width={grid_width}, grid_height={grid_height}, aspect_ratio={aspect_ratio}...", flush=True)
    if grid_width == 0 and grid_height == 0:
        # Derive both dimensions from the intended aspect ratio.
        print(f" - Neither grid_width nor grid_height is specified. Calculating with intended aspect ratio {aspect_ratio}:1")
        grid_height = int(math.sqrt(image_count / aspect_ratio)) or 1
        grid_width = int(image_count / grid_height)
        if grid_width * grid_height < image_count:
            grid_width += 1
    elif grid_width != 0 and grid_height != 0:
        # Both dimensions fixed by the caller: validate capacity.
        capacity = grid_width * grid_height
        if capacity < image_count:
            sys.exit(f"Error: grid_width=={grid_width} * grid_height=={grid_height} == {capacity} does not hold image_count=={image_count}")
        if grid_width * (grid_height - 1) >= image_count:
            print(f"Warning: grid_width=={grid_width} * grid_height=={grid_height} == {capacity} is too large for image_count=={image_count} images (rows can be skipped and rasterfair hangs on mismatched grid capacity)")
        if (grid_width - 1) * grid_height >= image_count:
            print(f"Warning: grid_width=={grid_width} * grid_height=={grid_height} == {capacity} is too large for image_count=={image_count} images (columns can be skipped and rasterfair hangs on mismatched grid capacity)")
        print(f" - grid_height=={grid_height}, grid_width=={grid_width}")
    elif grid_width != 0:
        # Width fixed: derive the smallest sufficient height.
        grid_height = int(image_count / grid_width)
        if grid_height * grid_width < image_count:
            grid_height += 1
        print(f" - grid_width=={grid_width}, calculated grid_height=={grid_height}")
    else:
        # Height fixed: derive the smallest sufficient width.
        grid_width = int(image_count / grid_height)
        if grid_width * grid_height < image_count:
            grid_width += 1
        print(f" - grid_height=={grid_height}, calculated grid_width=={grid_width}")
    print(f" - The {image_count} images will be represented on a {grid_width}x{grid_height} grid", flush=True)
    return grid_width, grid_height
#
# Takes the normalised image 2D coordinates and lays them on a grid, using RasterFairy.
# The positon of the images is returned as grid-coordinates.
#
# gridifying is needed for juxta as it operates in a grid-oriented world.
#
def gridify(tsne_norm_int, grid_width, grid_height):
    """Assign each normalised 2D point a cell on a grid_width x grid_height grid
    via RasterFairy and return the per-point grid coordinates."""
    count = len(tsne_norm_int)
    print(f" - Calling RasterFairy for {count} images to a {grid_width}x{grid_height} grid", flush=True)
    points = np.array(tsne_norm_int)
    # The grid shape reported back by RasterFairy is not needed by the callers.
    grid_assignment, _grid_shape = rasterfairy.transformPointCloud2D(points, target=(grid_width, grid_height))
    return grid_assignment
#
# Merges & sorts all structures according to the given grid layout.
# The resulting list of image-structures is ordered left->right, top->down, ready for use
# with juxta or similar tool that expects the images to be ordered.
#
def merge(grid, tsne_norm, acceptable_image_paths, penultimate_features, prediction_features, predictions):
    """Combine the parallel per-image lists into one dict per image, ordered
    row-major (top->down, then left->right) by grid position."""
    combined = [
        {
            'path': path,
            'position_norm': tsne_norm[i],
            'position_grid': grid[i],
            'penultimate': penultimate_features[i],
            'prediction_features': prediction_features[i],
            'predictions': predictions[i],
        }
        for i, path in enumerate(acceptable_image_paths)
    ]
    # One sort on the (row, column) tuple is equivalent to the original pair of
    # stable sorts (column first, then row).
    combined.sort(key=lambda entry: (entry['position_grid'][1], entry['position_grid'][0]))
    return combined
#
# Store the full structure (image path, penultimate layer, predictions, raw 2D position, grid position)
# as JSON structures, one image/line.
#
# This can be used for further processing in external tools, such as juxta.
#
def store(merged, penultimate_layer, grid_width, grid_height, output):
    """Write one JSON object per line to *output*, one line per image.

    Bug fixes: the file is now opened with a context manager (it previously
    leaked on any exception), and each record is terminated by exactly one
    newline — the old leading-separator plus trailing-newline combination
    emitted a blank line between records, contradicting the one-image-per-line
    format stated above.

    :param merged: ordered image dicts as produced by merge/process_external.
    :param penultimate_layer: layer name recorded with each penultimate vector.
    :param grid_width: grid width, only used for the final status message.
    :param grid_height: grid height, only used for the final status message.
    :param output: path of the file to (over)write.
    """
    with open(output, "w") as out:
        for element in merged:
            out.write('{ "path":"' + element['path'] + '", ')
            # TODO: Remember to make this a variable when the script is extended to custom networks
            out.write('"network": "imagenet", ')
            out.write('"norm_x": ' + str(element['position_norm'][0]) + ', ')
            out.write('"norm_y": ' + str(element['position_norm'][1]) + ', ')
            out.write('"grid_x": ' + str(int(element['position_grid'][0])) + ', ')
            out.write('"grid_y": ' + str(int(element['position_grid'][1])) + ', ')
            # NOTE(review): a missing key would raise KeyError, which these
            # handlers do not catch — the ValueError guards look vestigial; kept
            # to preserve the original best-effort behaviour. TODO confirm.
            try:
                predictions = element['predictions']
                out.write('"predictions": [')
                out.write(','.join((' {"designation":"' + str(c[1]) + '", "probability":' + str(c[2]) + ', "internalID":"' + str(c[0])+ '"}') for c in predictions))
                out.write("], ")
            except ValueError:
                print(" - No predictions")
            try:
                prediction_features = element['prediction_features']
                out.write('"prediction_vector": [' + ','.join(str(f) for f in prediction_features) + "], ")
            except ValueError:
                print(" - No prediction vector")
            penultimate = element['penultimate']
            out.write('"penultimate_vector_layer": "' + penultimate_layer + '", ')
            out.write('"penultimate_vector": [' + ','.join(str(f) for f in penultimate) + "]")
            # Exactly one newline per record.
            out.write("}\n")
    print("Stored result in '" + output + "', generate collage with grid dimensions " + str(grid_width) + "x" + str(grid_height), flush=True)
#
# Generate a collage with the given images at the raw 2D positions produced by the reduce method.
#
# This is useful for inspecting relative image similarity, a quantity that is severely hobbled
# when doing a juxta grid based rendering.
#
def render(merged, render_tsne, render_width, render_height, render_part_width, render_part_height):
    """Paste every image onto one render_width x render_height canvas at its raw
    t-SNE position and save the collage to render_tsne. No-op when render_tsne
    is the empty string."""
    if render_tsne == '':
        return
    print(f" - Generating collage from raw t-SNE coordinates to {render_tsne}", flush=True)
    canvas = PILImage.new('RGBA', (render_width, render_height))
    for element in merged:
        image_path = element['path']
        norm_x, norm_y = element['position_norm']
        print(f" - {image_path}", flush=True)
        part = PILImage.open(image_path)
        # Shrink so the image fits inside the requested part size, keeping its aspect ratio.
        shrink = max(part.width / render_part_width, part.height / render_part_height)
        part = part.resize((int(part.width / shrink), int(part.height / shrink)), PILImage.LANCZOS)
        paste_x = int(norm_x * (render_width - render_part_width))
        paste_y = int(norm_y * (render_height - render_part_height))
        # The alpha channel is used as the paste mask so transparency is kept.
        canvas.paste(part, (paste_x, paste_y), mask=part.convert('RGBA'))
    canvas.save(render_tsne)
    print(f" - Collage generated from raw t-SNE coordinates and stored as {render_tsne}")
#
# Takes externally calculated penultimate vectors, reduces them to 2D and lays them on a grid.
#
# The input format is one image/line, where each line contains
# image_path;coordinate[,coordinate]*
def process_external(path_vectors, grid_width, grid_height, aspect_ratio, perplexity, learning_rate, pca_components, scale_factor):
    """Reduce externally supplied feature vectors to 2D, grid them and store the result.

    Fixes: the vector-parsing comprehension shadowed the builtin ``str``; the
    path/vector split now uses maxsplit=1, matching the documented
    "image_path;coordinates" format (maxsplit=2 made any extra ';' break the
    two-way unpacking).

    :param path_vectors: lines of the form "image_path;coord[,coord]*".
    :param grid_width: requested grid width (0 = derive), see calculate_grid.
    :param grid_height: requested grid height (0 = derive), see calculate_grid.
    :param aspect_ratio: target width:height ratio when both dimensions are derived.
    :param perplexity: forwarded to reduce (t-SNE).
    :param learning_rate: forwarded to reduce (t-SNE).
    :param pca_components: forwarded to reduce (PCA).
    :param scale_factor: forwarded to reduce (integer coordinate scaling).
    """
    print(" - Parsing " + str(len(path_vectors)) + " entries")
    image_paths = []
    vectorss = []
    for path_vector in path_vectors:
        # Only the first ';' separates the path from the vector.
        path, vector = path_vector.split(';', 1)
        image_paths.append(path)
        vectorss.append([float(component) for component in vector.split(',')])
    tsne_norm, tsne_norm_int = reduce(vectorss, perplexity, learning_rate, pca_components, scale_factor)
    grid_width, grid_height = calculate_grid(len(tsne_norm_int), grid_width, grid_height, aspect_ratio)
    grid = gridify(tsne_norm_int, grid_width, grid_height)
    merged = []
    for i, path in enumerate(image_paths):
        merged.append({
            'path': path,
            'position_norm': tsne_norm[i],
            'position_grid': grid[i],
            'penultimate': vectorss[i]
        })
    # Column is secondary, row is primary - Python sort is stable; it does not change order on equal keys
    merged.sort(key = lambda obj: obj['position_grid'][0])
    merged.sort(key = lambda obj: obj['position_grid'][1])
    # NOTE(review): 'output' is a module-level name assigned only in the
    # __main__ block; calling this function from an importing module would
    # raise NameError — TODO confirm whether it should become a parameter.
    store(merged, "Unknown", grid_width, grid_height, output)
if __name__ == '__main__':
    params = process_arguments(sys.argv[1:])
    image_paths = params['images']
    # If the images-argument is a string instead of an existing file, try globbing it
    if len(image_paths) == 1:
        if os.path.isfile(image_paths[0]):
            print("Using entries listed in " + image_paths[0][:75] + " as input")
            # Bug fix: the list file was previously never closed; 'with' guarantees it.
            with open(image_paths[0], 'r') as list_file:
                image_paths = [line.strip() for line in list_file.readlines()]
        else:
            print("Globbing '" + image_paths[0] + "'")
            image_paths = glob.glob(os.path.expanduser(image_paths[0]))
    if len(image_paths) == 0:
        print("Error: 0 images resolved")
        sys.exit()
    # int()/float() conversions keep this robust whether argparse delivers
    # strings or already-typed values.
    perplexity = int(params['perplexity'])
    learning_rate = int(params['learning_rate'])
    pca_components = int(params['components'])
    output = params['output']
    penultimate_layer = "fc2"
    # RasterFairy arguments
    grid_width = int(params['grid_width'])
    grid_height = int(params['grid_height'])
    aspect_ratio = float(params['aspect_ratio'])
    # Bug fix: scale_factor was the only numeric parameter used unconverted; a
    # command-line-supplied value arrived as a str and crashed the coordinate
    # normalisation (float * str).
    scale_factor = int(params['scale_factor'])
    render_tsne = params['render_tsne']
    render_width = int(params['render_width'])
    render_height = int(params['render_height'])
    render_part_width = int(params['render_part_width'])
    render_part_height = int(params['render_part_height'])
    # A ';' and a ',' in the first entry means the input is pre-computed
    # path;vector lines rather than image files.
    if (image_paths[0].find(";") != -1 and image_paths[0].find(",") != -1):
        print("The input content '" + image_paths[0][:75] + "...' looks like image_paths and vectors. Skipping network analysis")
        process_external(image_paths, grid_width, grid_height, aspect_ratio, perplexity, learning_rate, pca_components, scale_factor)
    else:
        acceptable_image_paths, penultimate_features, prediction_features, predictions = analyze(image_paths, output, penultimate_layer)
        tsne_norm, tsne_norm_int = reduce(penultimate_features, perplexity, learning_rate, pca_components, scale_factor)
        grid_width, grid_height = calculate_grid(len(tsne_norm_int), grid_width, grid_height, aspect_ratio)
        grid = gridify(tsne_norm_int, grid_width, grid_height)
        merged = merge(grid, tsne_norm, acceptable_image_paths, penultimate_features, prediction_features, predictions)
        render(merged, render_tsne, render_width, render_height, render_part_width, render_part_height)
        store(merged, penultimate_layer, grid_width, grid_height, output)