import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
from tqdm.auto import tqdm

# project helpers: resize_images, _l2 and cosine_distance are used below
from utils import CFG as utils

class _paths:
    '''Input CSVs and the image paths their `idx` columns point to.'''
    _queries = pd.read_csv('data/queries.csv')
    _test = pd.read_csv('data/test.csv')
    test_path = [Path(f'data/test/{i}.png') for i in _test.idx]
    queries_path = [Path(f'data/queries/{i}.png') for i in _queries.idx]
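
# A minimal sketch (not part of the original script) of the assumed CSV
# layout: queries.csv and test.csv are inferred to expose an `idx` column
# whose values name the PNG files. Illustrated with a hypothetical frame:
_demo = pd.DataFrame({'idx': [0, 1]})
assert [Path(f'data/test/{i}.png') for i in _demo.idx] == [
    Path('data/test/0.png'),
    Path('data/test/1.png'),
]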

class ImgData:
    '''Wraps a list of image paths and turns them into model embeddings.'''

    def __init__(self, path=None):
        self.path = path

    def read_image(self, path: str) -> tf.Tensor:
        '''
        Read one image file into a float32 tensor with a batch dimension:
        1. Read the raw file bytes.
        2. Decode to exactly 3 channels (at least one source photo has an
           alpha channel, i.e. 4 channels).
        3. Convert to tf.float32 in [0, 1].
        4. Add a batch dimension: (H, W, 3) -> (1, H, W, 3).
        '''
        img = tf.io.read_file(path)
        img = tf.image.decode_image(img, channels=3, expand_animations=False)
        img = tf.image.convert_image_dtype(img, dtype=tf.float32)
        img = tf.expand_dims(img, axis=0)
        return img

    def _embFeatures(self, model) -> np.ndarray:
        '''
        Compute an L2-normalized embedding for every image in self.path.
        from_tensor_slices builds a dataset of file-path strings; each map
        then applies a transformation to every element in parallel.
        '''
        _files = list(map(str, self.path))
        _files = tf.data.Dataset.from_tensor_slices(_files)
        _files = _files.map(self.read_image,
                            num_parallel_calls=tf.data.AUTOTUNE)
        _files = _files.map(utils.resize_images,
                            num_parallel_calls=tf.data.AUTOTUNE)  # resize to 512x512
        _files = _files.prefetch(10)
        # get embeddings & L2-normalize them
        _emb = _files.map(model)
        _emb = _emb.map(utils._l2)
        _total = len(_emb)  # dataset cardinality = number of input images
        _emb = _emb.as_numpy_iterator()
        _emb = tqdm(_emb, total=_total)
        _emb = list(_emb)
        _emb = np.concatenate(_emb)
        return _emb
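
# Hedged sketches (assumptions, not the project's actual code): the utils
# module is not shown, so these hypothetical stand-ins only illustrate what
# utils.resize_images and utils._l2 presumably do, given how they are used
# in _embFeatures above.
def _resize_images_sketch(img: tf.Tensor) -> tf.Tensor:
    # resize the spatial dimensions of a (1, H, W, 3) batch to 512x512
    return tf.image.resize(img, (512, 512))

def _l2_sketch(emb: tf.Tensor) -> tf.Tensor:
    # L2-normalize each embedding so cosine similarity reduces to a dot product
    return tf.math.l2_normalize(emb, axis=-1)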

model = tf.keras.models.load_model('./weights/')  # trained embedding model
test_images = ImgData(_paths.test_path)
queries_images = ImgData(_paths.queries_path)
test_emb = test_images._embFeatures(model)
queries_emb = queries_images._embFeatures(model)

def find_similar(queries_emb, test_emb, count_predict):
    '''Return the indices and scores of the top-k most similar test images
    for each query embedding.'''
    scores = utils.cosine_distance(queries_emb, test_emb)
    nq, ndb = scores.shape
    order = scores.argsort(axis=1)[:, ::-1]  # sort each row in descending score order
    topk = order[:, :count_predict]          # indices of the top-k images per query
    topk_scores = np.stack([scores[i, topk[i]] for i in range(nq)])  # scores at those indices
    return topk, count_predict, topk_scores
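
# A hedged sketch of utils.cosine_distance, assuming it returns a
# (num_queries, num_test) similarity matrix in which higher means more
# similar -- that is what the descending sort in find_similar expects.
# Hypothetical stand-in only; the real helper lives in utils:
def _cosine_similarity_sketch(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # normalize rows, then cosine similarity is a plain matrix product
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return a @ b.T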

topk, k, topk_scores = find_similar(queries_emb, test_emb, 10)

# map every top-k column index back to the corresponding test-image id
predicted = np.array([[_paths._test.idx.iloc[index] for index in row]
                      for row in topk])

pred_data = pd.DataFrame()
pred_data['score'] = topk_scores.flatten()
pred_data['query_idx'] = np.repeat(_paths._queries.idx.values, k)  # one row per (query, prediction)
pred_data['database_idx'] = predicted.flatten()
pred_data.to_csv('data/submission.csv', index=False)
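
# Optional sanity check (an added suggestion, not in the original script):
# the submission should contain exactly k rows per query.
assert len(pred_data) == len(_paths._queries) * k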