"""
Combine the ResNet152 image model and the LSTM question model
into the final VQA classification model.
"""
import json

import numpy as np

from Lstm import *       # project-local module providing Lstm()
from Resnet152 import *  # project-local module providing ResNet152()
from keras.models import Model
from keras.layers import Lambda, Dense, multiply


def concat_test(tensors):
    """Fuse the image and question feature vectors by element-wise multiplication."""
    a = tensors[0]
    b = tensors[1]
    return multiply([a, b])

def model(num_words, embedding_dim, num_classes):
    """Build the joint VQA model from the frozen ResNet152 branch and the LSTM branch."""
    img_mod = ResNet152(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    # Freeze the pretrained image branch so only the fusion and classifier layers train.
    for layer in img_mod.layers:
        layer.trainable = False
    lstm = Lstm(num_words=num_words, embedding_dim=embedding_dim)
    # Fuse the two feature vectors element-wise, then classify over the answer vocabulary.
    vqa = Lambda(concat_test, name='dot')([img_mod.output, lstm.output])
    vqa = Dense(1000, activation='tanh', name='fin_fc_1')(vqa)
    vqa = Dense(num_classes, activation='softmax', name='fin_fc_2')(vqa)
    vqa_model = Model(inputs=[img_mod.input, lstm.input], outputs=vqa)
    vqa_model.summary()
    vqa_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return vqa_model
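
# Usage sketch (a minimal illustration, not code from this repo; img_batch,
# que_batch and answers_one_hot are hypothetical names, and the exact question
# shape depends on how Lstm() is defined):
#   vqa_model = model(num_words=num_words, embedding_dim=300, num_classes=num_classes)
#   vqa_model.fit([img_batch, que_batch], answers_one_hot, batch_size=32, epochs=10)
# img_batch would be preprocessed 224x224x3 images, que_batch the padded question
# index sequences, and answers_one_hot the one-hot answer labels.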

def move_right(array, length):
    """Right-align the first `length` entries of `array` in a zero vector of the same shape."""
    vec = np.zeros(np.shape(array))
    n = array.shape[0]
    vec[n - length:n] = array[:length]
    return vec
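
# Example of move_right (illustrative values, not from this repo):
#   move_right(np.array([4, 7, 9, 0, 0, 0]), 3) -> array([0., 0., 0., 4., 7., 9.])
# The first `length` tokens are shifted to the end of a zero vector of the same
# size, i.e. the right-aligned question padding used in cust_data() below.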
"""
def cust_data():
img = np.array(image.load_img('cat.jpg', target_size=(224,224)))
#img_path = 'cat.jpg'
# x = image.img_to_array(img)
#x = np.expand_dims(x, axis=0)
#x = preprocess_input(x)
#model = VGG16(include_top = False, weights='imagenet')
#img = model.predict(x)
#t = np.sqrt(np.sum(np.multiply(img, img), axis = 1))
#img = np.divide(img, np.transpose(np.tile(t, (4096, 1))))
ques = [x.lower() for x in word_tokenize("What is the color of cat?")]
que = [0 for x in range(0,26)]
que_len = []
que_len.append(len(ques))
loc = 0
data = json.load(open("data/data_prepro.json"))
idx2word = data['ix_to_word']
for j in ques:
for i in idx2word.keys():
if idx2word[i] == j:
que[loc] = int(i)
loc+=1
break
que = np.array(que)
que_len = np.array(que_len)
que_check = move_right(que, que_len)
model = get_model(0.0, model_weights_filename)
print("Img dim: {}, que_check dim: {}".format(img.shape, que_check.shape))
value = model.predict([img, que_check])
print(value)
"""

if __name__ == "__main__":
    with open('data_prepro.json', 'r') as f:
        meta_data = json.load(f)
    # Invert the index -> word mapping into a word -> index lookup.
    meta_data['ix_to_word'] = {str(word): int(i) for i, word in meta_data['ix_to_word'].items()}
    num_words = len(meta_data['ix_to_word'])
    num_classes = len(meta_data['ix_to_ans'])
    vqa = model(num_words, 300, num_classes)