# -*- coding: utf-8 -*-
"""Utility.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1jcypA8uS80c0GXIbUpPtR9cIsMCULafP
"""

import os

import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Model, load_model

from utils import detector_utils
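
# Load the hand-detection graph and TensorFlow session once at import time so
# every call to frames_extraction() below can reuse them.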
detection_graph, sess = detector_utils.load_inference_graph()

def duplicator(frames, sequenceLength):
    """Stretch a list of frames to exactly `sequenceLength` frames by
    duplicating each frame, spreading the extra copies evenly."""
    inLen = len(frames)
    outLen = sequenceLength
    multiplier = outLen // inLen
    fraction = outLen / inLen - multiplier
    accumulator = 0
    outVideo = []
    for frame in frames:
        for _ in range(multiplier):
            outVideo.append(frame)
        accumulator += fraction
        if accumulator >= 1:
            outVideo.append(frame)
            accumulator -= 1
    # Rounding can leave the output a frame short; pad with the last frame.
    while len(outVideo) < outLen:
        outVideo.append(frames[-1])
    return outVideo
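
# Quick sanity check of the stretch logic (integers standing in for frames):
# stretching 3 frames to length 7 yields two copies of each frame, plus one
# extra copy of the last frame once the fractional remainder reaches 1.
assert duplicator([1, 2, 3], 7) == [1, 1, 2, 2, 3, 3, 3]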

def video_to_descriptors(X):
    """Run each frame through VGG16 (ImageNet weights, no classifier head)
    and flatten the final feature map into one descriptor per frame."""
    conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
    flat1 = Flatten()(conv_base.layers[-1].output)
    model = Model(inputs=conv_base.inputs, outputs=flat1)
    output = model.predict(X)
    return output
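
# Shape note: with 64x64 inputs, headless VGG16 ends in a 2x2x512 feature map,
# so each frame flattens to 2 * 2 * 512 = 2048 values. A full 72-frame clip
# therefore maps (72, 64, 64, 3) -> (72, 2048), which is exactly what
# predict() below reshapes to (1, 72, 2048).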

def preprocess(video_path):
    """Extract hand-cropped frames from a video and convert them into
    VGG16 descriptors ready for classification."""
    _, frames = frames_extraction(video_path, 0, [0])
    frames = np.array(frames)
    if frames.shape[0] == 0:
        raise Exception("Input video does not contain hands.")
    descriptor = video_to_descriptors(frames)
    return descriptor

def video_split(video_path):
    """Split a video file into sequences of at most 72 frames each;
    trailing chunks shorter than 36 frames are discarded."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError('Cannot access video: {}'.format(video_path))
    count = 0
    X = []
    sequence_length = 72
    ret = True
    while cap.isOpened():
        frames_list = []  # Frames for the current sequence
        while count < sequence_length:  # Read until the sequence is full
            ret, frame = cap.read()
            if not ret:
                break
            frames_list.append(frame)
            count += 1
        # Keep the chunk only if it is at least half a full sequence long.
        if len(frames_list) >= 36:
            X.append(frames_list)
        count = 0
        if not ret:
            break
    cap.release()
    X = np.array(X)
    return X
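
# For example, a 180-frame clip yields chunks of 72, 72, and 36 frames. Note
# that np.array() only stacks cleanly when every chunk has the same length;
# ragged chunks produce an object array (or an error on newer NumPy).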

def video_cropper(frames, video_path, starting_frame):
    """Take a 72-frame window from `frames` beginning at `starting_frame`;
    if fewer than 72 frames remain, stretch the tail back to 72."""
    seq_len = 72
    inLen = len(frames)
    flag = 0
    i = starting_frame
    if starting_frame + seq_len > inLen:
        end = inLen
        flag = 1
    else:
        end = i + seq_len
    outVideo = []
    while i < end:
        outVideo.append(frames[i])
        i += 1
    if flag == 1:
        # The window ran off the end of the clip, so pad it back up to 72.
        return duplicator(outVideo, seq_len)
    return outVideo
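
# For example, cropping a 100-frame clip from frame 40 leaves only 60 frames
# (40..99), so the window is stretched back to 72 via duplicator().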

# Creating frames from videos
def frames_extraction(video_path, idx=0, starting_frames=[0]):
    """Read a video, crop each frame to the detected hand region(s), and
    return (idx_flag, frames_list): exactly 72 cropped 64x64 RGB frames,
    plus a flag telling the caller whether the starting_frames index was
    consumed."""
    frames_list = []
    img_height, img_width = 64, 64
    seq_len = 72
    sf = starting_frames[idx]
    vidObj = cv2.VideoCapture(video_path)
    # fc is the raw frame count. It must be tracked separately because frames
    # dropped by hand detection can push the video below 72 frames, which
    # would otherwise misalign it with the starting_frames array.
    fc = 0
    noLimbCount = 0
    idx_flag = False
    detected_frames = 0
    counter = 0       # Counts the first 10 frames, which are checked for a face
    faceFlag = False
    while vidObj.isOpened():
        success, imageRGB = vidObj.read()
        if not success:
            # Defective or last frame
            break
        fc += 1
        image = cv2.cvtColor(imageRGB, cv2.COLOR_BGR2RGB)
        # Check the frame for a face
        face = detector_utils.face_in_frame(image, None, False)
        if counter < 10:
            faceFlag = faceFlag or face
            counter += 1
        else:
            if not faceFlag:
                raise Exception("Face not in Frame")
        # Save the face frame once: only after the 10-frame face check has
        # passed, and only if the current frame actually contains a face.
        if not os.path.isfile('face.jpeg') and face and faceFlag:
            cv2.imwrite('face.jpeg', imageRGB)
        img_w = vidObj.get(cv2.CAP_PROP_FRAME_WIDTH)
        img_h = vidObj.get(cv2.CAP_PROP_FRAME_HEIGHT)
        boxes, scores = detector_utils.detect_objects(image, detection_graph, sess)
        # Boxes are already sorted by score; each box is (top, left, bottom, right)
        # in normalised coordinates.
        box1, score1 = boxes[0], scores[0]
        box2, score2 = boxes[1], scores[1]
        # The detection threshold is set at 0.3; it can be changed.
        if scores[0] >= 0.3 and scores[1] >= 0.3:
            # Two hands: take the smallest box that contains both detections.
            t1, l1, b1, r1 = box1[0], box1[1], box1[2], box1[3]
            t2, l2, b2, r2 = box2[0], box2[1], box2[2], box2[3]
            t, l = min(t1, t2), min(l1, l2)
            b, r = max(b1, b2), max(r1, r2)
            # Crop the union box (scale normalised coordinates by the frame
            # size) and resize to the network input size.
            t, l, b, r = t * img_h, l * img_w, b * img_h, r * img_w
            image = image[int(t):int(b), int(l):int(r)]
            image = cv2.resize(image, (img_height, img_width))
            frames_list.append(image)
            detected_frames += 1
        elif scores[0] >= 0.3:
            # Only one hand detected
            l1, r1, t1, b1 = box1[1] * img_w, box1[3] * img_w, box1[0] * img_h, box1[2] * img_h
            l1, r1, t1, b1 = int(l1), int(r1), int(t1), int(b1)
            image = image[t1:b1, l1:r1]
            image = cv2.resize(image, (img_height, img_width))
            frames_list.append(image)
            detected_frames += 1
        elif scores[1] >= 0.3:
            # Unreachable if scores are sorted descending; kept as a safeguard.
            l2, r2, t2, b2 = box2[1] * img_w, box2[3] * img_w, box2[0] * img_h, box2[2] * img_h
            image = image[int(t2):int(b2), int(l2):int(r2)]
            image = cv2.resize(image, (img_height, img_width))
            frames_list.append(image)
            detected_frames += 1
        else:
            # No hands in this frame: skip it, and shift the starting frame
            # back so it still points at the same content.
            noLimbCount += 1
            if fc < starting_frames[idx]:
                sf -= 1
    vidObj.release()
    if len(frames_list) < seq_len and detected_frames > 0:
        frames_list = duplicator(frames_list, seq_len)
    elif len(frames_list) > seq_len and detected_frames > 0:
        frames_list = video_cropper(frames_list, video_path, sf)
        idx_flag = True
    # Videos with more than 72 raw frames that never entered video_cropper
    # (because too few hands were detected) still consume a starting frame.
    if fc > 72:
        idx_flag = True
    return idx_flag, frames_list

model = load_model('Model/VGG16_20_words_2LSTM_13052021_9011_accuracy.h5')
classes = ['cold','crash','doctor', 'give', 'medicine', 'no', 'police', 'woman', 'yes','animal','child','danger','help','home','kill','please','rob','send','sick','want']

def predict(descriptor):
    """Classify one 72-frame descriptor sequence; return the class name,
    or 'Unclassified' when the model is not confident enough."""
    descriptor = descriptor.reshape(1, 72, 2048)
    y_pred = model.predict(descriptor)
    index = int(y_pred.argmax(axis=1)[0])
    confidence = y_pred[0][index]
    if confidence >= 0.25:
        return classes[index]
    else:
        return 'Unclassified'