-
Notifications
You must be signed in to change notification settings - Fork 0
/
tomjerry.py
129 lines (100 loc) · 4.17 KB
/
tomjerry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import cv2 # for capturing videos
import math # for mathematical operations
import matplotlib.pyplot as plt # for plotting the images %matplotlib inline
import pandas as pd
from keras.preprocessing import image # for preprocessing the images
import numpy as np # for mathematical operations
from keras.utils import np_utils
from skimage.transform import resize # for resizing images
# --- Step 1: sample frames from the training video --------------------------
# Walks through the video and writes about one frame per second of footage to
# frame0.jpg, frame1.jpg, ... in the current working directory.
count = 0  # index used in the next output file name
videoFile = "Tom and jerry.mp4"
cap = cv2.VideoCapture(videoFile)  # capturing the video from the given path
frameRate = cap.get(cv2.CAP_PROP_FPS)  # frames per second reported for the file
# Keep one frame out of every `frame_step`. The reported frame rate can floor
# to 0 (value below 1, or missing metadata); clamp to 1 so the modulo below can
# never divide by zero.
frame_step = max(1, math.floor(frameRate))
try:
    while cap.isOpened():
        frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)  # index of the frame about to be read
        ret, frame = cap.read()
        if not ret:  # no more frames (or the read failed)
            break
        if frameId % frame_step == 0:
            filename = "frame%d.jpg" % count
            count += 1
            cv2.imwrite(filename, frame)
finally:
    # Release the capture handle even if reading or writing raised.
    cap.release()
print("Done!")
# --- Step 2: load the labelled training frames -------------------------------
img = plt.imread('frame0.jpg')  # reading the first extracted frame by name
# NOTE(review): no plt.show() follows, so when run as a plain script this
# figure is probably never displayed - confirm whether a preview is wanted.
plt.imshow(img)

data = pd.read_csv('mapping.csv')  # reading the csv file
# A bare `data.head()` only displays something inside a notebook; print it so
# the first five rows are actually shown when this runs as a script.
print(data.head())

# Read every image listed in the Image_ID column into one array.
X = np.array([plt.imread(img_name) for img_name in data.Image_ID])

y = data.Class
dummy_y = np_utils.to_categorical(y)  # one hot encoding Classes
# --- Step 3: resize, preprocess and split the training frames ----------------
from keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split

# Resize every frame to 224x224 (channels are kept), matching the
# input_shape=(224, 224, 3) the VGG16 base model is created with.
# The intermediate list is no longer called `image`, which used to shadow the
# `keras.preprocessing.image` module imported at the top of the file.
X = np.array([
    resize(frame_pixels, preserve_range=True, output_shape=(224, 224)).astype(int)
    for frame_pixels in X
])

# The test frames are passed through preprocess_input(..., mode='tf') before
# feature extraction; the training frames must receive the same transform,
# otherwise the classifier is trained and evaluated on differently scaled input.
X = preprocess_input(X, mode='tf')

# Hold out 30% of the frames as the validation set (fixed seed for repeatability).
X_train, X_valid, y_train, y_valid = train_test_split(X, dummy_y, test_size=0.3, random_state=42)
# --- Step 4: turn the frames into VGG16 feature vectors ----------------------
from keras.models import Sequential
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, InputLayer, Dropout

# include_top=False drops VGG16's own classifier layers, so predict() returns
# convolutional feature maps instead of ImageNet class scores.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
X_train = base_model.predict(X_train)
X_valid = base_model.predict(X_valid)
print(X_train.shape, X_valid.shape)  # a bare expression shows nothing in a script

# Flatten each feature map to a 1-D vector of length 7*7*512. The number of
# rows is taken from the arrays themselves rather than hard-coded, so this
# keeps working when the number of labelled frames changes.
X_train = X_train.reshape(X_train.shape[0], 7*7*512)
X_valid = X_valid.reshape(X_valid.shape[0], 7*7*512)

# Scale (not centre) the features by the largest training value; the
# validation set deliberately uses the same training maximum.
train_max = X_train.max()
train = X_train / train_max
X_valid = X_valid / train_max
# --- Step 5: classifier on top of the flattened VGG16 features ---------------
# Architecture: flattened feature vector -> 1024 sigmoid units -> 3-way softmax.
model = Sequential([
    InputLayer((7*7*512,)),                      # one flattened feature vector per frame
    Dense(units=1024, activation='sigmoid'),     # hidden layer
    Dense(3, activation='softmax'),              # one probability per class
])
model.summary()

# Multi-class setup: cross-entropy against the one-hot labels, Adam optimiser,
# accuracy reported during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 100 epochs, evaluating on the held-out validation split every epoch.
model.fit(
    train,
    y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
)
# --- Step 6: sample frames from the test video -------------------------------
# Walks through the video and writes about one frame per second of footage to
# test0.jpg, test1.jpg, ... in the current working directory.
count = 0  # index used in the next output file name
videoFile = "Tom and Jerry 3.mp4"
cap = cv2.VideoCapture(videoFile)
frameRate = cap.get(cv2.CAP_PROP_FPS)  # frames per second reported for the file
# Keep one frame out of every `frame_step`. The reported frame rate can floor
# to 0 (value below 1, or missing metadata); clamp to 1 so the modulo below can
# never divide by zero.
frame_step = max(1, math.floor(frameRate))
try:
    while cap.isOpened():
        frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)  # index of the frame about to be read
        ret, frame = cap.read()
        if not ret:  # no more frames (or the read failed)
            break
        if frameId % frame_step == 0:
            filename = "test%d.jpg" % count
            count += 1
            cv2.imwrite(filename, frame)
finally:
    # Release the capture handle even if reading or writing raised.
    cap.release()
print("Done!")
# --- Step 7: load and resize the test frames ---------------------------------
# test.csv supplies the file names of the test frames in its Image_ID column.
test = pd.read_csv('test.csv')

# Raw frames, read from disk in the order they are listed.
test_img = np.array([plt.imread('' + frame_name) for frame_name in test.Image_ID])

# Same frames resized to 224x224 (channels kept), with values cast to int.
test_image = np.array([
    resize(frame_pixels, preserve_range=True, output_shape=(224, 224)).astype(int)
    for frame_pixels in test_img
])
# --- Step 8: turn the test frames into VGG16 feature vectors -----------------
from keras.applications.vgg16 import preprocess_input

# preprocessing the images
test_image = preprocess_input(test_image, mode='tf')
# extracting features from the images using pretrained model
test_image = base_model.predict(test_image)
# Flatten each feature map to 1-D. The row count comes from the array itself
# instead of a hard-coded 186, so any number of test frames works.
test_image = test_image.reshape(test_image.shape[0], 7*7*512)
# Scale (not centre) with the training maximum: the classifier was fit on
# features divided by X_train.max(), so the test features must go through the
# same transform rather than being divided by their own maximum.
test_image = test_image / X_train.max()
# --- Step 9: predict a class per frame and report screen time ----------------
# `Sequential.predict_classes` is no longer available in current Keras/TF
# releases. For a softmax output it was simply the argmax over the class
# probabilities, which is what is computed here.
predictions = np.argmax(model.predict(test_image), axis=-1)

# Frames were sampled at roughly one per second, so the number of frames
# assigned to a class approximates that character's screen time in seconds.
# Class 1 is reported as JERRY and class 2 as TOM; class 0 is presumably
# "neither" - verify against the labels in mapping.csv.
print("The screen time of JERRY is", int(np.sum(predictions == 1)), "seconds")
print("The screen time of TOM is", int(np.sum(predictions == 2)), "seconds")