VGGSimpleRunner.py
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
from sklearn import preprocessing
from torch.nn import Linear
from torchvision import transforms
import torchvision.models as models
from torchvision.models import VGG16_Weights
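
# Usage note (an assumption based on the code below: a webcam at device index 0
# and the checkpoint 'vgg16-transfer-final.pth' in the current working directory):
#
#     python VGGSimpleRunner.py
#
# Press SPACE to classify the current frame; press ESC to quit.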
def main():
    """Capture webcam frames and classify the signed letter with a fine-tuned VGG16.

    1. Initialize parameters and define which letters the model should recognize.
    2. Load the VGG16 model, which is located in the current directory.
    3. Start video capture using the cv2 library.
    4. Press SPACE to capture a photo, which is then transformed
       (resized, center-cropped, normalized).
    5. Feed the transformed photo to the model and predict the letter.
    6. The prediction appears in the terminal.
    7. Steps 4-6 repeat until the user presses the ESC button.
    """
    # Initialize the label encoder with the letters the model can recognize.
    # LabelEncoder sorts labels alphabetically, so this index-to-letter mapping
    # must match the encoding used at training time.
    le = preprocessing.LabelEncoder()
    letters = ['Gamma', 'Beta', 'Eta', 'Phi', 'Theta', 'Zeta']
    le.fit(letters)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load the fine-tuned model (a fully pickled torch model), move it to the
    # selected device, and switch it to inference mode.
    model = torch.load('vgg16-transfer-final.pth', map_location='cpu')
    model = model.to(device)
    model.eval()

    # Define the preprocessing pipeline once, outside the capture loop.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
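    # The Resize(256) -> CenterCrop(224) -> Normalize steps above mirror the
    # standard ImageNet preprocessing that torchvision's pretrained VGG16
    # expects; the mean/std values are the per-channel ImageNet statistics.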
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame, closing...")
            break
        k = cv2.waitKey(1)
        if k % 256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break
        elif k % 256 == 32:
            # SPACE pressed: classify the current frame.
            analysisframe = frame
            cv2.imshow("Frame", analysisframe)
            # OpenCV delivers frames in BGR order; convert to RGB before
            # handing the image to PIL and the model.
            analysisframe = cv2.cvtColor(analysisframe, cv2.COLOR_BGR2RGB)
            analysisframe = Image.fromarray(analysisframe)
            analysisframe = preprocess(analysisframe).unsqueeze(0).to(device)
            with torch.no_grad():
                output = model(analysisframe)
            # Softmax turns the raw logits into class probabilities; the top-2
            # values from torch.topk are therefore the top-2 probabilities.
            softmax = F.softmax(output, dim=1)
            top_two_values, top_two_indices = torch.topk(softmax, k=2, dim=1)
            # Convert tensors to plain Python lists.
            top_two_values = top_two_values.tolist()[0]
            top_two_indices = top_two_indices.tolist()[0]
            print("\nTop two predictions:")
            for rank, (index, probability) in enumerate(zip(top_two_indices, top_two_values), start=1):
                indentation = "\t" if rank == 2 else ""
                letter = le.inverse_transform([index])[0]
                print(f"{indentation} {rank}. Prediction: {letter}, Probability: {probability:.2f}")
        cv2.imshow("Frame", frame)
    cap.release()
    cv2.destroyAllWindows()
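
def build_vgg16_head(num_classes):
    """Illustrative sketch only (an assumption, not taken from this script): how
    a transfer-learning checkpoint like 'vgg16-transfer-final.pth' is typically
    produced -- a pretrained VGG16 backbone whose final classifier layer is
    replaced to match the number of target letters. Useful if only a state_dict
    (rather than the fully pickled model loaded above) is available."""
    model = models.vgg16(weights=VGG16_Weights.DEFAULT)
    # Swap the 1000-way ImageNet head for a num_classes-way linear layer.
    model.classifier[6] = Linear(model.classifier[6].in_features, num_classes)
    return model
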
if __name__ == "__main__":
    main()