-
Notifications
You must be signed in to change notification settings - Fork 0
/
cam_demo.py
110 lines (80 loc) · 3.06 KB
/
cam_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import cv2
import numpy as np
import onnxruntime
from math import cos, sin
import datetime
def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size=80):
    """Draw the three head-pose axes onto ``img`` and return it.

    The axes are projected from the given Euler angles and drawn as line
    segments starting at a common origin. ``img`` is modified in place by
    ``cv2.line`` and is also returned for convenience.

    Args:
        img: Image to draw on; only ``img.shape[:2]`` (height, width) is
            read here, the rest is handled by ``cv2.line``.
        yaw: Yaw angle in degrees (negated internally before projection).
        pitch: Pitch angle in degrees.
        roll: Roll angle in degrees.
        tdx: X coordinate of the axes origin. Used only when ``tdy`` is
            given as well.
        tdy: Y coordinate of the axes origin. Used only when ``tdx`` is
            given as well.
        size: Length of each axis in pixels before projection.

    Returns:
        The same ``img`` object that was passed in.
    """
    # Degrees -> radians. Yaw is additionally sign-flipped.
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180

    # The caller's origin is honoured only when both coordinates are
    # supplied; otherwise fall back to the centre of the image.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis pointing down. drawn in green
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    # cv2.line needs integer pixel coordinates; colours are (B, G, R).
    origin = (int(tdx), int(tdy))
    cv2.line(img, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return img
def main():
    """Run a live webcam head-pose demo until the 'q' key is pressed.

    Each captured frame is searched for faces with a Haar cascade. Every
    detected face is cropped with an extra margin, resized to 64x64, fed
    to the ONNX model, and the three predicted angles are drawn as axes on
    the frame, which is then shown in a window titled 'Video'.

    Raises:
        RuntimeError: If capture device 0 cannot be opened.
    """
    ort_session = onnxruntime.InferenceSession("fsanet_epoch_95.onnx")
    # The input name does not change between frames, so look it up once.
    input_name = ort_session.get_inputs()[0].name
    face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    video_capture = cv2.VideoCapture(0)
    # Fraction of the face box width/height added as margin on every side.
    ad = 0.6

    try:
        if not video_capture.isOpened():
            raise RuntimeError("Could not open video capture device 0")

        while True:
            # Capture frame-by-frame
            ret, frame = video_capture.read()
            if not ret or frame is None:
                # Camera unplugged or stream ended: stop instead of
                # crashing on a missing frame.
                break

            img_h, img_w = frame.shape[:2]
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
                flags=cv2.CASCADE_SCALE_IMAGE,
            )

            for (x, y, w, h) in faces:
                # Grow the detection box by the margin, clamped to the
                # frame so the slice below never leaves the image.
                xw1 = max(int(x - ad * w), 0)
                yw1 = max(int(y - ad * h), 0)
                xw2 = min(int(x + w + ad * w), img_w - 1)
                yw2 = min(int(y + h + ad * h), img_h - 1)

                # Crop, resize to 64x64 and reorder HWC -> 1xCxHxW float32
                # before handing the face to the model.
                face = frame[yw1:yw2 + 1, xw1:xw2 + 1, :]
                face = cv2.resize(face, (64, 64))
                blob = np.transpose(face, [2, 0, 1])
                blob = np.expand_dims(blob, axis=0).astype(np.float32)

                pred_labels = ort_session.run(None, {input_name: blob})

                # NOTE(review): the output is read as (pitch, yaw, roll);
                # confirm this order against the exported model.
                pitch = pred_labels[0][0][0]
                yaw = pred_labels[0][0][1]
                roll = pred_labels[0][0][2]

                # Anchor the axes on the detected face so several faces do
                # not all draw on top of each other at the frame centre.
                draw_axis(frame, yaw, pitch, roll,
                          tdx=x + w / 2, tdy=y + h / 2)

            # Display the resulting frame
            cv2.imshow('Video', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close windows even if the loop raised.
        video_capture.release()
        cv2.destroyAllWindows()
# Start the webcam demo only when this file is executed as a script.
if __name__ == '__main__':
    main()