-
Notifications
You must be signed in to change notification settings - Fork 4
/
bebop_semantic_segmentation.py
93 lines (75 loc) · 2.71 KB
/
bebop_semantic_segmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import rospy
from sensor_msgs.msg import CameraInfo, Image
import ros_numpy
# real time semantic segmentation model with opencv with bebop camera.
import torch
import torchvision
import torch2trt
# was opencv issue, need to do this to not get import cv2 error
import sys
sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
import cv2
import numpy as np
# deeplabv3 semantic segmentation model
model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
# throw the model onto gpu
model = model.cuda().eval().half()
# model wrapper
class ModelWrapper(torch.nn.Module):
def __init__(self, model):
super(ModelWrapper, self).__init__()
self.model = model
def forward(self, x):
# need to get 'out' for actual output
return self.model(x)['out']
model_w = ModelWrapper(model).half()
# sample data for getting the model
data = torch.ones((1, 3, 224, 224)).cuda().half()
# convert to tensorRT
model_trt = torch2trt.torch2trt(model_w, [data], fp16_mode=True)
device = torch.device('cuda')
# assigned means and standard deviations
mean = 255.0 * np.array([0.485, 0.456, 0.406])
stdev = 255.0 * np.array([0.229, 0.224, 0.225])
# normalizing effect
normalize = torchvision.transforms.Normalize(mean, stdev)
# pre-process image, set to right image format, transpose, normalize
def preprocess(camera_value):
global device, normalize
x = cv2.resize(camera_value, (224,224))
x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
x = x.transpose((2,0,1))
x = torch.from_numpy(x).float()
x = normalize(x)
x = x.to(device)
x = x[None, ...]
return x
# run model and create mask image for objects
def process(camera_value):
output = model_trt(preprocess(camera_value).half())[0].detach()
output = output.cpu().float().numpy()
mask = 1.0 * (output.argmax(0) == 15)
res = mask[:, :, None] * cv2.resize(camera_value, (224,224))
return res
res = np.zeros((480, 856,3))
def vision_callback(data):
'''
vision callback for getting data from bebop image topic.
'''
global res
# create np array with correct dtype with ros_numpy package.
image_arr = ros_numpy.numpify(data)
# process with deeplab semantic segmentation
res = process(image_arr)
if __name__ == '__main__':
# bebop vision detection node
rospy.init_node('bebop_vision_segmentation')
# queue size 1 and large buffer size for images to avoid latency from ssd object detection.
rospy.Subscriber('/bebop/image_raw', Image, vision_callback, queue_size=1, buff_size=2**24)
while True:
# Capture frame-by-frame
# Display the resulting frame
cv2.imshow('frame',res)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
rospy.spin()