Merge pull request #98 from una-auxme/86-new-segmentation-node
86 new segmentation node
okrusch committed Nov 27, 2023
2 parents 3c8516b + 4272dfc commit 090cbc4
Showing 4 changed files with 288 additions and 0 deletions.
17 changes: 17 additions & 0 deletions code/agent/config/rviz_config.rviz
@@ -69,6 +69,23 @@ Visualization Manager:
PointCloud2: true
Value: true
Zoom Factor: 1
- Class: rviz/Image
Enabled: true
Image Rendering: background and overlay
Image Topic: /paf/hero/Center/segmented_image
Name: VisionNode Output
Overlay Alpha: 0.5
Queue Size: 2
Transport Hint: raw
Unreliable: false
Value: true
Visibility:
Grid: true
Imu: true
Path: true
PointCloud2: true
Value: true
Zoom Factor: 1
- Alpha: 1
Class: rviz_plugin_tutorials/Imu
Color: 204; 51; 204
14 changes: 14 additions & 0 deletions code/perception/launch/perception.launch
@@ -8,6 +8,20 @@
<param name="role_name" value="$(arg role_name)" />
</node>

<node pkg="perception" type="vision_node.py" name="VisionNode" output="screen">
<param name="role_name" value="$(arg role_name)" />
<param name="side" value="Center" />
<!--
Object-Detection:
- fasterrcnn_resnet50_fpn_v2
- fasterrcnn_mobilenet_v3_large_320_fpn
Image-Segmentation:
- deeplabv3_resnet101
-->
<param name="model" value="fasterrcnn_resnet50_fpn_v2" />
</node>

<node pkg="perception" type="global_plan_distance_publisher.py" name="GlobalPlanDistance" output="screen">
<param name="control_loop_rate" value="0.1" />
<param name="role_name" value="$(arg role_name)" />
179 changes: 179 additions & 0 deletions code/perception/src/vision_node.py
@@ -0,0 +1,179 @@
#!/usr/bin/env python3

from ros_compatibility.node import CompatibleNode
import ros_compatibility as roscomp
import torch
from torchvision.models.segmentation import DeepLabV3_ResNet101_Weights, \
deeplabv3_resnet101
from torchvision.models.detection.faster_rcnn import \
FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, \
FasterRCNN_ResNet50_FPN_V2_Weights, \
fasterrcnn_resnet50_fpn_v2, \
fasterrcnn_mobilenet_v3_large_320_fpn
import torchvision.transforms as t
import cv2
from rospy.numpy_msg import numpy_msg
from sensor_msgs.msg import Image as ImageMsg
from std_msgs.msg import Header
from cv_bridge import CvBridge
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import numpy as np
from time import perf_counter
"""
VisionNode:
The vision node provides a base node for object-detection
or image-segmentation.
This node provides the following features:
- Insert pretrained AI-Models
- Subscription to one camera
- Preprocessing of Input Image
- Publishing output image
"""


class VisionNode(CompatibleNode):
def __init__(self, name, **kwargs):
# vision node
super().__init__(name, **kwargs)
self.model_dict = {
"fasterrcnn_resnet50_fpn_v2":
(fasterrcnn_resnet50_fpn_v2(
weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
"detection",
"pyTorch"),
"fasterrcnn_mobilenet_v3_large_320_fpn":
(fasterrcnn_mobilenet_v3_large_320_fpn(
weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT),
FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
"detection",
"pyTorch"),
"deeplabv3_resnet101":
(deeplabv3_resnet101(
weights=DeepLabV3_ResNet101_Weights.DEFAULT),
DeepLabV3_ResNet101_Weights.DEFAULT,
"segmentation",
"pyTorch")
}

# general setup
self.bridge = CvBridge()
self.role_name = self.get_param("role_name", "hero")
self.side = self.get_param("side", "Center")
        # CUDA disabled for now due to memory issues:
        # self.device = torch.device("cuda"
        #                            if torch.cuda.is_available() else "cpu")
        self.device = torch.device("cpu")
        print("VisionNode working on: ", self.device)

# publish / subscribe setup
self.setup_camera_subscriptions()
self.setup_camera_publishers()
self.image_msg_header = Header()
self.image_msg_header.frame_id = "segmented_image_frame"

# model setup
model_info = self.model_dict[self.get_param("model")]
self.model = model_info[0]
self.weights = model_info[1]
self.type = model_info[2]
self.framework = model_info[3]
print("Vision Node Configuration:")
print(f"Model -> {self.get_param('model')},")
print(f"Type -> {self.type}, Framework -> {self.framework}")
self.model.to(self.device)

def setup_camera_subscriptions(self):
self.new_subscription(
msg_type=numpy_msg(ImageMsg),
callback=self.handle_camera_image,
topic=f"/carla/{self.role_name}/{self.side}/image",
qos_profile=1
)

def setup_camera_publishers(self):
self.publisher = self.new_publisher(
msg_type=numpy_msg(ImageMsg),
topic=f"/paf/{self.role_name}/{self.side}/segmented_image",
qos_profile=1
)

def handle_camera_image(self, image):
startTime = perf_counter()
self.model.eval()
cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
desired_encoding='passthrough')
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

preprocess = t.Compose([
t.ToTensor(),
t.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])

input_image = preprocess(cv_image).unsqueeze(dim=0)
input_image = input_image.to(self.device)
print("Before Model: ", perf_counter() - startTime)
prediction = self.model(input_image)
print("After Model: ", perf_counter() - startTime)
        if self.type == "detection":
            vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
        elif self.type == "segmentation":
            vision_result = self.create_mask(cv_image, prediction['out'])

        img_msg = self.bridge.cv2_to_imgmsg(vision_result,
                                            encoding="passthrough")
        img_msg.header = image.header

        self.publisher.publish(img_msg)
        print("After Publish: ", perf_counter() - startTime)

    def create_mask(self, input_image, model_output):
        # model_output has shape (1, num_classes, H, W); pick the most
        # likely class per pixel, then build one boolean mask per class
        # as expected by draw_segmentation_masks
        class_map = torch.argmax(model_output.squeeze(dim=0), dim=0)
        output_predictions = torch.stack(
            [class_map == i for i in range(21)])

        # draw_segmentation_masks expects a uint8 image in 0..255
        input_image = t.ToTensor()(input_image)
        input_image = (input_image * 255).to(dtype=torch.uint8)
        segmented_image = draw_segmentation_masks(input_image,
                                                  output_predictions)
        # (C, H, W) tensor -> (H, W, C) array for OpenCV
        np_segmented = np.transpose(segmented_image.detach().numpy(),
                                    (1, 2, 0))
        cv_segmented = cv2.cvtColor(np_segmented, cv2.COLOR_BGR2RGB)
        return cv_segmented

    def apply_bounding_boxes(self, input_image, model_output):
        # (H, W, C) numpy image -> (C, H, W) uint8 tensor for torchvision
        transposed_image = np.transpose(input_image, (2, 0, 1))
        image_np_with_detections = torch.tensor(transposed_image,
                                                dtype=torch.uint8)
        boxes = model_output['boxes']
        # scores = model_output['scores']
        # map numeric class ids to human-readable category names
        labels = [self.weights.meta["categories"][i]
                  for i in model_output['labels']]

box = draw_bounding_boxes(image_np_with_detections,
boxes,
labels,
colors='red',
width=2)
np_box_img = np.transpose(box.detach().numpy(),
(1, 2, 0))
box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
return box_img

    def run(self):
        self.spin()


if __name__ == "__main__":
roscomp.init("VisionNode")
node = VisionNode("VisionNode")
node.run()
78 changes: 78 additions & 0 deletions doc/06_perception/07_vision_node.md
@@ -0,0 +1,78 @@
# Vision Node

The Vision Node serves as a replacement for the previous segmentation node.
It provides an adaptive interface that can perform object detection or image segmentation
with several different models. The model is specified as a parameter in the perception.launch file.

## Usage

The following code shows how the Vision-Node is specified in perception.launch

```xml
<node pkg="perception" type="vision_node.py" name="VisionNode" output="screen">
<param name="role_name" value="$(arg role_name)" />
<param name="side" value="Center" />
<!--
Object-Detection:
- fasterrcnn_resnet50_fpn_v2
- fasterrcnn_mobilenet_v3_large_320_fpn
Image-Segmentation:
- deeplabv3_resnet101
-->
<param name="model" value="deeplabv3_resnet101" />
</node>
```

Depending on preferences and targets, a different model can be used by replacing the value of the
model parameter with one of the names listed in the comment above.

The Vision-Node automatically switches between object detection and image segmentation, loads the correct weights and performs the matching preprocessing.

For now the Vision-Node only supports PyTorch models. Within the next sprint it should be able to
accept other frameworks as well. It should also be possible to run object detection and image segmentation at the same time.

## How it works

### Initialization

The Vision-Node contains a dictionary of all its models. Depending on the model parameter it initializes the correct model and weights.

```python
self.model_dict = {
    "fasterrcnn_resnet50_fpn_v2":
        (fasterrcnn_resnet50_fpn_v2(
            weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
         FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
         "detection",
         "pyTorch"),
    "fasterrcnn_mobilenet_v3_large_320_fpn":
        (fasterrcnn_mobilenet_v3_large_320_fpn(
            weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT),
         FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
         "detection",
         "pyTorch"),
    "deeplabv3_resnet101":
        (deeplabv3_resnet101(
            weights=DeepLabV3_ResNet101_Weights.DEFAULT),
         DeepLabV3_ResNet101_Weights.DEFAULT,
         "segmentation",
         "pyTorch")
}
```
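
At startup the node looks up the entry for the configured model and unpacks it (as done in `vision_node.py`):

```python
model_info = self.model_dict[self.get_param("model")]
self.model = model_info[0]      # instantiated torch model
self.weights = model_info[1]    # pretrained weights, used for label metadata
self.type = model_info[2]       # "detection" or "segmentation"
self.framework = model_info[3]  # currently always "pyTorch"
self.model.to(self.device)
```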

### Core

The core of the Vision-Node is the handle_camera_image function.
This function is automatically triggered by the Camera-Subscriber of the Vision-Node and performs the following steps:

1. Convert ImageMsg to CV2-Image
2. Perform preprocessing on CV2-Image
3. Forward image through model
4. Call further processing function for output depending on type
1. Detection -> apply_bounding_boxes
2. Segmentation -> create_mask
5. Convert CV2-Image to ImageMsg
6. Publish ImageMsg over ImagePublisher
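
Condensed, the callback looks like this (simplified from `vision_node.py` above; the timing prints are omitted):

```python
def handle_camera_image(self, image):
    # 1. ImageMsg -> CV2 image
    cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
                                         desired_encoding='passthrough')
    cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

    # 2. normalize with the ImageNet statistics the models were trained on
    preprocess = t.Compose([
        t.ToTensor(),
        t.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])])
    input_image = preprocess(cv_image).unsqueeze(dim=0).to(self.device)

    # 3. forward the image through the model
    prediction = self.model(input_image)

    # 4. postprocess depending on the model type
    if self.type == "detection":
        vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
    elif self.type == "segmentation":
        vision_result = self.create_mask(cv_image, prediction['out'])

    # 5. + 6. CV2 image -> ImageMsg, then publish
    img_msg = self.bridge.cv2_to_imgmsg(vision_result,
                                        encoding="passthrough")
    img_msg.header = image.header
    self.publisher.publish(img_msg)
```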

## Visualization

The Vision-Node implements an ImagePublisher under the topic `/paf/<role_name>/<side>/segmented_image` (e.g. `/paf/hero/Center/segmented_image`).

The RViz configuration file has been changed accordingly to display the published images alongside the camera feed.
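
To inspect the output outside of RViz, a minimal viewer can be attached to the topic. This is only a sketch: the node name below is made up, and the topic assumes the default `hero`/`Center` configuration.

```python
#!/usr/bin/env python3
import cv2
import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import Image

bridge = CvBridge()


def show(msg):
    # convert the published ImageMsg back to a CV2 image and display it
    frame = bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough')
    cv2.imshow("segmented_image", frame)
    cv2.waitKey(1)


rospy.init_node("segmented_image_viewer")  # hypothetical helper node
rospy.Subscriber("/paf/hero/Center/segmented_image", Image, show)
rospy.spin()
```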

## Known Issues

### Time

First experiments showed that the handle_camera_image function is far too slow to be used reliably. It takes around 1.5 seconds to handle one image.

Right now the Vision-Node does not use CUDA due to memory issues that could not be fixed right away.

Performance is expected to improve considerably once CUDA is used.
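
Two likely levers, untested here and therefore only an assumption: running inference under `torch.no_grad()` so that no gradient buffers are allocated, and moving the model back to CUDA once the memory issue is understood.

```python
# sketch of a possible mitigation (assumption, not verified on this setup)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model.to(self.device)
self.model.eval()  # once at startup instead of once per image

with torch.no_grad():  # inference only: skip autograd bookkeeping
    prediction = self.model(input_image.to(self.device))
```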

There is also plenty of room for testing different models inside the Vision-Node to evaluate their accuracy and runtime performance.
