From f1dc1cc5c9ec694a2e43e132242a2e27a5b25888 Mon Sep 17 00:00:00 2001
From: Leon Okrusch
Date: Fri, 24 Nov 2023 12:05:04 +0100
Subject: [PATCH 1/3] feat(#86): Added new segmentation/detection node

---
 code/agent/config/rviz_config.rviz       |  17 +++
 code/perception/launch/perception.launch |  11 +-
 code/perception/src/vision_node.py       | 164 +++++++++++++++++++++++
 doc/06_perception/07_vision_node.md      |  81 +++++++++++
 4 files changed, 272 insertions(+), 1 deletion(-)
 create mode 100755 code/perception/src/vision_node.py
 create mode 100644 doc/06_perception/07_vision_node.md

diff --git a/code/agent/config/rviz_config.rviz b/code/agent/config/rviz_config.rviz
index 60da1674..7064cc88 100644
--- a/code/agent/config/rviz_config.rviz
+++ b/code/agent/config/rviz_config.rviz
@@ -69,6 +69,23 @@ Visualization Manager:
       PointCloud2: true
       Value: true
       Zoom Factor: 1
+    - Class: rviz/Image
+      Enabled: true
+      Image Rendering: background and overlay
+      Image Topic: /paf/hero/Center/segmented_image
+      Name: VisionNode Output
+      Overlay Alpha: 0.5
+      Queue Size: 2
+      Transport Hint: raw
+      Unreliable: false
+      Value: true
+      Visibility:
+        Grid: true
+        Imu: true
+        Path: true
+        PointCloud2: true
+        Value: true
+      Zoom Factor: 1
    - Alpha: 1
      Class: rviz_plugin_tutorials/Imu
      Color: 204; 51; 204
diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index bc8c8f9c..cf3d5ae6 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -8,9 +8,18 @@
-
+
+
+
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
new file mode 100755
index 00000000..9fef555d
--- /dev/null
+++ b/code/perception/src/vision_node.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+
+from ros_compatibility.node import CompatibleNode
+import ros_compatibility as roscomp
+import torch
+from torchvision.models.segmentation import DeepLabV3_ResNet101_Weights,\
+    deeplabv3_resnet101
+from torchvision.models.detection.faster_rcnn import FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,\
+    FasterRCNN_ResNet50_FPN_V2_Weights,\
+    fasterrcnn_resnet50_fpn_v2,\
+    fasterrcnn_mobilenet_v3_large_320_fpn
+import torchvision.transforms as t
+import torchvision
+import cv2
+from rospy.numpy_msg import numpy_msg
+from sensor_msgs.msg import Image as ImageMsg
+from sensor_msgs.msg import CameraInfo
+from std_msgs.msg import Header
+from cv_bridge import CvBridge
+from PIL import Image
+from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
+import numpy as np
+from time import perf_counter
+"""
+VisionNode:
+
+The vision node provides a base node for object-detection or image-segmentation.
+This node provides the following features:
+
+- Insert pretrained AI-Models
+- Subscription to one camera
+- Preprocessing of Input Image
+- Publishing output image
+
+
+Models to insert:
+
+- DeepLabV3_ResNet50:
+    self.model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
+- DeepLabV3_ResNet101:
+    self.model = deeplabv3_resnet101(DeepLabV3_ResNet101_Weights)
+
+"""
+
+
+class VisionNode(CompatibleNode):
+    def __init__(self, name, **kwargs):
+        # vision node
+        super().__init__(name, **kwargs)
+        self.model_dict = {
+            "fasterrcnn_resnet50_fpn_v2": (fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"),
+            "fasterrcnn_mobilenet_v3_large_320_fpn": (fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, "detection", "pyTorch"),
+            "deeplabv3_resnet101": (deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", "pyTorch")
+        }
+
+
+        #general setup
+        self.bridge = CvBridge()
+        self.role_name = self.get_param("role_name", "hero")
+        self.side = self.get_param("side", "Center")
+        #self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") Cuda Memory Issues
+        self.device = torch.device("cpu")
+        print("VisionNode working on: ", self.device)
+
+        #publish / subscribe setup
+        self.setup_camera_subscriptions()
+        self.setup_camera_publishers()
+        self.image_msg_header = Header()
+        self.image_msg_header.frame_id = "segmented_image_frame"
+
+        #model setup
+        model_info = self.model_dict[self.get_param("model")]
+        self.model = model_info[0]
+        self.weights = model_info[1]
+        self.type = model_info[2]
+        self.framework = model_info[3]
+        print(f"Vision Node Configuration: Model -> {self.get_param('model')}, Type -> {self.type}, Framework -> {self.framework}")
+        self.model.to(self.device)
+
+    def setup_camera_subscriptions(self):
+        self.new_subscription(
+            msg_type=numpy_msg(ImageMsg),
+            callback=self.handle_camera_image,
+            topic=f"/carla/{self.role_name}/{self.side}/image",
+            qos_profile=1
+        )
+
+
+    def setup_camera_publishers(self):
+        self.publisher = self.new_publisher(
+            msg_type=numpy_msg(ImageMsg),
+            topic=f"/paf/{self.role_name}/{self.side}/segmented_image",
+            qos_profile=1
+        )
+
+
+    def handle_camera_image(self, image):
+        startTime = perf_counter()
+        self.model.eval()
+        cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+                                             desired_encoding='passthrough')
+        cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+
+        preprocess = t.Compose([
+            t.ToTensor(),
+            t.Normalize(mean=[0.485, 0.456, 0.406],
+                        std=[0.229, 0.224, 0.225])
+        ])
+
+        input_image = preprocess(cv_image).unsqueeze(dim=0)
+        input_image = input_image.to(self.device)
+        print("Before Model: ", perf_counter() - startTime)
+        prediction = self.model(input_image)
+        print("After Model: ", perf_counter() - startTime)
+        if(self.type == "detection"):
+            vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
+        if(self.type == "segmentation"):
+            vision_result = self.create_mask(cv_image, prediction['out'])
+
+        img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="passthrough")
+        img_msg.header = image.header
+
+        self.publisher.publish(img_msg)
+        print("After Publish: ", perf_counter() - startTime)
+
+        pass
+
+    def create_mask(self, input_image, model_output):
+        output_predictions = torch.argmax(model_output, dim=0)
+
+        for i in range(21):
+            output_predictions[i] = output_predictions[i] == i
+
+        output_predictions = output_predictions.to(dtype=torch.bool)
+
+        input_image = t.ToTensor()(input_image)
+        input_image = input_image.to(dtype=torch.uint8)
+        print(output_predictions.shape)
+        print(input_image.shape)
+        segmented_image = draw_segmentation_masks(input_image, output_predictions)
+        cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(), cv2.COLOR_BGR2RGB)
+        return cv_segmented
+
+    def apply_bounding_boxes(self, input_image, model_output):
+        transposed_image = np.transpose(input_image, (2, 0, 1))
+        image_np_with_detections = torch.tensor(transposed_image, dtype=torch.uint8)
+        boxes = model_output['boxes']
+        #scores = model_output['scores']
+        labels = [self.weights.meta["categories"][i] for i in model_output['labels']]
+
+        box = draw_bounding_boxes(image_np_with_detections, boxes, labels, colors='red', width=2)
+        np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
+        box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
+        return box_img
+
+    def run(self):
+        self.spin()
+        pass
+
+
+if __name__ == "__main__":
+    roscomp.init("VisionNode")
+    node = VisionNode("VisionNode")
+    node.run()
diff --git a/doc/06_perception/07_vision_node.md b/doc/06_perception/07_vision_node.md
new file mode 100644
index 00000000..a21f9055
--- /dev/null
+++ b/doc/06_perception/07_vision_node.md
@@ -0,0 +1,81 @@
+# Vision Node
+
+The Vision Node serves as a replacement for the previous segmentation node.
+It provides an adaptive interface that is able to perform object-detection or image-segmentation
+with several different models. The model can be specified as a parameter in the perception.launch file.
+
+## Usage
+
+The following code shows how the Vision-Node is specified in perception.launch.
+
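+A minimal sketch of the node entry (the original XML did not survive extraction;
+the parameter names match the node's get_param calls, while the pkg and type
+attribute values are assumptions about this repository's layout):
+
+```xml
+<node pkg="perception" type="vision_node.py" name="VisionNode" output="screen">
+    <param name="role_name" value="hero" />
+    <param name="side" value="Center" />
+    <!-- Object-Detection:
+           fasterrcnn_resnet50_fpn_v2
+           fasterrcnn_mobilenet_v3_large_320_fpn
+         Image-Segmentation:
+           deeplabv3_resnet101 -->
+    <param name="model" value="fasterrcnn_resnet50_fpn_v2" />
+</node>
+```
+
+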
+Depending on preferences and targets a different model can be used by replacing the value of the model parameter
+with one of the model names from the comment above.
+
+The Vision-Node will automatically switch between object-detection and image-segmentation, load the correct weights and perform the correct preprocessing.
+
+For now the Vision-Node only supports PyTorch models. Within the next sprint it should be able to
+accept other frameworks as well. It should also be possible to run object-detection and image-segmentation at the same time.
+
+
+## How it works
+
+### Initialization
+
+The Vision-Node contains a dictionary with all its models. Depending on the model parameter it will initialize the correct model and weights.
+
+```python
+self.model_dict = {
+    "fasterrcnn_resnet50_fpn_v2": (fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"),
+    "fasterrcnn_mobilenet_v3_large_320_fpn": (fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, "detection", "pyTorch"),
+    "deeplabv3_resnet101": (deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", "pyTorch")
+}
+```
+
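+Each entry maps a model name to a four-tuple of (initialized model, weights,
+type, framework). Adding a model means adding another entry with the same
+layout; a sketch for torchvision's DeepLabV3-ResNet50, which the node's
+docstring mentions but does not wire up yet (hypothetical entry, untested):
+
+```python
+from torchvision.models.segmentation import DeepLabV3_ResNet50_Weights, \
+    deeplabv3_resnet50
+
+# Hypothetical additional entry following the existing four-tuple layout:
+self.model_dict["deeplabv3_resnet50"] = (
+    deeplabv3_resnet50(weights=DeepLabV3_ResNet50_Weights.DEFAULT),
+    DeepLabV3_ResNet50_Weights.DEFAULT,
+    "segmentation",
+    "pyTorch")
+```
+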
+### Core
+
+The core of the Vision-Node is the handle_camera_image function.
+This function is automatically triggered by the Camera-Subscriber of the Vision-Node and performs the following steps (a condensed sketch follows the list):
+
+1. Convert ImageMsg to CV2-Image
+2. Perform preprocessing on CV2-Image
+3. Forward image through model
+4. Call further processing function for output depending on type
+    1. Detection -> apply_bounding_boxes
+    2. Segmentation -> create_mask
+5. Convert CV2-Image to ImageMsg
+6. Publish ImageMsg over ImagePublisher
+
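+The happy path of the callback condenses to the sketch below (taken directly
+from handle_camera_image, with the timing prints omitted):
+
+```python
+def handle_camera_image(self, image):
+    self.model.eval()  # inference mode for the pretrained model
+    # 1. ImageMsg -> CV2-Image (RGB -> BGR for OpenCV)
+    cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+                                         desired_encoding='passthrough')
+    cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+    # 2. Preprocessing: tensor conversion, ImageNet normalization, batch dim
+    preprocess = t.Compose([
+        t.ToTensor(),
+        t.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
+    input_image = preprocess(cv_image).unsqueeze(dim=0).to(self.device)
+    # 3. Forward pass
+    prediction = self.model(input_image)
+    # 4. Type-dependent postprocessing
+    if self.type == "detection":
+        vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
+    if self.type == "segmentation":
+        vision_result = self.create_mask(cv_image, prediction['out'])
+    # 5./6. CV2-Image -> ImageMsg, publish on the segmented_image topic
+    img_msg = self.bridge.cv2_to_imgmsg(vision_result,
+                                        encoding="passthrough")
+    img_msg.header = image.header
+    self.publisher.publish(img_msg)
+```
+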
+## Visualization
+
+The Vision-Node implements an ImagePublisher under the topic: "/paf/<role_name>/Center/segmented_image"
+
+The RViz configuration file has been changed accordingly to display the published images alongside the camera.
+
+## Known Issues
+
+### Time
+
+First experiments showed that the handle_camera_image function is way too slow to be used reliably. It takes around 1.5 seconds to handle one image. 
+
+Right now the Vision-Node is not using CUDA due to CUDA memory issues that couldn't be fixed right away.
+
+The performance is expected to rise quite a bit when using CUDA.
+
+Also there is lots more room for testing different models inside the Vision-Node to evaluate their accuracy and time-performance.
\ No newline at end of file

From eacb5d0c8c967149be97f7f6e705d0c36ddd4354 Mon Sep 17 00:00:00 2001
From: Leon Okrusch
Date: Fri, 24 Nov 2023 12:07:27 +0100
Subject: [PATCH 2/3] feat(#86): new-segmentation-node

---
 code/perception/src/vision_node.py | 103 +++++++++++++++++------------
 1 file changed, 59 insertions(+), 44 deletions(-)

diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index 9fef555d..726ea4de 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -3,43 +3,33 @@
 from ros_compatibility.node import CompatibleNode
 import ros_compatibility as roscomp
 import torch
-from torchvision.models.segmentation import DeepLabV3_ResNet101_Weights,\
+from torchvision.models.segmentation import DeepLabV3_ResNet101_Weights, \
     deeplabv3_resnet101
-from torchvision.models.detection.faster_rcnn import FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,\
-    FasterRCNN_ResNet50_FPN_V2_Weights,\
-    fasterrcnn_resnet50_fpn_v2,\
-    fasterrcnn_mobilenet_v3_large_320_fpn
+from torchvision.models.detection.faster_rcnn import \
+    FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, \
+    FasterRCNN_ResNet50_FPN_V2_Weights, \
+    fasterrcnn_resnet50_fpn_v2, \
+    fasterrcnn_mobilenet_v3_large_320_fpn
 import torchvision.transforms as t
-import torchvision
 import cv2
 from rospy.numpy_msg import numpy_msg
 from sensor_msgs.msg import Image as ImageMsg
-from sensor_msgs.msg import CameraInfo
 from std_msgs.msg import Header
 from cv_bridge import CvBridge
-from PIL import Image
 from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
 import numpy as np
 from time import perf_counter
 """
 VisionNode:
 
-The vision node provides a base node for object-detection or image-segmentation.
+The vision node provides a base node for object-detection
+or image-segmentation.
 This node provides the following features:
 
 - Insert pretrained AI-Models
 - Subscription to one camera
 - Preprocessing of Input Image
 - Publishing output image
-
-
-Models to insert:
-
-- DeepLabV3_ResNet50:
-    self.model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
-- DeepLabV3_ResNet101:
-    self.model = deeplabv3_resnet101(DeepLabV3_ResNet101_Weights)
-
 """
@@ -48,33 +38,50 @@ def __init__(self, name, **kwargs):
         # vision node
         super().__init__(name, **kwargs)
         self.model_dict = {
-            "fasterrcnn_resnet50_fpn_v2": (fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT), FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, "detection", "pyTorch"),
-            "fasterrcnn_mobilenet_v3_large_320_fpn": (fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT), FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT, "detection", "pyTorch"),
-            "deeplabv3_resnet101": (deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT), DeepLabV3_ResNet101_Weights.DEFAULT, "segmentation", "pyTorch")
+            "fasterrcnn_resnet50_fpn_v2":
+            (fasterrcnn_resnet50_fpn_v2(
+                weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
+             FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
+             "detection",
+             "pyTorch"),
+            "fasterrcnn_mobilenet_v3_large_320_fpn":
+            (fasterrcnn_mobilenet_v3_large_320_fpn(
+                weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT),
+             FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
+             "detection",
+             "pyTorch"),
+            "deeplabv3_resnet101":
+            (deeplabv3_resnet101(
+                weights=DeepLabV3_ResNet101_Weights.DEFAULT),
+             DeepLabV3_ResNet101_Weights.DEFAULT,
+             "segmentation",
+             "pyTorch")
         }
 
-
-        #general setup
+        # general setup
         self.bridge = CvBridge()
         self.role_name = self.get_param("role_name", "hero")
         self.side = self.get_param("side", "Center")
-        #self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") Cuda Memory Issues
+        # self.device = torch.device("cuda"
+        # if torch.cuda.is_available() else "cpu") Cuda Memory Issues
         self.device = torch.device("cpu")
         print("VisionNode working on: ", self.device)
 
-        #publish / subscribe setup
+        # publish / subscribe setup
         self.setup_camera_subscriptions()
         self.setup_camera_publishers()
         self.image_msg_header = Header()
         self.image_msg_header.frame_id = "segmented_image_frame"
 
-        #model setup
+        # model setup
         model_info = self.model_dict[self.get_param("model")]
         self.model = model_info[0]
         self.weights = model_info[1]
         self.type = model_info[2]
         self.framework = model_info[3]
-        print(f"Vision Node Configuration: Model -> {self.get_param('model')}, Type -> {self.type}, Framework -> {self.framework}")
+        print("Vision Node Configuration:")
+        print(f"Model -> {self.get_param('model')},")
+        print(f"Type -> {self.type}, Framework -> {self.framework}")
         self.model.to(self.device)
 
     def setup_camera_subscriptions(self):
@@ -85,14 +92,12 @@ def setup_camera_subscriptions(self):
             qos_profile=1
         )
 
-
     def setup_camera_publishers(self):
         self.publisher = self.new_publisher(
             msg_type=numpy_msg(ImageMsg),
            topic=f"/paf/{self.role_name}/{self.side}/segmented_image",
            qos_profile=1
        )
 
-
     def handle_camera_image(self, image):
         startTime = perf_counter()
@@ -106,23 +111,24 @@ def handle_camera_image(self, image):
             t.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
         ])
-
+
         input_image = preprocess(cv_image).unsqueeze(dim=0)
         input_image = input_image.to(self.device)
         print("Before Model: ", perf_counter() - startTime)
         prediction = self.model(input_image)
         print("After Model: ", perf_counter() - startTime)
-        if(self.type == "detection"):
+        if (self.type == "detection"):
             vision_result = self.apply_bounding_boxes(cv_image, prediction[0])
-        if(self.type == "segmentation"):
+        if (self.type == "segmentation"):
             vision_result = self.create_mask(cv_image, prediction['out'])
 
-        img_msg = self.bridge.cv2_to_imgmsg(vision_result, encoding="passthrough")
+        img_msg = self.bridge.cv2_to_imgmsg(vision_result,
+                                            encoding="passthrough")
         img_msg.header = image.header
-
+
         self.publisher.publish(img_msg)
         print("After Publish: ", perf_counter() - startTime)
-
+
         pass
 
     def create_mask(self, input_image, model_output):
@@ -132,24 +138,33 @@ def create_mask(self, input_image, model_output):
             output_predictions[i] = output_predictions[i] == i
 
         output_predictions = output_predictions.to(dtype=torch.bool)
-
+
         input_image = t.ToTensor()(input_image)
         input_image = input_image.to(dtype=torch.uint8)
         print(output_predictions.shape)
         print(input_image.shape)
-        segmented_image = draw_segmentation_masks(input_image, output_predictions)
-        cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(), cv2.COLOR_BGR2RGB)
+        segmented_image = draw_segmentation_masks(input_image,
+                                                  output_predictions)
+        cv_segmented = cv2.cvtColor(segmented_image.detach().numpy(),
+                                    cv2.COLOR_BGR2RGB)
         return cv_segmented
 
     def apply_bounding_boxes(self, input_image, model_output):
         transposed_image = np.transpose(input_image, (2, 0, 1))
-        image_np_with_detections = torch.tensor(transposed_image, dtype=torch.uint8)
+        image_np_with_detections = torch.tensor(transposed_image,
+                                                dtype=torch.uint8)
         boxes = model_output['boxes']
-        #scores = model_output['scores']
-        labels = [self.weights.meta["categories"][i] for i in model_output['labels']]
-
-        box = draw_bounding_boxes(image_np_with_detections, boxes, labels, colors='red', width=2)
-        np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
+        # scores = model_output['scores']
+        labels = [self.weights.meta["categories"][i]
+                  for i in model_output['labels']]
+
+        box = draw_bounding_boxes(image_np_with_detections,
+                                  boxes,
+                                  labels,
+                                  colors='red',
+                                  width=2)
+        np_box_img = np.transpose(box.detach().numpy(),
+                                  (1, 2, 0))
         box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
         return box_img

From f183cc7c1a67cc8f2444fa446888f8f6e0ee4596 Mon Sep 17 00:00:00 2001
From: okrusch <102369315+okrusch@users.noreply.github.com>
Date: Mon, 27 Nov 2023 13:51:23 +0100
Subject: [PATCH 3/3] Update 07_vision_node.md

---
 doc/06_perception/07_vision_node.md | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/doc/06_perception/07_vision_node.md b/doc/06_perception/07_vision_node.md
index a21f9055..1b72ead4 100644
--- a/doc/06_perception/07_vision_node.md
+++ b/doc/06_perception/07_vision_node.md
@@ -12,18 +12,17 @@
 ```
 
-
 Depending on preferences and targets a different model can be used by replacing the value of the model parameter
 with one of the model names from the comment above.
@@ -32,7 +31,6 @@ The Vision-Node will automatically switch between object-detection and image-segmentation, load the correct weights and perform the correct preprocessing.
 For now the Vision-Node only supports PyTorch models. Within the next sprint it should be able to
 accept other frameworks as well. It should also be possible to run object-detection and image-segmentation at the same time.
 
-
 ## How it works
 
 ### Initialization
@@ -61,7 +59,6 @@ This function is automatically triggered by the Camera-Subscriber of the Vision-Node
     1. Detection -> apply_bounding_boxes
     2. Segmentation -> create_mask
 5. Convert CV2-Image to ImageMsg
 6. Publish ImageMsg over ImagePublisher
 
-
 ## Visualization
@@ -72,10 +69,10 @@ The Vision-Node implements an ImagePublisher under the topic: "/paf/<role_name>/Center/segmented_image"
 The RViz configuration file has been changed accordingly to display the published images alongside the camera.
 
 ## Known Issues
 
 ### Time
 
-First experiments showed that the handle_camera_image function is way too slow to be used reliably. It takes around 1.5 seconds to handle one image. 
+First experiments showed that the handle_camera_image function is way too slow to be used reliably. It takes around 1.5 seconds to handle one image.
 
 Right now the Vision-Node is not using CUDA due to CUDA memory issues that couldn't be fixed right away.
 
 The performance is expected to rise quite a bit when using CUDA.
 
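+A sketch of the usual guarded device selection (this is the line that is
+currently commented out in the node; re-enabling it once the memory issues
+are resolved should give the expected speed-up):
+
+```python
+# Prefer CUDA when available; fall back to CPU otherwise.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+```
+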
-Also there is lots more room for testing different models inside the Vision-Node to evaluate their accuracy and time-performance.
\ No newline at end of file
+Also there is lots more room for testing different models inside the Vision-Node to evaluate their accuracy and time-performance.
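+
+### Segmentation masks
+
+create_mask takes the argmax of the DeepLabV3 output over the batch dimension
+and then overwrites slices of that result in place, so the boolean masks it
+hands to draw_segmentation_masks are unlikely to be the intended per-class
+masks. A hedged sketch of building one boolean (H, W) mask per class instead
+(untested in the node):
+
+```python
+num_classes = 21  # the Pascal VOC label set used by the pretrained DeepLabV3
+# model_output is the (1, num_classes, H, W) tensor from prediction['out']
+class_map = model_output[0].argmax(dim=0)  # (H, W): class index per pixel
+bool_masks = torch.stack(
+    [class_map == c for c in range(num_classes)])  # (num_classes, H, W)
+```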