diff --git a/code/agent/config/dev_objects.json b/code/agent/config/dev_objects.json
index e6fbdeba..c1d5e69b 100644
--- a/code/agent/config/dev_objects.json
+++ b/code/agent/config/dev_objects.json
@@ -45,7 +45,7 @@
     },
     {
       "type": "sensor.camera.rgb",
-      "id": "Left",
+      "id": "Right",
       "spawn_point": {
         "x": 0.0,
         "y": 0.0,
@@ -64,7 +64,7 @@
     },
     {
       "type": "sensor.camera.rgb",
-      "id": "Right",
+      "id": "Left",
       "spawn_point": {
         "x": 0.0,
         "y": 0.0,
diff --git a/code/agent/launch/agent.launch b/code/agent/launch/agent.launch
index 3c99c354..089c378a 100644
--- a/code/agent/launch/agent.launch
+++ b/code/agent/launch/agent.launch
@@ -21,6 +21,5 @@
-
diff --git a/code/agent/src/agent/agent.py b/code/agent/src/agent/agent.py
index 2076bef3..22b053c7 100755
--- a/code/agent/src/agent/agent.py
+++ b/code/agent/src/agent/agent.py
@@ -1,5 +1,6 @@
 from leaderboard.autoagents.ros1_agent import ROS1Agent
 from leaderboard.autoagents.autonomous_agent import Track
+import math


 def get_entry_point():
@@ -22,23 +23,67 @@ def get_ros_entrypoint(self):

     def sensors(self):
         sensors = [
-            {'type': 'sensor.camera.rgb', 'id': 'Center',
-             'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
-             'yaw': 0.0, 'width': 1280, 'height': 720, 'fov': 100},
-            {'type': 'sensor.lidar.ray_cast', 'id': 'LIDAR',
-             'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
-             'yaw': 0.0},
-            {'type': 'sensor.other.radar', 'id': 'RADAR',
-             'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
-             'yaw': -45.0, 'horizontal_fov': 30, 'vertical_fov': 30},
-            {'type': 'sensor.other.gnss', 'id': 'GPS',
-             'x': 0.7, 'y': -0.4, 'z': 1.60},
-            {'type': 'sensor.other.imu', 'id': 'IMU',
-             'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
-             'yaw': 0.0},
-            {'type': 'sensor.opendrive_map', 'id': 'OpenDRIVE',
-             'reading_frequency': 1},
-            {'type': 'sensor.speedometer', 'id': 'Speed'},
+            {
+                'type': 'sensor.camera.rgb',
+                'id': 'Center',
+                'x': 0.0, 'y': 0.0, 'z': 1.70,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0,
+                'width': 1280, 'height': 720, 'fov': 100
+            },
+            {
+                'type': 'sensor.camera.rgb',
+                'id': 'Back',
+                'x': 0.0, 'y': 0.0, 'z': 1.70,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': math.radians(180.0),
+                'width': 1280, 'height': 720, 'fov': 100
+            },
+            {
+                'type': 'sensor.camera.rgb',
+                'id': 'Left',
+                'x': 0.0, 'y': 0.0, 'z': 1.70,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': math.radians(-90.0),
+                'width': 1280, 'height': 720, 'fov': 100
+            },
+            {
+                'type': 'sensor.camera.rgb',
+                'id': 'Right',
+                'x': 0.0, 'y': 0.0, 'z': 1.70,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': math.radians(90.0),
+                'width': 1280, 'height': 720, 'fov': 100
+            },
+            {
+                'type': 'sensor.lidar.ray_cast',
+                'id': 'LIDAR',
+                'x': 0.0, 'y': 0.0, 'z': 1.70,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0
+            },
+            {
+                'type': 'sensor.other.radar',
+                'id': 'RADAR',
+                'x': 0.7, 'y': -0.4, 'z': 1.60,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': -45.0,
+                'horizontal_fov': 30, 'vertical_fov': 30
+            },
+            {
+                'type': 'sensor.other.gnss',
+                'id': 'GPS',
+                'x': 0.7, 'y': -0.4, 'z': 1.60
+            },
+            {
+                'type': 'sensor.other.imu',
+                'id': 'IMU',
+                'x': 0.7, 'y': -0.4, 'z': 1.60,
+                'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0
+            },
+            {
+                'type': 'sensor.opendrive_map',
+                'id': 'OpenDRIVE',
+                'reading_frequency': 1
+            },
+            {
+                'type': 'sensor.speedometer',
+                'id': 'Speed'
+            }
         ]
         return sensors
diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 163dc92f..b5e80823 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -38,10 +38,13 @@
+
+
+
+
-
+
-      yolov8x-seg
+      -->
@@ -76,16 +79,11 @@
-
-
-
-
-
-
+
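The rewritten `sensors()` list repeats the same camera dictionary four times, varying only `id` and `yaw`. A minimal table-driven sketch is below; `make_camera` is a hypothetical helper, not part of this PR. It keeps the diff's `math.radians(...)` conversion as-is, which is worth double-checking: the radar entry in the same list passes `'yaw': -45.0` without conversion, so the two styles disagree about the unit this field expects.

```python
import math


def make_camera(cam_id, yaw_deg):
    # Hypothetical helper: builds one RGB camera entry per side with the
    # values used in the diff. The math.radians conversion is carried
    # over unchanged; verify the expected unit before relying on it.
    return {
        'type': 'sensor.camera.rgb',
        'id': cam_id,
        'x': 0.0, 'y': 0.0, 'z': 1.70,
        'roll': 0.0, 'pitch': 0.0, 'yaw': math.radians(yaw_deg),
        'width': 1280, 'height': 720, 'fov': 100
    }


cameras = [make_camera(cam_id, yaw) for cam_id, yaw in
           [('Center', 0.0), ('Back', 180.0),
            ('Left', -90.0), ('Right', 90.0)]]
```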
diff --git a/code/perception/src/lidar_distance.py b/code/perception/src/lidar_distance.py
index 6a1caa4e..ff2d40df 100755
--- a/code/perception/src/lidar_distance.py
+++ b/code/perception/src/lidar_distance.py
@@ -4,13 +4,8 @@
 import numpy as np
 import lidar_filter_utility
 from sensor_msgs.msg import PointCloud2
-from std_msgs.msg import Float32
-from sklearn.cluster import DBSCAN
-from sklearn.preprocessing import StandardScaler
-import matplotlib.pyplot as plt
 # from mpl_toolkits.mplot3d import Axes3D
 # from itertools import combinations
-import cv2
 from sensor_msgs.msg import Image as ImageMsg
 from cv_bridge import CvBridge
 # from matplotlib.colors import LinearSegmentedColormap
@@ -34,68 +29,94 @@ def callback(self, data):
         """
         coordinates = ros_numpy.point_cloud2.pointcloud2_to_array(data)

-        # https://stackoverflow.com/questions/44295375/how-to-slice-a-numpy-
-        # ndarray-made-up-of-numpy-void-numbers
-        min_dist_bit_mask = lidar_filter_utility.bounding_box(
+        # Center
+        reconstruct_bit_mask_center = lidar_filter_utility.bounding_box(
             coordinates,
-            max_x=50.,
-            min_x=2.,
-            min_z=rospy.get_param('~min_z', -np.inf),
-            min_y=-2.5,
-            max_y=2.5,
+            max_x=np.inf,
+            min_x=0.,
+            min_z=-1.6,
         )
-
-        reconstruct_bit_mask = lidar_filter_utility.bounding_box(
+        reconstruct_coordinates_center = \
+            coordinates[reconstruct_bit_mask_center]
+        reconstruct_coordinates_xyz_center = np.array(
+            lidar_filter_utility.remove_field_name(
+                reconstruct_coordinates_center,
+                'intensity')
+            .tolist()
+        )
+        dist_array_center = self.reconstruct_img_from_lidar(
+            reconstruct_coordinates_xyz_center, focus="Center")
+        dist_array_center_msg = \
+            self.bridge.cv2_to_imgmsg(dist_array_center,
+                                      encoding="passthrough")
+        dist_array_center_msg.header = data.header
+        self.dist_array_center_publisher.publish(dist_array_center_msg)
+
+        # Back
+        reconstruct_bit_mask_back = lidar_filter_utility.bounding_box(
             coordinates,
-            max_x=rospy.get_param('~max_x', np.inf),
-            min_x=rospy.get_param('~min_x', -np.inf),
-            min_z=rospy.get_param('~min_z', -np.inf),
+            max_x=0.0,
+            min_x=-np.inf,
+            min_z=-1.6,
         )
-
-        # Filter coordinates based in generated bit_mask
-        min_dist_coordinates = coordinates[min_dist_bit_mask]
-        reconstruct_coordinates = coordinates[reconstruct_bit_mask]
-
-        # Create pointcloud from manipulated data
-        coordinates_manipulated = ros_numpy \
-            .point_cloud2.array_to_pointcloud2(min_dist_coordinates)
-        coordinates_manipulated.header = data.header
-
-        # Publish manipulated pointCloud2
-        self.pub_pointcloud.publish(coordinates_manipulated)
-
-        # https://stackoverflow.com/questions/1401712/how-can-the-euclidean-
-        # distance-be-calculated-with-numpy
-        min_dist_coordinates_xyz = np.array(
-            lidar_filter_utility.remove_field_name(min_dist_coordinates,
-                                                   'intensity')
+        reconstruct_coordinates_back = coordinates[reconstruct_bit_mask_back]
+        reconstruct_coordinates_xyz_back = np.array(
+            lidar_filter_utility.remove_field_name(
+                reconstruct_coordinates_back,
+                'intensity')
             .tolist()
         )
-
-        reconstruct_coordinates_xyz = np.array(
-            lidar_filter_utility.remove_field_name(reconstruct_coordinates,
-                                                   'intensity')
+        dist_array_back = self.reconstruct_img_from_lidar(
+            reconstruct_coordinates_xyz_back, focus="Back")
+        dist_array_back_msg = \
+            self.bridge.cv2_to_imgmsg(dist_array_back,
+                                      encoding="passthrough")
+        dist_array_back_msg.header = data.header
+        self.dist_array_back_publisher.publish(dist_array_back_msg)
+
+        # Left
+        reconstruct_bit_mask_left = lidar_filter_utility.bounding_box(
+            coordinates,
+            max_y=np.inf,
+            min_y=0.0,
+            min_z=-1.6,
+        )
+        reconstruct_coordinates_left = coordinates[reconstruct_bit_mask_left]
+        reconstruct_coordinates_xyz_left = np.array(
+            lidar_filter_utility.remove_field_name(
+                reconstruct_coordinates_left,
+                'intensity')
             .tolist()
         )
-
-        # handle minimum distance
-        if min_dist_coordinates_xyz.shape[0] > 0:
-            plot = self.plot_blob(min_dist_coordinates_xyz)
-            img_msg = self.bridge.cv2_to_imgmsg(plot,
-                                                encoding="passthrough")
-            img_msg.header = data.header
-            self.min_dist_img_publisher.publish(img_msg)
-        else:
-            self.pub_min_dist.publish(np.inf)
-
-        # handle reconstruction of lidar points
-        rainbow_cloud = self.reconstruct_img_from_lidar(
-            reconstruct_coordinates_xyz)
-
-        img_msg = self.bridge.cv2_to_imgmsg(rainbow_cloud,
-                                            encoding="passthrough")
-        img_msg.header = data.header
-        self.rainbow_publisher.publish(img_msg)
+        dist_array_left = self.reconstruct_img_from_lidar(
+            reconstruct_coordinates_xyz_left, focus="Left")
+        dist_array_left_msg = \
+            self.bridge.cv2_to_imgmsg(dist_array_left,
+                                      encoding="passthrough")
+        dist_array_left_msg.header = data.header
+        self.dist_array_left_publisher.publish(dist_array_left_msg)
+
+        # Right
+        reconstruct_bit_mask_right = lidar_filter_utility.bounding_box(
+            coordinates,
+            max_y=-0.0,
+            min_y=-np.inf,
+            min_z=-1.6
+        )
+        reconstruct_coordinates_right = coordinates[reconstruct_bit_mask_right]
+        reconstruct_coordinates_xyz_right = np.array(
+            lidar_filter_utility.remove_field_name(
+                reconstruct_coordinates_right,
+                'intensity')
+            .tolist()
+        )
+        dist_array_right = self.reconstruct_img_from_lidar(
+            reconstruct_coordinates_xyz_right, focus="Right")
+        dist_array_right_msg = \
+            self.bridge.cv2_to_imgmsg(dist_array_right,
+                                      encoding="passthrough")
+        dist_array_right_msg.header = data.header
+        self.dist_array_right_publisher.publish(dist_array_right_msg)

     def listener(self):
         """ Initializes the node and it's publishers
@@ -113,31 +134,41 @@
             queue_size=10
         )

-        # publisher for the closest blob in the lidar point cloud
-        self.pub_min_dist = rospy.Publisher(
+        # publisher for dist_array
+        self.dist_array_center_publisher = rospy.Publisher(
+            rospy.get_param(
+                '~image_distance_topic',
+                '/paf/hero/Center/dist_array'
+            ),
+            ImageMsg,
+            queue_size=10
+        )
+
+        # publisher for dist_array
+        self.dist_array_back_publisher = rospy.Publisher(
             rospy.get_param(
-                '~range_topic',
-                '/paf/hero/Center/min_distance'
+                '~image_distance_topic',
+                '/paf/hero/Back/dist_array'
             ),
-            Float32,
+            ImageMsg,
             queue_size=10
         )

-        # publisher for reconstructed lidar image
-        self.rainbow_publisher = rospy.Publisher(
+        # publisher for dist_array
+        self.dist_array_left_publisher = rospy.Publisher(
             rospy.get_param(
                 '~image_distance_topic',
-                '/paf/hero/Center/rainbow_image'
+                '/paf/hero/Left/dist_array'
             ),
             ImageMsg,
             queue_size=10
         )

-        # publisher for 3d blob graph
-        self.min_dist_img_publisher = rospy.Publisher(
+        # publisher for dist_array
+        self.dist_array_right_publisher = rospy.Publisher(
             rospy.get_param(
                 '~image_distance_topic',
-                '/paf/hero/Center/min_dist_image'
+                '/paf/hero/Right/dist_array'
             ),
             ImageMsg,
             queue_size=10
@@ -148,59 +179,7 @@

         rospy.spin()

-    def plot_blob(self, xyz_coords):
-        # creates a 3d graph thazt highlights blobs of points
-        xyz_coords_standardized = StandardScaler().fit_transform(xyz_coords)
-        """pairwise_distances_x = np.abs(np.subtract.outer(xyz_coords[:, 0],
-                                         xyz_coords[:, 0]))"""
-
-        # publish minimum distance
-        eps = 0.2
-        min_samples = 5
-        dbscan = DBSCAN(eps=eps, min_samples=min_samples)
-        labels = dbscan.fit_predict(xyz_coords_standardized)
-
-        min_distance_cluster = []
-
-        # Iterate through each cluster
-        for label in set(labels):
-            if label != -1:  # Ignore noise points
-                cluster_points = xyz_coords[labels == label]
-                min_distance_cluster.append(np.min(cluster_points[:, 0]))
-
-        # Publish the minimum distance within clusters
-        if len(min_distance_cluster) > 0:
-            self.pub_min_dist.publish(min(min_distance_cluster))
-        else:
-            self.pub_min_dist.publish(np.inf)
-
-        fig = plt.figure()
-        ax = fig.add_subplot(111, projection='3d')
-
-        for label in set(labels):
-            # print(label)
-            if label == -1:
-                ax.scatter(xyz_coords[labels == label][:, 0],
-                           xyz_coords[labels == label][:, 1],
-                           xyz_coords[labels == label][:, 2],
-                           c='gray', marker='o', label='Noise')
-            else:
-                ax.scatter(xyz_coords[labels == label][:, 0],
-                           xyz_coords[labels == label][:, 1],
-                           xyz_coords[labels == label][:, 2],
-                           label=f'Cluster {label + 1}', s=50)
-
-        ax.set_xlabel('X')
-        ax.set_ylabel('Y')
-        ax.set_zlabel('Z')
-
-        fig.canvas.draw()
-        img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
-        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        return img
-
-    def reconstruct_img_from_lidar(self, coordinates_xyz):
+    def reconstruct_img_from_lidar(self, coordinates_xyz, focus):

         # reconstruct 3d LIDAR-Data and calculate 2D Pixel
         # according to Camera-World
@@ -220,26 +199,42 @@

         # reconstruct camera image with LIDAR-Data
         img = np.zeros(shape=(720, 1280), dtype=np.float32)
+        dist_array = np.zeros(shape=(720, 1280, 3), dtype=np.float32)
         for c in coordinates_xyz:
-            point = np.array([c[1], c[2], c[0], 1])
-            pixel = np.matmul(m, point)
-            x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
-            if x >= 0 and x <= 1280 and y >= 0 and y <= 720:
-                img[719-y][1279-x] = c[0]
-
-        # Rainbox color mapping to highlight distances
-        """colors = [(0, 0, 0)] + [(1, 0, 0), (1, 1, 0),
-                                   (0, 1, 0), (0, 1, 1),
-                                   (0, 0, 1)]
-        cmap_name = 'rainbow'
-        rainbow_cmap = LinearSegmentedColormap.from_list(cmap_name,
-                                                         colors,
-                                                         N=256)
-
-        img_colored = (rainbow_cmap(img / np.max(img)) * 255).astype(np.uint8)
-        img_bgr = cv2.cvtColor(img_colored, cv2.COLOR_RGBA2BGR)"""
-
-        return img
+            if focus == "Center":
+                point = np.array([c[1], c[2], c[0], 1])
+                pixel = np.matmul(m, point)
+                x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
+                if x >= 0 and x < 1280 and y >= 0 and y < 720:
+                    img[719-y][1279-x] = c[0]
+                    dist_array[719-y][1279-x] = \
+                        np.array([c[0], c[1], c[2]], dtype=np.float32)
+            if focus == "Back":
+                point = np.array([c[1], c[2], c[0], 1])
+                pixel = np.matmul(m, point)
+                x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
+                if x >= 0 and x < 1280 and y >= 0 and y < 720:
+                    img[y][1279-x] = -c[0]
+                    dist_array[y][1279-x] = \
+                        np.array([-c[0], c[1], c[2]], dtype=np.float32)
+            if focus == "Left":
+                point = np.array([c[0], c[2], c[1], 1])
+                pixel = np.matmul(m, point)
+                x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
+                if x >= 0 and x < 1280 and y >= 0 and y < 720:
+                    img[719-y][1279-x] = c[1]
+                    dist_array[y][1279-x] = \
+                        np.array([c[0], c[1], c[2]], dtype=np.float32)
+            if focus == "Right":
+                point = np.array([c[0], c[2], c[1], 1])
+                pixel = np.matmul(m, point)
+                x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
+                if x >= 0 and x < 1280 and y >= 0 and y < 720:
+                    img[y][1279-x] = -c[1]
+                    dist_array[y][1279-x] = \
+                        np.array([c[0], c[1], c[2]], dtype=np.float32)
+
+        return dist_array


 if __name__ == '__main__':
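The new `callback` runs an identical mask / strip-intensity / project / publish pipeline once per direction. A possible table-driven refactor is sketched below, assuming it lives inside the same node class (so `self.bridge`, the four `dist_array_*_publisher` attributes, and the module-level `lidar_filter_utility` import are available) and that `bounding_box` accepts the keyword limits used above; `DIRECTIONS` and `process_all_directions` are hypothetical names.

```python
import numpy as np

import lidar_filter_utility

# Bounding-box limits per direction, copied from the callback above.
DIRECTIONS = {
    "Center": dict(max_x=np.inf, min_x=0.0, min_z=-1.6),
    "Back":   dict(max_x=0.0, min_x=-np.inf, min_z=-1.6),
    "Left":   dict(max_y=np.inf, min_y=0.0, min_z=-1.6),
    "Right":  dict(max_y=-0.0, min_y=-np.inf, min_z=-1.6),
}


def process_all_directions(self, coordinates, header):
    # Method sketch for the lidar node: filter, strip intensity,
    # reconstruct the distance image, and publish, per direction.
    for focus, limits in DIRECTIONS.items():
        mask = lidar_filter_utility.bounding_box(coordinates, **limits)
        xyz = np.array(
            lidar_filter_utility.remove_field_name(
                coordinates[mask], 'intensity').tolist())
        dist_array = self.reconstruct_img_from_lidar(xyz, focus=focus)
        msg = self.bridge.cv2_to_imgmsg(dist_array, encoding="passthrough")
        msg.header = header
        getattr(self, f"dist_array_{focus.lower()}_publisher").publish(msg)
```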
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index 24d42dae..7345d0c0 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3

-from time import perf_counter
 from ros_compatibility.node import CompatibleNode
 import ros_compatibility as roscomp
 import torch
@@ -20,6 +19,7 @@
 from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
 import numpy as np
 from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
+import rospy

 """
 VisionNode:
@@ -41,17 +41,20 @@ def __init__(self, name, **kwargs):
         super().__init__(name, **kwargs)
         self.model_dict = {
             "fasterrcnn_resnet50_fpn_v2":
-            (fasterrcnn_resnet50_fpn_v2,
+            (fasterrcnn_resnet50_fpn_v2(
+                weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT),
             FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
             "detection",
             "pyTorch"),
             "fasterrcnn_mobilenet_v3_large_320_fpn":
-            (fasterrcnn_mobilenet_v3_large_320_fpn,
+            (fasterrcnn_mobilenet_v3_large_320_fpn(
+                weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT),
             FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
             "detection",
             "pyTorch"),
             "deeplabv3_resnet101":
-            (deeplabv3_resnet101,
+            (deeplabv3_resnet101(
+                weights=DeepLabV3_ResNet101_Weights.DEFAULT),
             DeepLabV3_ResNet101_Weights.DEFAULT,
             "segmentation",
             "pyTorch"),
@@ -78,14 +81,28 @@ def __init__(self, name, **kwargs):
         self.bridge = CvBridge()
         self.role_name = self.get_param("role_name", "hero")
         self.side = self.get_param("side", "Center")
+        self.center = self.get_param("center")
+        self.back = self.get_param("back")
+        self.left = self.get_param("left")
+        self.right = self.get_param("right")
+
         self.device = torch.device("cuda"
                                    if torch.cuda.is_available() else "cpu")
         self.depth_images = []
-        self.DEBUG = False
+        self.dist_arrays = None

         # publish / subscribe setup
-        self.setup_camera_subscriptions()
-        self.setup_rainbow_subscription()
+        if self.center:
+            self.setup_camera_subscriptions("Center")
+        if self.back:
+            self.setup_camera_subscriptions("Back")
+        if self.left:
+            self.setup_camera_subscriptions("Left")
+        if self.right:
+            self.setup_camera_subscriptions("Right")
+
+        # self.setup_rainbow_subscription()
+        self.setup_dist_array_subscription()
         self.setup_camera_publishers()
         self.setup_object_distance_publishers()
         self.setup_traffic_light_publishers()
@@ -94,10 +111,7 @@ def __init__(self, name, **kwargs):

         # model setup
         model_info = self.model_dict[self.get_param("model")]
-        if model_info[3] == "pyTorch":
-            self.model = model_info[0](weights=model_info[1])
-        else:
-            self.model = model_info[0]
+        self.model = model_info[0]
         self.weights = model_info[1]
         self.type = model_info[2]
         self.framework = model_info[3]
@@ -113,17 +127,21 @@ def __init__(self, name, **kwargs):
             for param in self.model.parameters():
                 param.requires_grad = False
             self.model.to(self.device)
+
         # ultralytics setup
         if self.framework == "ultralytics":
             self.model = self.model(self.weights)

-    def setup_camera_subscriptions(self):
+    # tensorflow setup
+
+    def setup_camera_subscriptions(self, side):
         self.new_subscription(
             msg_type=numpy_msg(ImageMsg),
             callback=self.handle_camera_image,
-            topic=f"/carla/{self.role_name}/{self.side}/image",
+            topic=f"/carla/{self.role_name}/{side}/image",
             qos_profile=1
         )
+        # print(f"Subscribed to Side: {side}")

     def setup_rainbow_subscription(self):
         self.new_subscription(
@@ -133,13 +151,44 @@
             qos_profile=1
         )

-    def setup_camera_publishers(self):
-        self.publisher = self.new_publisher(
+    def setup_dist_array_subscription(self):
+        self.new_subscription(
             msg_type=numpy_msg(ImageMsg),
-            topic=f"/paf/{self.role_name}/{self.side}/segmented_image",
+            callback=self.handle_dist_array,
+            topic='/paf/hero/Center/dist_array',
             qos_profile=1
         )

+    def setup_camera_publishers(self):
+        if self.center:
+            self.publisher_center = self.new_publisher(
+                msg_type=numpy_msg(ImageMsg),
+                topic=f"/paf/{self.role_name}/Center/segmented_image",
+                qos_profile=1
+            )
+            # print("Publisher to Center!")
+        if self.back:
+            self.publisher_back = self.new_publisher(
+                msg_type=numpy_msg(ImageMsg),
+                topic=f"/paf/{self.role_name}/Back/segmented_image",
+                qos_profile=1
+            )
+            # print("Publisher to Back!")
+        if self.left:
+            self.publisher_left = self.new_publisher(
+                msg_type=numpy_msg(ImageMsg),
+                topic=f"/paf/{self.role_name}/Left/segmented_image",
+                qos_profile=1
+            )
+            # print("Publisher to Left!")
+        if self.right:
+            self.publisher_right = self.new_publisher(
+                msg_type=numpy_msg(ImageMsg),
+                topic=f"/paf/{self.role_name}/Right/segmented_image",
+                qos_profile=1
+            )
+            # print("Publisher to Right!")
+
     def setup_object_distance_publishers(self):
         self.distance_publisher = self.new_publisher(
             msg_type=numpy_msg(Float32MultiArray),
@@ -168,7 +217,17 @@ def handle_camera_image(self, image):
         img_msg = self.bridge.cv2_to_imgmsg(vision_result,
                                             encoding="rgb8")
         img_msg.header = image.header

-        self.publisher.publish(img_msg)
+        side = rospy.resolve_name(img_msg.header.frame_id).split('/')[2]
+        if side == "Center":
+            self.publisher_center.publish(img_msg)
+        if side == "Back":
+            self.publisher_back.publish(img_msg)
+        if side == "Left":
+            self.publisher_left.publish(img_msg)
+        if side == "Right":
+            self.publisher_right.publish(img_msg)
+
+        # print(f"Published Image on Side: {side}")

         pass

     def handle_rainbow_image(self, image):
@@ -189,6 +248,13 @@ def handle_rainbow_image(self, image):
         else:
             self.logerr("Depth-Fiel build up! No distances available yet!")

+    def handle_dist_array(self, dist_array):
+        dist_array = \
+            self.bridge.imgmsg_to_cv2(img_msg=dist_array,
+                                      desired_encoding='passthrough')
+        # print("RECEIVED DIST")
+        self.dist_arrays = dist_array
+
     def predict_torch(self, image):
         self.model.eval()
         cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
@@ -213,78 +279,139 @@ def predict_torch(self, image):
         return vision_result

     def predict_ultralytics(self, image):
-        start = perf_counter()
-
         cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
                                              desired_encoding='passthrough')
         cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

-        output = self.model.predict(cv_image, half=True, verbose=False)[0]
-
-        s1 = perf_counter()
-
-        if 9 in output.boxes.cls:
-            self.process_traffic_lights(output, cv_image, image.header)
-
-        s2 = perf_counter()
+        output = self.model(cv_image, half=True, verbose=False)

-        box_img = self.calc_draw_distance(output.boxes, cv_image)
-
-        now = perf_counter()
-        if self.DEBUG:
-            self.loginfo("S1: {}, S2: {}, S3: {}, Total: {}".format(
-                round(s1 - start, 4), round(s2 - s1, 4), round(now - s2, 4),
-                round(now - start, 4)))
+        """distance_output = []
+        c_boxes = []
+        c_labels = []
+        for r in output:
+            boxes = r.boxes
+            for box in boxes:
+                cls = box.cls.item()
+                pixels = box.xyxy[0]
+                print(pixels)
+                if len(self.depth_images) > 0:
+                    distances = np.asarray(
+                        [self.depth_images[i][int(pixels[1]):int(pixels[3]):1,
+                                              int(pixels[0]):int(pixels[2]):1]
+                            for i in range(len(self.depth_images))])
+                    non_zero_filter = distances[distances != 0]
+
+                    if len(non_zero_filter) > 0:
+                        obj_dist = np.min(non_zero_filter)
+                    else:
+                        obj_dist = np.inf
+
+                c_boxes.append(torch.tensor(pixels))
+                c_labels.append(f"Class: {cls}, Meters: {obj_dist}")
+                distance_output.append([cls, obj_dist])
+
+        # print(distance_output)
+        # self.logerr(distance_output)
+        self.distance_publisher.publish(
+            Float32MultiArray(data=distance_output))

-        return box_img
+        transposed_image = np.transpose(cv_image, (2, 0, 1))
+        image_np_with_detections = torch.tensor(transposed_image,
+                                                dtype=torch.uint8)"""

-        return box_img
-
-    def calc_draw_distance(self, boxes, cv_image):
+        # handle distance of objects
         distance_output = []
         c_boxes = []
         c_labels = []
-
-        for box in boxes:
-            if len(self.depth_images) > 0:
+        for r in output:
+            boxes = r.boxes
+            for box in boxes:
                 cls = box.cls.item()
                 pixels = box.xyxy[0]
-
-                distances = np.asarray(
-                    [self.depth_images[i][int(pixels[1]):int(pixels[3]):1,
-                                          int(pixels[0]):int(pixels[2]):1]
-                     for i in range(len(self.depth_images))])
-                non_zero_filter = distances[distances != 0]
-
-                if len(non_zero_filter) > 0:
-                    obj_dist = np.min(non_zero_filter)
-                else:
-                    obj_dist = np.inf
-
-                c_boxes.append(torch.tensor(pixels))
-                c_labels.append(f"Class: {cls}, Meters: {obj_dist}")
-                distance_output.append([cls, obj_dist])
+                if self.dist_arrays is not None:
+                    distances = np.asarray(
+                        self.dist_arrays[int(pixels[1]):int(pixels[3]):1,
+                                         int(pixels[0]):int(pixels[2]):1,
+                                         ::])
+                    condition = distances[:, :, 0] != 0
+                    non_zero_filter = distances[condition]
+                    distances_copy = distances.copy()
+                    distances_copy[distances_copy == 0] = np.inf
+
+                    if len(non_zero_filter) > 0:
+                        sorted_indices = np.argsort(distances_copy[:, :, 0],
+                                                    axis=None)
+                        x1, y1 = np.unravel_index(sorted_indices[0],
+                                                  distances_copy.shape[:2])
+                        x2, y2 = np.unravel_index(sorted_indices[1],
+                                                  distances_copy.shape[:2])
+                        obj_dist1 = distances_copy[x1][y1].copy()
+                        obj_dist2 = distances_copy[x2][y2].copy()
+
+                        abs_distance = np.sqrt(
+                            obj_dist1[0]**2 +
+                            obj_dist1[1]**2 +
+                            obj_dist1[2]**2)
+
+                        # create 2d glass plane at object
+                        # with box dimension
+                        scale_width = abs(obj_dist1[1] - obj_dist2[1])\
+                            / abs(y1-y2)
+                        scale_height = abs(obj_dist1[2] - obj_dist2[2])\
+                            / abs(x1-x2)
+                        width = distances_copy.shape[1] * scale_width
+                        height = distances_copy.shape[0] * scale_height
+
+                        # upper left
+                        ul_x = obj_dist1[0]
+                        ul_y = obj_dist1[1] - (-y1 + scale_width)
+                        ul_z = obj_dist1[2] - (-x1 + scale_height)
+
+                        # lower right
+                        lr_x = obj_dist1[0]
+                        lr_y = ul_y + width
+                        lr_z = ul_z + height
+
+                    else:
+                        obj_dist1 = (np.inf, np.inf, np.inf)
+                        abs_distance = np.inf
+                        ul_x = ul_y = ul_z = np.inf
+                        lr_x = lr_y = lr_z = np.inf
+
+                    c_boxes.append(torch.tensor(pixels))
+                    c_labels.append(f"Class: {cls},"
+                                    f"Meters: {round(abs_distance, 2)},"
+                                    f"({round(float(obj_dist1[0]), 2)},"
+                                    f"{round(float(obj_dist1[1]), 2)},"
+                                    f"{round(float(obj_dist1[2]), 2)})")
+                    distance_output.append([cls,
+                                            abs_distance,
+                                            ul_x, ul_y, ul_z,
+                                            lr_x, lr_y, lr_z])

         self.distance_publisher.publish(
-            Float32MultiArray(data=distance_output))
+           Float32MultiArray(data=distance_output))

         transposed_image = np.transpose(cv_image, (2, 0, 1))
         image_np_with_detections = torch.tensor(transposed_image,
                                                 dtype=torch.uint8)

+        if 9 in output[0].boxes.cls:
+            self.process_traffic_lights(output[0], cv_image, image.header)
+
         c_boxes = torch.stack(c_boxes)
-        print(image_np_with_detections.shape, c_boxes.shape, c_labels)
+        # print(image_np_with_detections.shape, c_boxes.shape, c_labels)
         box = draw_bounding_boxes(image_np_with_detections,
                                   c_boxes,
                                   c_labels,
                                   colors='blue',
                                   width=3,
                                   font_size=12)
-
-        np_box_img = np.transpose(box.detach().numpy(), (1, 2, 0))
+        np_box_img = np.transpose(box.detach().numpy(),
+                                  (1, 2, 0))
         box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)

         return box_img

+        # return output[0].plot()
+
-    async def process_traffic_lights(self, prediction, cv_image, image_header):
+    def process_traffic_lights(self, prediction, cv_image, image_header):
         indices = (prediction.boxes.cls == 9).nonzero().squeeze().cpu().numpy()
         indices = np.asarray([indices]) if indices.size == 1 else indices
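The nearest-point lookup in `predict_ultralytics` is the core of the new distance logic: zero entries in the cropped `dist_arrays` slice mark pixels without a LiDAR return, so they are replaced with `np.inf` before sorting by the forward (x) channel. A self-contained sketch with fake data, a minimal illustration rather than the node's exact code:

```python
import numpy as np

# `crop` stands in for the (h, w, 3) slice of dist_arrays inside one
# bounding box; channel 0 is the forward distance.
crop = np.random.rand(4, 5, 3).astype(np.float32)
crop[1, 2] = 0.0                                # fake pixel with no LiDAR hit

masked = crop.copy()
masked[masked == 0] = np.inf                    # ignore pixels with no return

order = np.argsort(masked[:, :, 0], axis=None)  # sort by forward distance
r1, c1 = np.unravel_index(order[0], masked.shape[:2])
r2, c2 = np.unravel_index(order[1], masked.shape[:2])

nearest = masked[r1, c1]                        # (x, y, z) of closest return
abs_distance = float(np.sqrt((nearest ** 2).sum()))
```

Note that in the diff the `unravel_index` results named `x1`/`y1` are actually row/column indices, and `scale_width`/`scale_height` divide by `abs(y1-y2)` and `abs(x1-x2)`, which divides by zero whenever the two closest returns share a row or column; a guard there may be worthwhile.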
diff --git a/doc/06_perception/12_distance_to_objects.md b/doc/06_perception/12_distance_to_objects.md
index 0d6f1825..82568f0f 100644
--- a/doc/06_perception/12_distance_to_objects.md
+++ b/doc/06_perception/12_distance_to_objects.md
@@ -80,14 +80,14 @@
 The resulting Image takes the distance in meters as values for its pixels. It th
 In the next step we want to get the distance for every bounding box the object-detection found.

-We want to return a list of tuple containing a class and a distance, like this:
+We want to return a list of tuples containing a class, an absolute distance, and the X, Y, Z coordinates of the object, like this:

     [
-        [class_id, distance_meters],
-        [12.0, 3.456],
+        [class_id, abs_distance, X, Y, Z],
+        [12.0, 7.970812491638262, 5.6549606, -5.5982423, -0.4636328],
+        [12.0, 8.684970384807999, 5.6547265, 6.57918, -0.40886718],
         ...,
-        [2.0, np.inf],
-        [2.0, 7.890],
+        [2.0, 1.3798048392074562, 1.065586, -0.60292965, -0.63628906]
     ]

 Since we cant be certain we will find a Lidar-Point in the depth image for every Pixel in the bounding box of the original image,
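A subscriber for the published distance list might look like the sketch below. The node name and topic name are assumptions (the publisher's topic is not visible in this diff), and the stride must match the producer: the documented layout above carries five values per object, while `predict_ultralytics` in this diff appends eight (it also carries the glass-plane corners).

```python
import numpy as np
import rospy
from std_msgs.msg import Float32MultiArray


def on_object_distances(msg):
    # Stride 5 matches the documented [class_id, abs_distance, X, Y, Z]
    # layout; the code in this diff actually appends 8 values per object
    # (adding the glass-plane corners), so adjust the stride accordingly.
    entries = np.asarray(msg.data, dtype=np.float32).reshape(-1, 5)
    for class_id, abs_distance, x, y, z in entries:
        rospy.loginfo("class %.0f at %.2f m (%.2f, %.2f, %.2f)",
                      class_id, abs_distance, x, y, z)


rospy.init_node('object_distance_listener')  # hypothetical consumer node
rospy.Subscriber('/paf/hero/Center/object_distance',  # topic is an assumption
                 Float32MultiArray, on_object_distances)
rospy.spin()
```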