diff --git a/code/agent/config/dev_objects.json b/code/agent/config/dev_objects.json
index 90b24361..e6fbdeba 100644
--- a/code/agent/config/dev_objects.json
+++ b/code/agent/config/dev_objects.json
@@ -104,23 +104,23 @@
       "type": "sensor.lidar.ray_cast",
       "id": "LIDAR",
       "spawn_point": {
-        "x": 0.7,
-        "y": 0.4,
-        "z": 1.60,
+        "x": 0.0,
+        "y": 0.0,
+        "z": 1.70,
         "roll": 0.0,
         "pitch": 0.0,
         "yaw": 0.0
       },
-      "range": 85,
-      "rotation_frequency": 10,
+      "range": 50,
+      "rotation_frequency": 15,
       "channels": 64,
       "upper_fov": 10,
       "lower_fov": -30,
-      "points_per_second": 600000,
+      "points_per_second": 1000000,
       "atmosphere_attenuation_rate": 0.004,
       "dropoff_general_rate": 0.45,
       "dropoff_intensity_limit": 0.8,
-      "dropoff_zero_intensity": 0.4
+      "dropoff_zero_intensity": 0.2
     },
     {
       "type": "sensor.other.radar",
diff --git a/code/agent/src/agent/agent.py b/code/agent/src/agent/agent.py
index ad8d076c..2076bef3 100755
--- a/code/agent/src/agent/agent.py
+++ b/code/agent/src/agent/agent.py
@@ -23,10 +23,10 @@ def get_ros_entrypoint(self):
     def sensors(self):
        sensors = [
            {'type': 'sensor.camera.rgb', 'id': 'Center',
-            'x': 0.7, 'y': 0.0, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
-            'yaw': 0.0, 'width': 300, 'height': 200, 'fov': 100},
+            'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
+            'yaw': 0.0, 'width': 1280, 'height': 720, 'fov': 100},
            {'type': 'sensor.lidar.ray_cast', 'id': 'LIDAR',
-            'x': 0.7, 'y': 0.0, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
+            'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
             'yaw': 0.0},
            {'type': 'sensor.other.radar', 'id': 'RADAR',
             'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 500635fb..3c463823 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -60,6 +60,7 @@
       - deeplabv3_resnet101
       - yolov8x-seg
     -->
+
@@ -78,8 +79,8 @@
-
-
+
+
diff --git a/code/perception/src/lidar_distance.py b/code/perception/src/lidar_distance.py
index 1228c607..6a1caa4e 100755
--- a/code/perception/src/lidar_distance.py
+++ b/code/perception/src/lidar_distance.py
@@ -4,17 +4,16 @@
 import numpy as np
 import lidar_filter_utility
 from sensor_msgs.msg import PointCloud2
-
+from std_msgs.msg import Float32
 from sklearn.cluster import DBSCAN
 from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
-from perception.msg import MinDistance
 # from mpl_toolkits.mplot3d import Axes3D
 # from itertools import combinations
 import cv2
 from sensor_msgs.msg import Image as ImageMsg
 from cv_bridge import CvBridge
-from matplotlib.colors import LinearSegmentedColormap
+# from matplotlib.colors import LinearSegmentedColormap


 class LidarDistance():
@@ -39,12 +38,11 @@ def callback(self, data):
         # ndarray-made-up-of-numpy-void-numbers
         min_dist_bit_mask = lidar_filter_utility.bounding_box(
             coordinates,
-            max_x=rospy.get_param('~max_x', np.inf),
-            min_x=rospy.get_param('~min_x', -np.inf),
-            max_y=rospy.get_param('~max_y', np.inf),
-            min_y=rospy.get_param('~min_y', -np.inf),
-            max_z=rospy.get_param('~max_z', np.inf),
+            max_x=50.,
+            min_x=2.,
             min_z=rospy.get_param('~min_z', -np.inf),
+            min_y=-2.5,
+            max_y=2.5,
         )

         reconstruct_bit_mask = lidar_filter_utility.bounding_box(
@@ -81,15 +79,19 @@ def callback(self, data):
         )

         # handle minimum distance
-        plot = self.plot_blob(min_dist_coordinates_xyz)
-        img_msg = self.bridge.cv2_to_imgmsg(plot,
-                                            encoding="passthrough")
-        img_msg.header = data.header
-        self.min_dist_img_publisher.publish(img_msg)
+        if min_dist_coordinates_xyz.shape[0] > 0:
+            plot = self.plot_blob(min_dist_coordinates_xyz)
+            img_msg = self.bridge.cv2_to_imgmsg(plot,
+                                                encoding="passthrough")
+            img_msg.header = data.header
+            self.min_dist_img_publisher.publish(img_msg)
+        else:
+            self.pub_min_dist.publish(np.inf)

         # handle reconstruction of lidar points
         rainbow_cloud = self.reconstruct_img_from_lidar(
             reconstruct_coordinates_xyz)
+
         img_msg = self.bridge.cv2_to_imgmsg(rainbow_cloud,
                                             encoding="passthrough")
         img_msg.header = data.header
@@ -107,7 +109,8 @@ def listener(self):
                 '~point_cloud_topic',
                 '/carla/hero/' + rospy.get_namespace() + '_filtered'
             ),
-            PointCloud2
+            PointCloud2,
+            queue_size=10
         )

         # publisher for the closest blob in the lidar point cloud
@@ -116,7 +119,8 @@
                 '~range_topic',
                 '/paf/hero/Center/min_distance'
             ),
-            MinDistance
+            Float32,
+            queue_size=10
         )

         # publisher for reconstructed lidar image
@@ -125,7 +129,8 @@
                 '~image_distance_topic',
                 '/paf/hero/Center/rainbow_image'
             ),
-            ImageMsg
+            ImageMsg,
+            queue_size=10
         )

         # publisher for 3d blob graph
@@ -134,7 +139,8 @@
                 '~image_distance_topic',
                 '/paf/hero/Center/min_dist_image'
             ),
-            ImageMsg
+            ImageMsg,
+            queue_size=10
         )

         rospy.Subscriber(rospy.get_param('~source_topic', "/carla/hero/LIDAR"),
@@ -145,19 +151,34 @@ def listener(self):
     def plot_blob(self, xyz_coords):
         # creates a 3d graph thazt highlights blobs of points
         xyz_coords_standardized = StandardScaler().fit_transform(xyz_coords)
-        pairwise_distances_x = np.abs(np.subtract.outer(xyz_coords[:, 0],
-                                      xyz_coords[:, 0]))
-        min_x_distance = np.min(pairwise_distances_x[pairwise_distances_x > 0])
-        self.pub_min_dist.publish(min_x_distance)
+        """pairwise_distances_x = np.abs(np.subtract.outer(xyz_coords[:, 0],
+                                      xyz_coords[:, 0]))"""
+
+        # publish minimum distance

         eps = 0.2
         min_samples = 5
         dbscan = DBSCAN(eps=eps, min_samples=min_samples)
         labels = dbscan.fit_predict(xyz_coords_standardized)
+        min_distance_cluster = []
+
+        # Iterate through each cluster
+        for label in set(labels):
+            if label != -1:  # Ignore noise points
+                cluster_points = xyz_coords[labels == label]
+                min_distance_cluster.append(np.min(cluster_points[:, 0]))
+
+        # Publish the minimum distance within clusters
+        if len(min_distance_cluster) > 0:
+            self.pub_min_dist.publish(min(min_distance_cluster))
+        else:
+            self.pub_min_dist.publish(np.inf)
+
         fig = plt.figure()
         ax = fig.add_subplot(111, projection='3d')

         for label in set(labels):
+            # print(label)
             if label == -1:
                 ax.scatter(xyz_coords[labels == label][:, 0],
                            xyz_coords[labels == label][:, 1],
@@ -186,9 +207,9 @@ def reconstruct_img_from_lidar(self, coordinates_xyz):
         # intrinsic matrix for camera:
         # width -> 300, height -> 200, fov -> 100 (agent.py)
         im = np.identity(3)
-        im[0, 2] = 300 / 2.0
-        im[1, 2] = 200 / 2.0
-        im[0, 0] = im[1, 1] = 300 / (2.0 * np.tan(100 * np.pi / 360.0))
+        im[0, 2] = 1280 / 2.0
+        im[1, 2] = 720 / 2.0
+        im[0, 0] = im[1, 1] = 1280 / (2.0 * np.tan(100 * np.pi / 360.0))

         # extrinsic matrix for camera
         ex = np.zeros(shape=(3, 4))
@@ -198,16 +219,16 @@ def reconstruct_img_from_lidar(self, coordinates_xyz):
         m = np.matmul(im, ex)

         # reconstruct camera image with LIDAR-Data
-        img = np.zeros(shape=(200, 300), dtype=np.uint8)
+        img = np.zeros(shape=(720, 1280), dtype=np.float32)
         for c in coordinates_xyz:
             point = np.array([c[1], c[2], c[0], 1])
             pixel = np.matmul(m, point)
             x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
-            if x >= 0 and x <= 300 and y >= 0 and y <= 200:
-                img[199-y][299-x] = 255 - c[0]
+            if x >= 0 and x <= 1280 and y >= 0 and y <= 720:
+                img[719-y][1279-x] = c[0]

         # Rainbox color mapping to highlight distances
-        colors = [(0, 0, 0)] + [(1, 0, 0), (1, 1, 0),
+        """colors = [(0, 0, 0)] + [(1, 0, 0), (1, 1, 0),
                                  (0, 1, 0), (0, 1, 1), (0, 0, 1)]
         cmap_name = 'rainbow'

@@ -216,9 +237,9 @@
                                                  N=256)
         img_colored = (rainbow_cmap(img / np.max(img)) * 255).astype(np.uint8)
-        img_bgr = cv2.cvtColor(img_colored, cv2.COLOR_RGBA2BGR)
+        img_bgr = cv2.cvtColor(img_colored, cv2.COLOR_RGBA2BGR)"""

-        return img_bgr
+        return img


 if __name__ == '__main__':
diff --git a/code/perception/src/lidar_filter_utility.py b/code/perception/src/lidar_filter_utility.py
index 4cd50260..5871ba35 100755
--- a/code/perception/src/lidar_filter_utility.py
+++ b/code/perception/src/lidar_filter_utility.py
@@ -31,6 +31,10 @@ def bounding_box(points, min_x=-np.inf, max_x=np.inf, min_y=-np.inf,
     """

+    """print(min_x, max_x, "X")
+    print(min_y, max_y, "Y")
+    print(min_z, max_z, "Z")"""
+
     bound_x = np.logical_and(points['x'] > min_x, points['x'] < max_x)
     bound_y = np.logical_and(points['y'] > min_y, points['y'] < max_y)
     bound_z = np.logical_and(points['z'] > min_z, points['z'] < max_z)
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index e9681a4c..c62212c1 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -14,11 +14,12 @@
 import cv2
 from rospy.numpy_msg import numpy_msg
 from sensor_msgs.msg import Image as ImageMsg
-from std_msgs.msg import Header
+from std_msgs.msg import Header, Float32MultiArray
 from cv_bridge import CvBridge
 from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
 import numpy as np
 from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
+

 """
 VisionNode:
@@ -81,10 +82,13 @@ def __init__(self, name, **kwargs):
         self.side = self.get_param("side", "Center")
         self.device = torch.device("cuda"
                                    if torch.cuda.is_available() else "cpu")
+        self.depth_images = []

         # publish / subscribe setup
         self.setup_camera_subscriptions()
+        self.setup_rainbow_subscription()
         self.setup_camera_publishers()
+        self.setup_object_distance_publishers()
         self.setup_traffic_light_publishers()
         self.image_msg_header = Header()
         self.image_msg_header.frame_id = "segmented_image_frame"
@@ -95,6 +99,7 @@
         self.weights = model_info[1]
         self.type = model_info[2]
         self.framework = model_info[3]
+        self.save = True
         print("Vision Node Configuration:")
         print("Device -> ", self.device)
         print(f"Model -> {self.get_param('model')},")
@@ -121,6 +126,14 @@ def setup_camera_subscriptions(self):
             qos_profile=1
         )

+    def setup_rainbow_subscription(self):
+        self.new_subscription(
+            msg_type=numpy_msg(ImageMsg),
+            callback=self.handle_rainbow_image,
+            topic='/paf/hero/Center/rainbow_image',
+            qos_profile=1
+        )
+
     def setup_camera_publishers(self):
         self.publisher = self.new_publisher(
             msg_type=numpy_msg(ImageMsg),
@@ -128,6 +141,12 @@ def setup_camera_publishers(self):
             qos_profile=1
         )

+    def setup_object_distance_publishers(self):
+        self.distance_publisher = self.new_publisher(
+            msg_type=numpy_msg(Float32MultiArray),
+            topic=f"/paf/{self.role_name}/{self.side}/object_distance",
+            qos_profile=1)
+
     def setup_traffic_light_publishers(self):
         self.traffic_light_publisher = self.new_publisher(
             msg_type=numpy_msg(ImageMsg),
@@ -151,6 +170,25 @@
                                              encoding="rgb8")
         img_msg.header = image.header
         self.publisher.publish(img_msg)
+        pass
+
+    def handle_rainbow_image(self, image):
+        cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+                                             desired_encoding='passthrough')
+        # cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+        self.depth_images.append(cv_image)
+        if len(self.depth_images) > 1:
+            self.depth_images.pop(0)
+            """if self.save:
+                for i in range(len(self.depth_images)):
+                    cv2.imshow(f"{i}", self.depth_images[i])
+
+                self.save = False
+                cv2.waitKey(0)  # Wait for any key press
+                cv2.destroyAllWindows()"""
+
+        else:
+            self.logerr("Depth-Field building up! No distances available yet!")

     def predict_torch(self, image):
         self.model.eval()
@@ -181,11 +219,57 @@ def predict_ultralytics(self, image):
         cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)

         output = self.model(cv_image, half=True, verbose=False)

+        distance_output = []
+        c_boxes = []
+        c_labels = []
+        for r in output:
+            boxes = r.boxes
+            for box in boxes:
+                cls = box.cls.item()
+                pixels = box.xyxy[0]
+                if len(self.depth_images) > 0:
+                    distances = np.asarray(
+                        [self.depth_images[i][int(pixels[1]):int(pixels[3]):1,
+                                              int(pixels[0]):int(pixels[2]):1]
+                         for i in range(len(self.depth_images))])
+                    non_zero_filter = distances[distances != 0]
+
+                    if len(non_zero_filter) > 0:
+                        obj_dist = np.min(non_zero_filter)
+                    else:
+                        obj_dist = np.inf
+
+                    c_boxes.append(torch.tensor(pixels))
+                    c_labels.append(f"Class: {cls}, Meters: {obj_dist}")
+                    distance_output.append([cls, obj_dist])
+
+        # print(distance_output)
+        # self.logerr(distance_output)
+        self.distance_publisher.publish(
+            Float32MultiArray(data=distance_output))
+
+        transposed_image = np.transpose(cv_image, (2, 0, 1))
+        image_np_with_detections = torch.tensor(transposed_image,
+                                                dtype=torch.uint8)
         if 9 in output[0].boxes.cls:
             self.process_traffic_lights(output[0], cv_image, image.header)

-        return output[0].plot()
+        c_boxes = torch.stack(c_boxes)
+        print(image_np_with_detections.shape, c_boxes.shape, c_labels)
+        box = draw_bounding_boxes(image_np_with_detections,
+                                  c_boxes,
+                                  c_labels,
+                                  colors='blue',
+                                  width=3,
+                                  font_size=12)
+        # print(box.shape)
+        np_box_img = np.transpose(box.detach().numpy(),
+                                  (1, 2, 0))
+        box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
+        return box_img
+
+        # return output[0].plot()

     def process_traffic_lights(self, prediction, cv_image, image_header):
         indices = (prediction.boxes.cls == 9).nonzero().squeeze().cpu().numpy()
@@ -238,8 +322,10 @@ def apply_bounding_boxes(self, input_image, model_output):
         box = draw_bounding_boxes(image_np_with_detections,
                                   boxes,
                                   labels,
-                                  colors='red',
-                                  width=2)
+                                  colors='blue',
+                                  width=3,
+                                  font_size=24)
+
         np_box_img = np.transpose(box.detach().numpy(),
                                   (1, 2, 0))
         box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
diff --git a/doc/00_assets/2_15_layover.png b/doc/00_assets/2_15_layover.png
new file mode 100644
index 00000000..d2880784
Binary files /dev/null and b/doc/00_assets/2_15_layover.png differ
diff --git a/doc/00_assets/2_layover.png b/doc/00_assets/2_layover.png
new file mode 100644
index 00000000..462c87af
Binary files /dev/null and b/doc/00_assets/2_layover.png differ
diff --git a/doc/00_assets/3_layover.png b/doc/00_assets/3_layover.png
new file mode 100644
index 00000000..bb1cf837
Binary files /dev/null and b/doc/00_assets/3_layover.png differ
diff --git a/doc/00_assets/4_layover.png b/doc/00_assets/4_layover.png
new file mode 100644
index 00000000..ff8bd3d2
Binary files /dev/null and b/doc/00_assets/4_layover.png differ
diff --git a/doc/06_perception/12_distance_to_objects.md b/doc/06_perception/12_distance_to_objects.md
index 59863390..0d6f1825 100644
--- a/doc/06_perception/12_distance_to_objects.md
+++ b/doc/06_perception/12_distance_to_objects.md
@@ -39,14 +39,14 @@
 The goal is to calculate the projection of point P and find its Pixl-Coordinates
 To do this you need a couple of thins:

 1. Camera-Calibration
-  1. Width
-  2. Height
-  3. Field-of-View
+   1. Width
+   2. Height
+   3. Field-of-View
 2. LIDAR-Sensor-Position
-  1. Origin of 3D-World-Coordinates
+   1. Origin of 3D-World-Coordinates
 3. Camera-Sensor
-  1. Position
-  2. Orientation
+   1. Position
+   2. Orientation

 The formula for this projection proposed by the literature looks like this:
@@ -55,11 +55,11 @@
 To get the camera-intrinsic matrix we need the width, height and fov of the image produced by the camera.
 Luckily we cn easly get these values from the sensor configuration in (agent.py)

-In our case we use the following configuration: Width: 300, Height: 200, FOV: 100
+In our case we use the following configuration: Width: 1280, Height: 720, FOV: 100

-The Intrinsic Matrix is calculated within the code using these values and a piece of code from pyLot (insert link)
+The Intrinsic Matrix is calculated within the code using these values and a piece of code from pyLot.

-Next up we need the extrinsic Camera-Matrix. We can set this Matrix to the Identity-Matrix, if both LIDAR and Camera are in the exact same position (e.g (0,0, 0)) in the world.
+Next up we need the extrinsic Camera-Matrix. We can set this Matrix to the Identity-Matrix, if both LIDAR and Camera are in the exact same position (e.g. (0, 0, 0)) in the world.

 ### Purpose
@@ -72,16 +72,47 @@
 U can imageine putting both images on top of each other.

 ### Implementation

-All that is left now is the calculation of the Pixel-Coordinates for every LIDAR-Point in front of the vehicle and some nice Visualization.
+To reconstruct the depth image, we simply implement the above formulas using numpy. In order to get better results we had to adjust the LIDAR-Sensor setup; see the LIDAR-Configuration section below for more details.

-LIDAR-Points that are not within the field of view of the camera would be projected to (u, v)-Points that dont match the Camera-Image
-(for example: (-100, 23))
+The resulting image uses the distance in meters as the value of each pixel; it is therefore a grayscale image.

-To visualize the reconstruced Image we create a cv2 Image where the RGB-Values a mapped to a rainbox-color scheme according to the distance at each pixel.
+![Grayscale Depth Image](../00_assets/2_15_layover.png)

-insert image
+In the next step we want to get the distance for every bounding box the object detection found.

-## Next Steps
+We want to return a list of tuples, each containing a class and a distance, like this:

-- Combine Object-Detection with LIDAR-Reconstruction to get a depth image
-- Provide List of Objects with distance in a publisher
+[
+    [class_id, distance_meters],
+    [12.0, 3.456],
+    ...,
+    [2.0, np.inf],
+    [2.0, 7.890],
+]
+
+Since we cannot be certain that we will find a LIDAR-Point in the depth image for every pixel in the bounding box of the original image,
+we check for the minimum value within the bounding box in the depth image.
+
+This makes sense, since the LIDAR doesn't detect points behind an object.
+
+If no distance is found in the depth image, we return infinity for this bounding box.
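As an illustration of the lookup described above, a minimal sketch (not part of the change set; `depth_img` and `detections` are assumed placeholder inputs, with a pixel value of 0 meaning "no LIDAR return") could look like this:

```python
import numpy as np

def object_distances(depth_img, detections):
    """Return [class_id, distance] pairs for each detected bounding box."""
    result = []
    for class_id, (x1, y1, x2, y2) in detections:
        # crop the depth image to the bounding box
        patch = depth_img[int(y1):int(y2), int(x1):int(x2)]
        # ignore pixels without a LIDAR return (value 0)
        hits = patch[patch != 0]
        # closest LIDAR point wins; infinity if the box contains no point
        dist = float(np.min(hits)) if hits.size > 0 else np.inf
        result.append([class_id, dist])
    return result
```

Taking the minimum inside the box follows the reasoning above: the closest LIDAR return inside the box belongs to the visible front of the object, since the LIDAR cannot see behind it.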
+
+## LIDAR-Configuration
+
+This topic came to our attention when we realised that the LIDAR was flickering, as you can see in the following image series.
+
+![Grayscale Depth Image](../00_assets/2_layover.png)
+![Grayscale Depth Image](../00_assets/3_layover.png)
+![Grayscale Depth Image](../00_assets/4_layover.png)
+
+These are the grayscale depth images reconstructed within 600 milliseconds.
+
+We can see that we usually only get one half of the LIDAR-Points.
+
+The simple reason for this is the low spin rate set in the LIDAR-Configuration.
+
+By adjusting the spin rate we can achieve a fully reconstructed image. However, a higher spin rate results in a lower resolution of LIDAR-Points.
+
+We now use a slightly higher spin rate that consistently produces a full depth image, but receives only slightly fewer LIDAR-Points.
+
+Doing this, we noticed that the LIDAR-Sensor wasn't positioned correctly. For some reason it was offset to the front left.
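For reference, the projection that produces these depth images (camera-intrinsic matrix built from width, height and FOV, identity extrinsics, and axis reordering so the forward axis becomes the camera depth axis) boils down to a few lines of numpy. This is an illustrative sketch only, mirroring `reconstruct_img_from_lidar` in `lidar_distance.py`; the example point is an assumption:

```python
import numpy as np

# intrinsic matrix for width=1280, height=720, fov=100 (see agent.py)
width, height, fov = 1280, 720, 100
im = np.identity(3)
im[0, 2] = width / 2.0
im[1, 2] = height / 2.0
im[0, 0] = im[1, 1] = width / (2.0 * np.tan(fov * np.pi / 360.0))

# extrinsic matrix: LIDAR and camera share the same pose -> [I | 0]
ex = np.hstack((np.identity(3), np.zeros((3, 1))))
m = np.matmul(im, ex)

# project one LIDAR point (x forward, y lateral, z up) into the image;
# the coordinates are reordered so depth ends up in the camera's z axis
c = np.array([10.0, 1.5, 0.2])              # assumed example point in meters
point = np.array([c[1], c[2], c[0], 1])
pixel = np.matmul(m, point)
x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2])
print(x, y)  # pixel column / row; only valid if it lands inside 1280 x 720
```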