diff --git a/code/agent/config/dev_objects.json b/code/agent/config/dev_objects.json
index 90b24361..e6fbdeba 100644
--- a/code/agent/config/dev_objects.json
+++ b/code/agent/config/dev_objects.json
@@ -104,23 +104,23 @@
"type": "sensor.lidar.ray_cast",
"id": "LIDAR",
"spawn_point": {
- "x": 0.7,
- "y": 0.4,
- "z": 1.60,
+ "x": 0.0,
+ "y": 0.0,
+ "z": 1.70,
"roll": 0.0,
"pitch": 0.0,
"yaw": 0.0
},
- "range": 85,
- "rotation_frequency": 10,
+ "range": 50,
+ "rotation_frequency": 15,
"channels": 64,
"upper_fov": 10,
"lower_fov": -30,
- "points_per_second": 600000,
+ "points_per_second": 1000000,
"atmosphere_attenuation_rate": 0.004,
"dropoff_general_rate": 0.45,
"dropoff_intensity_limit": 0.8,
- "dropoff_zero_intensity": 0.4
+ "dropoff_zero_intensity": 0.2
},
{
"type": "sensor.other.radar",
diff --git a/code/agent/src/agent/agent.py b/code/agent/src/agent/agent.py
index ad8d076c..2076bef3 100755
--- a/code/agent/src/agent/agent.py
+++ b/code/agent/src/agent/agent.py
@@ -23,10 +23,10 @@ def get_ros_entrypoint(self):
def sensors(self):
sensors = [
{'type': 'sensor.camera.rgb', 'id': 'Center',
- 'x': 0.7, 'y': 0.0, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
- 'yaw': 0.0, 'width': 300, 'height': 200, 'fov': 100},
+ 'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
+ 'yaw': 0.0, 'width': 1280, 'height': 720, 'fov': 100},
{'type': 'sensor.lidar.ray_cast', 'id': 'LIDAR',
- 'x': 0.7, 'y': 0.0, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
+ 'x': 0.0, 'y': 0.0, 'z': 1.70, 'roll': 0.0, 'pitch': 0.0,
'yaw': 0.0},
{'type': 'sensor.other.radar', 'id': 'RADAR',
'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
diff --git a/code/perception/launch/perception.launch b/code/perception/launch/perception.launch
index 500635fb..3c463823 100644
--- a/code/perception/launch/perception.launch
+++ b/code/perception/launch/perception.launch
@@ -60,6 +60,7 @@
- deeplabv3_resnet101
- yolov8x-seg
-->
+
@@ -78,8 +79,8 @@
-
-
+
+
diff --git a/code/perception/src/lidar_distance.py b/code/perception/src/lidar_distance.py
index 1228c607..6a1caa4e 100755
--- a/code/perception/src/lidar_distance.py
+++ b/code/perception/src/lidar_distance.py
@@ -4,17 +4,16 @@
import numpy as np
import lidar_filter_utility
from sensor_msgs.msg import PointCloud2
-
+from std_msgs.msg import Float32
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
-from perception.msg import MinDistance
# from mpl_toolkits.mplot3d import Axes3D
# from itertools import combinations
import cv2
from sensor_msgs.msg import Image as ImageMsg
from cv_bridge import CvBridge
-from matplotlib.colors import LinearSegmentedColormap
+# from matplotlib.colors import LinearSegmentedColormap
class LidarDistance():
@@ -39,12 +38,11 @@ def callback(self, data):
# ndarray-made-up-of-numpy-void-numbers
min_dist_bit_mask = lidar_filter_utility.bounding_box(
coordinates,
- max_x=rospy.get_param('~max_x', np.inf),
- min_x=rospy.get_param('~min_x', -np.inf),
- max_y=rospy.get_param('~max_y', np.inf),
- min_y=rospy.get_param('~min_y', -np.inf),
- max_z=rospy.get_param('~max_z', np.inf),
+ max_x=50.,
+ min_x=2.,
min_z=rospy.get_param('~min_z', -np.inf),
+ min_y=-2.5,
+ max_y=2.5,
)
reconstruct_bit_mask = lidar_filter_utility.bounding_box(
@@ -81,15 +79,19 @@ def callback(self, data):
)
# handle minimum distance
- plot = self.plot_blob(min_dist_coordinates_xyz)
- img_msg = self.bridge.cv2_to_imgmsg(plot,
- encoding="passthrough")
- img_msg.header = data.header
- self.min_dist_img_publisher.publish(img_msg)
+ if min_dist_coordinates_xyz.shape[0] > 0:
+ plot = self.plot_blob(min_dist_coordinates_xyz)
+ img_msg = self.bridge.cv2_to_imgmsg(plot,
+ encoding="passthrough")
+ img_msg.header = data.header
+ self.min_dist_img_publisher.publish(img_msg)
+ else:
+ self.pub_min_dist.publish(np.inf)
# handle reconstruction of lidar points
rainbow_cloud = self.reconstruct_img_from_lidar(
reconstruct_coordinates_xyz)
+
img_msg = self.bridge.cv2_to_imgmsg(rainbow_cloud,
encoding="passthrough")
img_msg.header = data.header
@@ -107,7 +109,8 @@ def listener(self):
'~point_cloud_topic',
'/carla/hero/' + rospy.get_namespace() + '_filtered'
),
- PointCloud2
+ PointCloud2,
+ queue_size=10
)
# publisher for the closest blob in the lidar point cloud
@@ -116,7 +119,8 @@ def listener(self):
'~range_topic',
'/paf/hero/Center/min_distance'
),
- MinDistance
+ Float32,
+ queue_size=10
)
# publisher for reconstructed lidar image
@@ -125,7 +129,8 @@ def listener(self):
'~image_distance_topic',
'/paf/hero/Center/rainbow_image'
),
- ImageMsg
+ ImageMsg,
+ queue_size=10
)
# publisher for 3d blob graph
@@ -134,7 +139,8 @@ def listener(self):
'~image_distance_topic',
'/paf/hero/Center/min_dist_image'
),
- ImageMsg
+ ImageMsg,
+ queue_size=10
)
rospy.Subscriber(rospy.get_param('~source_topic', "/carla/hero/LIDAR"),
@@ -145,19 +151,34 @@ def listener(self):
def plot_blob(self, xyz_coords):
        # creates a 3d graph that highlights blobs of points
xyz_coords_standardized = StandardScaler().fit_transform(xyz_coords)
- pairwise_distances_x = np.abs(np.subtract.outer(xyz_coords[:, 0],
- xyz_coords[:, 0]))
- min_x_distance = np.min(pairwise_distances_x[pairwise_distances_x > 0])
- self.pub_min_dist.publish(min_x_distance)
+ """pairwise_distances_x = np.abs(np.subtract.outer(xyz_coords[:, 0],
+ xyz_coords[:, 0]))"""
+
+ # publish minimum distance
eps = 0.2
min_samples = 5
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
labels = dbscan.fit_predict(xyz_coords_standardized)
+ min_distance_cluster = []
+
+ # Iterate through each cluster
+ for label in set(labels):
+ if label != -1: # Ignore noise points
+ cluster_points = xyz_coords[labels == label]
+ min_distance_cluster.append(np.min(cluster_points[:, 0]))
+
+ # Publish the minimum distance within clusters
+ if len(min_distance_cluster) > 0:
+ self.pub_min_dist.publish(min(min_distance_cluster))
+ else:
+ self.pub_min_dist.publish(np.inf)
+
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for label in set(labels):
+ # print(label)
if label == -1:
ax.scatter(xyz_coords[labels == label][:, 0],
xyz_coords[labels == label][:, 1],
@@ -186,9 +207,9 @@ def reconstruct_img_from_lidar(self, coordinates_xyz):
# intrinsic matrix for camera:
# width -> 300, height -> 200, fov -> 100 (agent.py)
im = np.identity(3)
- im[0, 2] = 300 / 2.0
- im[1, 2] = 200 / 2.0
- im[0, 0] = im[1, 1] = 300 / (2.0 * np.tan(100 * np.pi / 360.0))
+ im[0, 2] = 1280 / 2.0
+ im[1, 2] = 720 / 2.0
+ im[0, 0] = im[1, 1] = 1280 / (2.0 * np.tan(100 * np.pi / 360.0))
# extrinsic matrix for camera
ex = np.zeros(shape=(3, 4))
@@ -198,16 +219,16 @@ def reconstruct_img_from_lidar(self, coordinates_xyz):
m = np.matmul(im, ex)
# reconstruct camera image with LIDAR-Data
- img = np.zeros(shape=(200, 300), dtype=np.uint8)
+ img = np.zeros(shape=(720, 1280), dtype=np.float32)
for c in coordinates_xyz:
point = np.array([c[1], c[2], c[0], 1])
pixel = np.matmul(m, point)
x, y = int(pixel[0]/pixel[2]), int(pixel[1]/pixel[2])
- if x >= 0 and x <= 300 and y >= 0 and y <= 200:
- img[199-y][299-x] = 255 - c[0]
+            if x >= 0 and x < 1280 and y >= 0 and y < 720:
+ img[719-y][1279-x] = c[0]
# Rainbox color mapping to highlight distances
- colors = [(0, 0, 0)] + [(1, 0, 0), (1, 1, 0),
+ """colors = [(0, 0, 0)] + [(1, 0, 0), (1, 1, 0),
(0, 1, 0), (0, 1, 1),
(0, 0, 1)]
cmap_name = 'rainbow'
@@ -216,9 +237,9 @@ def reconstruct_img_from_lidar(self, coordinates_xyz):
N=256)
img_colored = (rainbow_cmap(img / np.max(img)) * 255).astype(np.uint8)
- img_bgr = cv2.cvtColor(img_colored, cv2.COLOR_RGBA2BGR)
+ img_bgr = cv2.cvtColor(img_colored, cv2.COLOR_RGBA2BGR)"""
- return img_bgr
+ return img
if __name__ == '__main__':
diff --git a/code/perception/src/lidar_filter_utility.py b/code/perception/src/lidar_filter_utility.py
index 4cd50260..5871ba35 100755
--- a/code/perception/src/lidar_filter_utility.py
+++ b/code/perception/src/lidar_filter_utility.py
@@ -31,6 +31,10 @@ def bounding_box(points, min_x=-np.inf, max_x=np.inf, min_y=-np.inf,
"""
+ """print(min_x, max_x, "X")
+ print(min_y, max_y, "Y")
+ print(min_z, max_z, "Z")"""
+
bound_x = np.logical_and(points['x'] > min_x, points['x'] < max_x)
bound_y = np.logical_and(points['y'] > min_y, points['y'] < max_y)
bound_z = np.logical_and(points['z'] > min_z, points['z'] < max_z)
diff --git a/code/perception/src/vision_node.py b/code/perception/src/vision_node.py
index e9681a4c..c62212c1 100755
--- a/code/perception/src/vision_node.py
+++ b/code/perception/src/vision_node.py
@@ -14,11 +14,12 @@
import cv2
from rospy.numpy_msg import numpy_msg
from sensor_msgs.msg import Image as ImageMsg
-from std_msgs.msg import Header
+from std_msgs.msg import Header, Float32MultiArray
from cv_bridge import CvBridge
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import numpy as np
from ultralytics import NAS, YOLO, RTDETR, SAM, FastSAM
+
"""
VisionNode:
@@ -81,10 +82,13 @@ def __init__(self, name, **kwargs):
self.side = self.get_param("side", "Center")
self.device = torch.device("cuda"
if torch.cuda.is_available() else "cpu")
+ self.depth_images = []
# publish / subscribe setup
self.setup_camera_subscriptions()
+ self.setup_rainbow_subscription()
self.setup_camera_publishers()
+ self.setup_object_distance_publishers()
self.setup_traffic_light_publishers()
self.image_msg_header = Header()
self.image_msg_header.frame_id = "segmented_image_frame"
@@ -95,6 +99,7 @@ def __init__(self, name, **kwargs):
self.weights = model_info[1]
self.type = model_info[2]
self.framework = model_info[3]
+ self.save = True
print("Vision Node Configuration:")
print("Device -> ", self.device)
print(f"Model -> {self.get_param('model')},")
@@ -121,6 +126,14 @@ def setup_camera_subscriptions(self):
qos_profile=1
)
+ def setup_rainbow_subscription(self):
+ self.new_subscription(
+ msg_type=numpy_msg(ImageMsg),
+ callback=self.handle_rainbow_image,
+ topic='/paf/hero/Center/rainbow_image',
+ qos_profile=1
+ )
+
def setup_camera_publishers(self):
self.publisher = self.new_publisher(
msg_type=numpy_msg(ImageMsg),
@@ -128,6 +141,12 @@ def setup_camera_publishers(self):
qos_profile=1
)
+ def setup_object_distance_publishers(self):
+ self.distance_publisher = self.new_publisher(
+ msg_type=numpy_msg(Float32MultiArray),
+ topic=f"/paf/{self.role_name}/{self.side}/object_distance",
+ qos_profile=1)
+
def setup_traffic_light_publishers(self):
self.traffic_light_publisher = self.new_publisher(
msg_type=numpy_msg(ImageMsg),
@@ -151,6 +170,25 @@ def handle_camera_image(self, image):
encoding="rgb8")
img_msg.header = image.header
self.publisher.publish(img_msg)
+ pass
+
+ def handle_rainbow_image(self, image):
+ cv_image = self.bridge.imgmsg_to_cv2(img_msg=image,
+ desired_encoding='passthrough')
+ # cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
+ self.depth_images.append(cv_image)
+ if len(self.depth_images) > 1:
+ self.depth_images.pop(0)
+ """if self.save:
+ for i in range(len(self.depth_images)):
+ cv2.imshow(f"{i}", self.depth_images[i])
+
+ self.save = False
+ cv2.waitKey(0) # Wait for any key press
+ cv2.destroyAllWindows()"""
+
+ else:
+            self.logerr("Depth field building up! No distances available yet!")
def predict_torch(self, image):
self.model.eval()
@@ -181,11 +219,57 @@ def predict_ultralytics(self, image):
cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGB2BGR)
output = self.model(cv_image, half=True, verbose=False)
+ distance_output = []
+ c_boxes = []
+ c_labels = []
+ for r in output:
+ boxes = r.boxes
+ for box in boxes:
+ cls = box.cls.item()
+ pixels = box.xyxy[0]
+ if len(self.depth_images) > 0:
+ distances = np.asarray(
+ [self.depth_images[i][int(pixels[1]):int(pixels[3]):1,
+ int(pixels[0]):int(pixels[2]):1]
+ for i in range(len(self.depth_images))])
+ non_zero_filter = distances[distances != 0]
+
+ if len(non_zero_filter) > 0:
+ obj_dist = np.min(non_zero_filter)
+ else:
+ obj_dist = np.inf
+
+ c_boxes.append(torch.tensor(pixels))
+ c_labels.append(f"Class: {cls}, Meters: {obj_dist}")
+ distance_output.append([cls, obj_dist])
+
+ # print(distance_output)
+ # self.logerr(distance_output)
+ self.distance_publisher.publish(
+ Float32MultiArray(data=distance_output))
+
+ transposed_image = np.transpose(cv_image, (2, 0, 1))
+ image_np_with_detections = torch.tensor(transposed_image,
+ dtype=torch.uint8)
if 9 in output[0].boxes.cls:
self.process_traffic_lights(output[0], cv_image, image.header)
- return output[0].plot()
+ c_boxes = torch.stack(c_boxes)
+ print(image_np_with_detections.shape, c_boxes.shape, c_labels)
+ box = draw_bounding_boxes(image_np_with_detections,
+ c_boxes,
+ c_labels,
+ colors='blue',
+ width=3,
+ font_size=12)
+ # print(box.shape)
+ np_box_img = np.transpose(box.detach().numpy(),
+ (1, 2, 0))
+ box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
+ return box_img
+
+ # return output[0].plot()
def process_traffic_lights(self, prediction, cv_image, image_header):
indices = (prediction.boxes.cls == 9).nonzero().squeeze().cpu().numpy()
@@ -238,8 +322,10 @@ def apply_bounding_boxes(self, input_image, model_output):
box = draw_bounding_boxes(image_np_with_detections,
boxes,
labels,
- colors='red',
- width=2)
+ colors='blue',
+ width=3,
+ font_size=24)
+
np_box_img = np.transpose(box.detach().numpy(),
(1, 2, 0))
box_img = cv2.cvtColor(np_box_img, cv2.COLOR_BGR2RGB)
diff --git a/doc/00_assets/2_15_layover.png b/doc/00_assets/2_15_layover.png
new file mode 100644
index 00000000..d2880784
Binary files /dev/null and b/doc/00_assets/2_15_layover.png differ
diff --git a/doc/00_assets/2_layover.png b/doc/00_assets/2_layover.png
new file mode 100644
index 00000000..462c87af
Binary files /dev/null and b/doc/00_assets/2_layover.png differ
diff --git a/doc/00_assets/3_layover.png b/doc/00_assets/3_layover.png
new file mode 100644
index 00000000..bb1cf837
Binary files /dev/null and b/doc/00_assets/3_layover.png differ
diff --git a/doc/00_assets/4_layover.png b/doc/00_assets/4_layover.png
new file mode 100644
index 00000000..ff8bd3d2
Binary files /dev/null and b/doc/00_assets/4_layover.png differ
diff --git a/doc/06_perception/12_distance_to_objects.md b/doc/06_perception/12_distance_to_objects.md
index 59863390..0d6f1825 100644
--- a/doc/06_perception/12_distance_to_objects.md
+++ b/doc/06_perception/12_distance_to_objects.md
@@ -39,14 +39,14 @@ The goal is to calculate the projection of point P and find its Pixel-Coordinates
To do this you need a couple of things:
1. Camera-Calibration
- 1. Width
- 2. Height
- 3. Field-of-View
+ 1. Width
+ 2. Height
+ 3. Field-of-View
2. LIDAR-Sensor-Position
- 1. Origin of 3D-World-Coordinates
+ 1. Origin of 3D-World-Coordinates
3. Camera-Sensor
- 1. Position
- 2. Orientation
+ 1. Position
+ 2. Orientation
The formula for this projection proposed by the literature looks like this:
@@ -55,11 +55,11 @@ The formula for this projection proposed by the literature looks like this:
To get the camera-intrinsic matrix we need the width, height and fov of the image produced by the camera.
Luckily we can easily get these values from the sensor configuration in (agent.py)
-In our case we use the following configuration: Width: 300, Height: 200, FOV: 100
+In our case we use the following configuration: Width: 1280, Height: 720, FOV: 100
-The Intrinsic Matrix is calculated within the code using these values and a piece of code from pyLot (insert link)
+The intrinsic matrix is calculated within the code using these values and a piece of code adapted from Pylot.
-Next up we need the extrinsic Camera-Matrix. We can set this Matrix to the Identity-Matrix, if both LIDAR and Camera are in the exact same position (e.g (0,0, 0)) in the world.
+Next we need the extrinsic camera matrix. We can set this matrix to the identity matrix if both LIDAR and camera are at the exact same position (e.g. (0, 0, 0)) in the world.
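+
+As a minimal sketch, the intrinsic matrix can be built from these three values like this (mirroring the computation in lidar_distance.py; the helper name build_intrinsic_matrix is only illustrative):
+
+```python
+import numpy as np
+
+def build_intrinsic_matrix(width=1280, height=720, fov_deg=100):
+    # focal length in pixels, derived from the horizontal field of view
+    f = width / (2.0 * np.tan(fov_deg * np.pi / 360.0))
+    im = np.identity(3)
+    im[0, 0] = im[1, 1] = f      # focal lengths fx and fy
+    im[0, 2] = width / 2.0       # principal point cx
+    im[1, 2] = height / 2.0      # principal point cy
+    return im
+```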
### Purpose
@@ -72,16 +72,47 @@ You can imagine putting both images on top of each other.
### Implementation
-All that is left now is the calculation of the Pixel-Coordinates for every LIDAR-Point in front of the vehicle and some nice Visualization.
+To reconstruct the depth image, we implement the above formulas using numpy. To get better results we had to adjust the LIDAR sensor setup; see the section below for more details.
-LIDAR-Points that are not within the field of view of the camera would be projected to (u, v)-Points that dont match the Camera-Image
-(for example: (-100, 23))
+The resulting image uses the distance in meters as the value of each pixel, so it is effectively a grayscale depth image.
-To visualize the reconstruced Image we create a cv2 Image where the RGB-Values a mapped to a rainbox-color scheme according to the distance at each pixel.
+![Grayscale Depth Image](../00_assets/2_15_layover.png)
-insert image
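+
+A condensed sketch of that reconstruction loop (the same idea as in lidar_distance.py, with the projection matrix m = intrinsic * extrinsic passed in; the function name is only illustrative):
+
+```python
+import numpy as np
+
+def reconstruct_depth_image(points_xyz, m, width=1280, height=720):
+    # each pixel stores the forward distance (x) in meters; 0 means no LIDAR hit
+    img = np.zeros((height, width), dtype=np.float32)
+    for c in points_xyz:
+        # reorder to (y, z, x, 1) so the projection matrix maps into image space
+        point = np.array([c[1], c[2], c[0], 1.0])
+        pixel = m @ point
+        x, y = int(pixel[0] / pixel[2]), int(pixel[1] / pixel[2])
+        if 0 <= x < width and 0 <= y < height:
+            img[height - 1 - y][width - 1 - x] = c[0]
+    return img
+```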
+In the next step we want to get a distance for every bounding box that the object detection found.
-## Next Steps
+We want to return a list of pairs, each containing a class id and a distance in meters, like this:
-- Combine Object-Detection with LIDAR-Reconstruction to get a depth image
-- Provide List of Objects with distance in a publisher
+[
+ [class_id, distance_meters],
+ [12.0, 3.456],
+ ...,
+ [2.0, np.inf],
+ [2.0, 7.890],
+]
+
+Since we can't be certain that we will find a LIDAR point in the depth image for every pixel in the bounding box of the original image,
+we take the minimum value found within the bounding box of the depth image.
+
+This makes sense, since the LIDAR doesn't detect points behind any object.
+
+If there is no distance found in the depth image, we will return infinity for this bounding box.
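+
+A minimal sketch of that per-box lookup (mirroring the logic in vision_node.py; the function name min_distance_in_box is only illustrative):
+
+```python
+import numpy as np
+
+def min_distance_in_box(depth_image, box_xyxy):
+    # box_xyxy = (x1, y1, x2, y2) in pixel coordinates of the camera image
+    x1, y1, x2, y2 = (int(v) for v in box_xyxy)
+    patch = depth_image[y1:y2, x1:x2]
+    # a value of 0 means no LIDAR point was projected onto that pixel
+    valid = patch[patch != 0]
+    return float(np.min(valid)) if valid.size > 0 else np.inf
+```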
+
+## LIDAR-Configuration
+
+This topic came to our attention when we realised that the LIDAR was flickering, as you can see in the following image series.
+
+![Grayscale Depth Image](../00_assets/2_layover.png)
+![Grayscale Depth Image](../00_assets/3_layover.png)
+![Grayscale Depth Image](../00_assets/4_layover.png)
+
+These are grayscale depth images reconstructed within a span of 600 milliseconds.
+
+We can see that usually only one half of the LIDAR points is visible in each image.
+
+The simple reason for this is the low rotation frequency in the LIDAR configuration.
+
+By increasing the rotation frequency we can achieve a fully reconstructed image. However, a higher rotation frequency results in a lower resolution of LIDAR points per sweep.
+
+We now use a slightly higher rotation frequency that consistently produces a full depth image while receiving only slightly fewer LIDAR points.
+
+While doing this, we noticed that the LIDAR sensor wasn't positioned correctly: for some reason it was offset to the front left.