Commit 97517dc
modifications to nodes to adapt for field test (onnx inference + display, code changed for utils)
Ishaan-Datta committed Dec 7, 2024
1 parent dab4f80 commit 97517dc
Showing 9 changed files with 145 additions and 39 deletions.
python_wip/bbox_display.py (2 changes: 1 addition & 1 deletion)
@@ -33,7 +33,7 @@ def draw_bounding_boxes(image_path, bboxes):

# Show the image with bounding boxes (press any key to close)
cv2.imshow('Bounding Boxes', img)
-cv2.waitKey(10000)
+cv2.waitKey(100000)
cv2.destroyAllWindows()

def read_bounding_boxes(txt_file):
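The only change here lengthens the display timeout: cv2.waitKey takes a delay in milliseconds, returns early on a key press, and blocks indefinitely when given 0. A minimal sketch of the pattern (the image path is illustrative):

```python
import cv2

img = cv2.imread("annotated.png")  # illustrative path
cv2.imshow('Bounding Boxes', img)
cv2.waitKey(100000)                # wait up to 100 s, or until any key is pressed
cv2.destroyAllWindows()
```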
python_wip/display.py (11 changes: 5 additions & 6 deletions)
@@ -8,7 +8,7 @@
# should take boolean for displaying roi or not, import path verify library for image source
# copy ultralytics color picker selection

-def display_annotated_image(image_path, boxes, labels=None, colors=None, window_name="Annotated Image"):
+def display_annotated_image(image, boxes, labels=None, colors=None, window_name="Annotated Image"):
"""
Display an image with annotated bounding boxes in an OpenCV window using Arial font for labels.
@@ -20,7 +20,7 @@ def display_annotated_image(image_path, boxes, labels=None, colors=None, window_name="Annotated Image"):
window_name (str): Name of the OpenCV window. Defaults to "Annotated Image".
"""
# Convert OpenCV image to PIL Image
-image = cv2.imread(image_path)
+# image = cv2.imread(image_path)

# Load Arial font
try:
@@ -32,7 +32,7 @@ def display_annotated_image(image_path, boxes, labels=None, colors=None, window_name="Annotated Image"):
if labels is None:
labels = ["12%" for _ in boxes]
if colors is None:
-colors = [tuple(np.random.randint(0, 256, size=3).tolist()) for _ in boxes]
+colors = [(0, 255, 0) for _ in boxes]

tic = time.perf_counter_ns()
annotated_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
@@ -66,11 +66,8 @@ def display_annotated_image(image_path, boxes, labels=None, colors=None, window_name="Annotated Image"):

# Convert annotated image back to OpenCV format
image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
-
toc = time.perf_counter_ns()
-
pil_transform = toc-tic
-

overlay = image.copy()
output = image.copy()
@@ -85,6 +82,8 @@ def display_annotated_image(image_path, boxes, labels=None, colors=None, window_name="Annotated Image"):

overlay = tic-toc

+return output
+
print(f"pillow: {pil_transform/1e6} ms")
print(f"overlay: {overlay/1e6} ms")

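After the refactor, the helper takes an in-memory BGR array and returns the composited frame instead of reading from disk; a hedged usage sketch (file name and box coordinates are illustrative):

```python
import cv2
from display import display_annotated_image

frame = cv2.imread("frame.png")  # BGR array, e.g. straight from CvBridge
boxes = [(50, 40, 200, 180)]     # pixel-space (x1, y1, x2, y2)
out = display_annotated_image(frame, boxes, labels=["87%"])
cv2.imshow("Annotated Image", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
```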
ros2_ws/src/python_workspace/python_workspace/camera_node.py (9 changes: 2 additions & 7 deletions)
@@ -28,13 +28,8 @@ def __init__(self):
self.roi_dimensions = self.get_parameter('roi_dimensions').get_parameter_value().integer_array_value
self.source_type = self.get_parameter('source_type').get_parameter_value().string_value

-# path = ""
-
-# # initialize image_source type
-# image = ImageSource(self.source_type, path)
-
self.shift_constant = 0
-self.frame_rate = 1
+self.frame_rate = 10
self.model_dimensions = (640, 640)
self.velocity = 0.0
self.bridge = CvBridge()
@@ -93,7 +88,7 @@ def preprocess_image(self, image):
shifted_x2 = roi_x2 + self.shift_constant

preprocessed_img = image[int(roi_y1):int(roi_y2), int(shifted_x1):int(shifted_x2), :3] # replace w/ util
-preprocessed_img = cv2.resize(preprocessed_img, self.model_dimensions) # check if necessary?
+preprocessed_img = cv2.resize(preprocessed_img, self.model_dimensions)
preprocessed_img_msg = self.bridge.cv2_to_imgmsg(preprocessed_img, encoding='rgb8')

raw_image = image[:, :, :3]
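camera_node's preprocessing now reduces to an ROI crop plus an unconditional resize to the model input; a standalone sketch under the node's apparent assumptions (ROI ordered as x1, y1, x2, y2; horizontal shift only; 640x640 model input):

```python
import cv2
import numpy as np

def preprocess(image: np.ndarray, roi, shift=0, size=(640, 640)):
    """Crop the (horizontally shifted) ROI and resize it to the model input size."""
    x1, y1, x2, y2 = roi  # assumed ordering of the roi_dimensions parameter
    crop = image[int(y1):int(y2), int(x1 + shift):int(x2 + shift), :3]
    return cv2.resize(crop, size)
```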
ros2_ws/src/python_workspace/python_workspace/extermination_node.py (11 changes: 9 additions & 2 deletions)
@@ -1,4 +1,6 @@
import cv2
+import sys
+import os

import rclpy
from rclpy.time import Time
@@ -11,6 +13,8 @@
from custom_interfaces.msg import InferenceOutput

from .scripts.utils import ModelInference
+sys.path.append(os.path.abspath('/home/user/ROS/python_wip'))
+from display import display_annotated_image

class ExterminationNode(Node):
def __init__(self):
@@ -47,11 +51,13 @@ def inference_callback(self, msg):
preprocessed_image = self.bridge.imgmsg_to_cv2(msg.preprocessed_image, desired_encoding='passthrough') # what is this needed for?
raw_image = self.bridge.imgmsg_to_cv2(msg.raw_image, desired_encoding='passthrough')
bounding_boxes = self.model.postprocess(msg.confidences.data,msg.bounding_boxes.data, raw_image,msg.velocity)
-final_image = self.model.draw_boxes(raw_image,bounding_boxes,velocity=msg.velocity)
+# final_image = self.model.draw_boxes(raw_image,bounding_boxes,velocity=msg.velocity)
+labels = [f"{i}%" for i in msg.confidences.data]
+final_image = display_annotated_image(raw_image, bounding_boxes, labels)

if self.use_display_node:
cv2.imshow(self.window, final_image)
-cv2.waitKey(10)
+cv2.waitKey(0)

if len(bounding_boxes) > 0:
self.boxes_present = 1
@@ -76,6 +82,7 @@ def main(args=None):
except KeyboardInterrupt:
print("Shutting down extermination node")
finally:
+cv2.destroyAllWindows()
executor.shutdown()
extermination_node.destroy_node()
rclpy.shutdown()
ros2_ws/src/python_workspace/python_workspace/inference_node.py (12 changes: 9 additions & 3 deletions)
@@ -1,5 +1,6 @@
import time
import os
+import sys

import rclpy
from rclpy.time import Time
@@ -11,7 +12,9 @@
from std_msgs.msg import Int32MultiArray, Float32MultiArray
from custom_interfaces.msg import ImageInput, InferenceOutput

-from .scripts.utils import ModelInference
+# from .scripts.utils import ModelInference
+sys.path.append(os.path.abspath('/home/user/ROS/tools/utils'))
+from new_infer import ONNXModel

class InferenceNode(Node):
def __init__(self):
@@ -26,7 +29,9 @@ def __init__(self):
self.weights_path = self.get_parameter('weights_path').get_parameter_value().string_value
self.precision = self.get_parameter('precision').get_parameter_value().string_value

-self.model = ModelInference(self.weights_path, self.precision)
+self.get_logger().info("Loading ONNX Model")
+self.model = ONNXModel(self.weights_path)
+# self.model = ModelInference(self.weights_path, self.precision)
self.bridge = CvBridge()

self.image_subscription = self.create_subscription(ImageInput, f'{self.camera_side}_image_input', self.image_callback, 10)
@@ -36,7 +41,8 @@ def image_callback(self, msg):
self.get_logger().info("Received Image")
opencv_img = self.bridge.imgmsg_to_cv2(msg.preprocessed_image, desired_encoding='passthrough')
tic = time.perf_counter_ns()
-output_img, confidences, boxes = self.model.inference(opencv_img)
+# output_img, confidences, boxes = self.model.inference(opencv_img)
+confidences, boxes = self.model(opencv_img)
toc = time.perf_counter_ns()

output_msg = InferenceOutput()
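The remainder of the callback (not shown in this hunk) packs the results into InferenceOutput; a hedged sketch of that packing, with field names taken from their usage in extermination_node above (the exact .msg definition is not shown in this diff):

```python
from std_msgs.msg import Float32MultiArray
from custom_interfaces.msg import InferenceOutput

def pack_output(confidences, boxes, preprocessed_msg, raw_msg, velocity):
    """Flatten ONNXModel results into an InferenceOutput message (fields assumed)."""
    out = InferenceOutput()
    out.preprocessed_image = preprocessed_msg
    out.raw_image = raw_msg
    out.velocity = velocity
    out.confidences = Float32MultiArray(data=[float(c) for c in confidences])
    out.bounding_boxes = Float32MultiArray(data=[float(v) for box in boxes for v in box])
    return out
```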
ros2_ws/src/python_workspace/python_workspace/scripts/utils.py (39 changes: 19 additions & 20 deletions)
@@ -100,26 +100,25 @@ def postprocess(self, confidence, bbox_array,raw_image: np.ndarray, velocity=0):
Returns:
list: A list of refined bounding boxes in pixel coordinates (xyxy).
"""
-        velocity = int(velocity)
-        detections = []
-        if len(bbox_array) > 0 and type(bbox_array[0]) != tuple: #CONVERT TO TUPLE
-            for offset in range(int(len(bbox_array) / 4)):
-                x1 = int(bbox_array[offset * 4] * raw_image.shape[1])
-                y1 = int(bbox_array[offset * 4 + 1] * raw_image.shape[0])
-                x2 = int(bbox_array[offset * 4 + 2] * raw_image.shape[1])
-                y2 = int(bbox_array[offset * 4 + 3] * raw_image.shape[0])
-                detections.append((x1, y1, x2, y2))
-        else:
-            for bbox in bbox_array: #resize so it fits the original image
-                x1,y1,x2,y2 = bbox
-
-                x1 = int(x1 * raw_image.shape[1])
-                y1 = int(y1 * raw_image.shape[0])
-                x2 = int(x2 * raw_image.shape[1])
-                y2 = int(y2 * raw_image.shape[0])
-
-                detections.append((x1, y1, x2, y2))
-
+        # velocity = int(velocity)
+        # detections = []
+        # if len(bbox_array) > 0 and type(bbox_array[0]) != tuple: #CONVERT TO TUPLE
+        #     for offset in range(int(len(bbox_array) / 4)):
+        #         x1 = int(bbox_array[offset * 4] * raw_image.shape[1])
+        #         y1 = int(bbox_array[offset * 4 + 1] * raw_image.shape[0])
+        #         x2 = int(bbox_array[offset * 4 + 2] * raw_image.shape[1])
+        #         y2 = int(bbox_array[offset * 4 + 3] * raw_image.shape[0])
+        #         detections.append((x1, y1, x2, y2))
+        # else:
+        #     for bbox in bbox_array: #resize so it fits the original image
+        #         x1,y1,x2,y2 = bbox
+
+        #         x1 = int(x1 * raw_image.shape[1])
+        #         y1 = int(y1 * raw_image.shape[0])
+        #         x2 = int(x2 * raw_image.shape[1])
+        #         y2 = int(y2 * raw_image.shape[0])
+
+        #         detections.append((x1, y1, x2, y2))

        detections = self.object_filter(raw_image, detections) #color segmentation
        detections = self.verify_object(raw_image, detections,velocity)
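Note that with the conversion commented out, `detections` is never assigned before the `object_filter` call, so `postprocess` as committed relies on the incoming boxes already being usable as-is. For reference, a sketch of the rescaling the removed block performed, assuming a flat array of normalized xyxy values (the helper name is hypothetical):

```python
import numpy as np

def to_pixel_xyxy(flat_boxes, image: np.ndarray):
    """Convert a flat [x1, y1, x2, y2, ...] array of normalized boxes
    into pixel-space (x1, y1, x2, y2) tuples for the given image."""
    h, w = image.shape[:2]
    return [
        (int(flat_boxes[i] * w), int(flat_boxes[i + 1] * h),
         int(flat_boxes[i + 2] * w), int(flat_boxes[i + 3] * h))
        for i in range(0, len(flat_boxes), 4)
    ]
```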
tools/utils/new_infer.py (100 changes: 100 additions & 0 deletions)
@@ -0,0 +1,100 @@
import os
import cv2
from onnxruntime import InferenceSession
import warnings
import numpy as np

class ONNXModel:
    def __init__(self, onnx_path, session=None):
        warnings.filterwarnings("ignore")
        self.session = session
        if self.session is None:
            assert onnx_path is not None
            assert os.path.exists(onnx_path)
            self.session = InferenceSession(onnx_path,
                                            providers=['CPUExecutionProvider'])  # fix
        self.inputs = self.session.get_inputs()[0]
        self.confidence_threshold = 0.5
        self.iou_threshold = 0.9
        self.input_size = 640
        # Warm up the session on a dummy input so field-time latency is stable
        shape = (1, 3, self.input_size, self.input_size)
        image = np.zeros(shape, dtype='float32')
        for _ in range(10):
            self.session.run(output_names=None,
                             input_feed={self.inputs.name: image})

    def __call__(self, image):
        image, scale = self.resize(image, self.input_size)
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.transpose((2, 0, 1))[::-1]  # HWC -> CHW, then reverse channels (BGR -> RGB)
        image = image.astype('float32') / 255
        image = image[np.newaxis, ...]

        outputs = self.session.run(output_names=None,
                                   input_feed={self.inputs.name: image})
        outputs = np.transpose(np.squeeze(outputs[0]))

        # Lists to store the bounding boxes, scores, and class IDs of the detections
        boxes = []
        scores = []
        class_indices = []

        # Iterate over each row in the outputs array
        for i in range(outputs.shape[0]):
            # Extract the class scores from the current row
            classes_scores = outputs[i][4:]

            # Find the maximum score among the class scores
            max_score = np.amax(classes_scores)

            # If the maximum score is above the confidence threshold
            if max_score >= self.confidence_threshold:
                # Get the class ID with the highest score
                class_id = np.argmax(classes_scores)

                # Extract the bounding box coordinates (center x/y, width, height) from the current row
                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]

                # Calculate the scaled coordinates of the bounding box
                left = int((x - w / 2) / scale)
                top = int((y - h / 2) / scale)
                width = int(w / scale)
                height = int(h / scale)

                # Add the class ID, score, and box coordinates to the respective lists
                class_indices.append(class_id)
                scores.append(max_score)
                boxes.append([left, top, width, height])

        # Apply non-maximum suppression to filter out overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_threshold, self.iou_threshold)

        # Iterate over the selected indices and convert the kept boxes to xyxy
        confidences = []
        bboxes = []
        for i in indices:
            x, y, w, h = boxes[i]
            x1, y1 = x, y
            x2, y2 = x + w, y + h
            bboxes.append([x1, y1, x2, y2])
            confidences.append(scores[i])
        return confidences, bboxes

    @staticmethod
    def resize(image, input_size):
        # Letterbox: scale the longer side to input_size, pad the remainder with zeros
        shape = image.shape

        ratio = float(shape[0]) / shape[1]
        if ratio > 1:
            h = input_size
            w = int(h / ratio)
        else:
            w = input_size
            h = int(w * ratio)
        scale = float(h) / shape[0]
        resized_image = cv2.resize(image, (w, h))
        det_image = np.zeros((input_size, input_size, 3), dtype=np.uint8)
        det_image[:h, :w, :] = resized_image
        return det_image, scale
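A minimal usage sketch for the new ONNXModel wrapper (the weights path and test image are illustrative):

```python
import cv2
from new_infer import ONNXModel

model = ONNXModel("weights/model.onnx")  # illustrative path to an exported YOLO-style ONNX file
frame = cv2.imread("test_frame.png")     # BGR image at any resolution
confidences, boxes = model(frame)        # boxes come back as pixel-space [x1, y1, x2, y2]
for conf, (x1, y1, x2, y2) in zip(confidences, boxes):
    print(f"{conf:.2f}: ({x1}, {y1}) -> ({x2}, {y2})")
```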
File renamed without changes.
Empty file added tools/utils/new_pre.py
