Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX][MOTION ESTIMATION] Fix error on homography calculation #278

Merged
merged 11 commits into from
Jan 30, 2024
29 changes: 21 additions & 8 deletions demos/camera_motion/src/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
Tracker,
Video,
draw_absolute_grid,
draw_tracked_boxes,
)

from norfair.camera_motion import (
HomographyTransformationGetter,
MotionEstimator,
TranslationTransformationGetter,
)
from norfair.drawing import draw_tracked_objects

from norfair.drawing import draw_points, draw_boxes


def yolo_detections_to_norfair_detections(yolo_detections, track_boxes):
Expand Down Expand Up @@ -66,7 +67,7 @@ def run():
parser.add_argument(
"--distance-threshold",
type=float,
default=0.8,
default=None,
help="Max distance to consider when matching detections and tracked objects",
)
parser.add_argument(
Expand Down Expand Up @@ -223,10 +224,22 @@ def run():
else partial(video.show, downsample_ratio=args.downsample_ratio)
)

distance_threshold = args.distance_threshold
if args.track_boxes:
drawing_function = draw_boxes
distance_function = "iou"
if args.distance_threshold is None:
distance_threshold = 0.5
else:
drawing_function = draw_points
distance_function = "euclidean"
if args.distance_threshold is None:
distance_threshold = video.input_height / 10

tracker = Tracker(
distance_function="euclidean",
distance_function=distance_function,
detection_threshold=args.confidence_threshold,
distance_threshold=args.distance_threshold,
distance_threshold=distance_threshold,
initialization_delay=args.initialization_delay,
hit_counter_max=args.hit_counter_max,
)
Expand Down Expand Up @@ -259,11 +272,11 @@ def run():
)

if args.draw_objects:
draw_tracked_objects(
drawing_function(
frame,
tracked_objects,
id_size=args.id_size,
id_thickness=None
text_size=args.id_size,
text_thickness=None
if args.id_size is None
else int(args.id_size * 2),
)
Expand Down
83 changes: 57 additions & 26 deletions norfair/camera_motion.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"Camera motion estimation module."
import copy
from abc import ABC, abstractmethod
from logging import warning
from typing import Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -148,16 +150,21 @@ def abs_to_rel(self, points: np.ndarray):
ones = np.ones((len(points), 1))
points_with_ones = np.hstack((points, ones))
points_transformed = points_with_ones @ self.homography_matrix.T
points_transformed = points_transformed / points_transformed[:, -1].reshape(
last_column = points_transformed[:, -1]
last_column[last_column == 0] = 0.0000001
points_transformed = points_transformed / last_column.reshape(
-1, 1
)
return points_transformed[:, :2]
new_points_transformed = points_transformed[:, :2]
return new_points_transformed
Copy link
Collaborator

@aguscas aguscas Nov 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch. I assume this is happening when dividing by the last column of the points_transformed array, since it might contain zero values (really weird case, but sure, can happen). Tell me if you think the source of the problem might be something else, because I actually couldn't reproduce the nan or infs error.

Anyway, I'm not exactly convinced that this is the best way of fixing it if you get infinite values. The two main reasons I say that are

  1. You are just returning an array of two points, but take into account that the number of points that this function is receiving can be arbitrary. The expected shape of the output array should be the same as the input.
  2. The 'arbitrarily' chosen 0 coordinates for the output also don't sit well with me. We could instead just add some really small value to the last column of the points_transformed array whenever it has some zero entry, so that when we compute the quotient we are not getting a division by zero. I think at least that feels somewhat less arbitrary than just returning the zero point.

Could you please try the suggestion of my second point and check if you still can reproduce the nan/inf error or if that would also solve it?


def rel_to_abs(self, points: np.ndarray):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the same error that happened in abs_to_rel could also happen in rel_to_abs because of doing essentially the same computation but with the inverse matrix, so the fix should also be applied here. Is there any reason for not fixing it here that I am missing?

ones = np.ones((len(points), 1))
points_with_ones = np.hstack((points, ones))
points_transformed = points_with_ones @ self.inverse_homography_matrix.T
points_transformed = points_transformed / points_transformed[:, -1].reshape(
last_column = points_transformed[:, -1]
last_column[last_column == 0] = 0.0000001
points_transformed = points_transformed / last_column.reshape(
-1, 1
)
return points_transformed[:, :2]
Expand Down Expand Up @@ -212,7 +219,17 @@ def __init__(

def __call__(
self, curr_pts: np.ndarray, prev_pts: np.ndarray
) -> Tuple[bool, HomographyTransformation]:
) -> Tuple[bool, Optional[HomographyTransformation]]:

if not (isinstance(prev_pts, np.ndarray) and prev_pts.shape[0] >= 4
and isinstance(curr_pts, np.ndarray) and curr_pts.shape[0] >= 4):
warning("The homography couldn't be computed in this frame "
"due to low amount of points")
if isinstance(self.data, np.ndarray):
return True, HomographyTransformation(self.data)
else:
return True, None

homography_matrix, points_used = cv2.findHomography(
prev_pts,
curr_pts,
Expand Down Expand Up @@ -340,13 +357,15 @@ def __init__(
transformations_getter = HomographyTransformationGetter()

self.transformations_getter = transformations_getter
self.transformations_getter_copy = copy.deepcopy(transformations_getter)

self.prev_mask = None
self.gray_next = None
self.quality_level = quality_level

def update(
self, frame: np.ndarray, mask: np.ndarray = None
) -> CoordinatesTransformation:
) -> Optional[CoordinatesTransformation]:
"""
Estimate camera motion for each frame

Expand All @@ -371,36 +390,48 @@ def update(
The CoordinatesTransformation that can transform coordinates on this frame to absolute coordinates
or vice versa.
"""

self.gray_next = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if self.gray_prvs is None:
self.gray_prvs = self.gray_next
self.prev_mask = mask

curr_pts, self.prev_pts = _get_sparse_flow(
self.gray_next,
self.gray_prvs,
self.prev_pts,
self.max_points,
self.min_distance,
self.block_size,
self.prev_mask,
quality_level=self.quality_level,
)
if self.draw_flow:
for (curr, prev) in zip(curr_pts, self.prev_pts):
c = tuple(curr.astype(int).ravel())
p = tuple(prev.astype(int).ravel())
cv2.line(frame, c, p, self.flow_color, 2)
cv2.circle(frame, c, 3, self.flow_color, -1)

update_prvs, coord_transformations = self.transformations_getter(
curr_pts,
self.prev_pts,
)
curr_pts, prev_pts = None, None
try:
curr_pts, prev_pts = _get_sparse_flow(
self.gray_next,
self.gray_prvs,
self.prev_pts,
self.max_points,
self.min_distance,
self.block_size,
self.prev_mask,
quality_level=self.quality_level,
)
if self.draw_flow:
for (curr, prev) in zip(curr_pts, prev_pts):
c = tuple(curr.astype(int).ravel())
p = tuple(prev.astype(int).ravel())
cv2.line(frame, c, p, self.flow_color, 2)
cv2.circle(frame, c, 3, self.flow_color, -1)
except Exception as e:
warning(e)

update_prvs, coord_transformations = True, None
try:
update_prvs, coord_transformations = self.transformations_getter(
curr_pts, prev_pts
)
except Exception as e:
warning(e)
del self.transformations_getter
self.transformations_getter = copy.deepcopy(self.transformations_getter_copy)

if update_prvs:
self.gray_prvs = self.gray_next
self.prev_pts = None
self.prev_mask = mask
else:
self.prev_pts = prev_pts

return coord_transformations
Loading