Merge pull request #1790 from roboflow/feature/support_for_qwen_2_5_vl

Add support for converting Qwen2.5-VL output into sv.Detections
roboflow · Feb 18, 2025 · 752818e
2 parents dc449c4 + 2e38e89
commit 752818e
Show file tree

Hide file tree

Showing 14 changed files with 584 additions and 322 deletions.
diff --git a/docs/how_to/track_objects.md b/docs/how_to/track_objects.md
@@ -55,7 +55,7 @@ it will be modified to include tracking, labeling, and trace annotations.
     from ultralytics import YOLO
 
     model = YOLO("yolov8n.pt")
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
         results = model(frame)[0]
@@ -77,7 +77,7 @@ it will be modified to include tracking, labeling, and trace annotations.
     from inference.models.utils import get_roboflow_model
 
     model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
         results = model.infer(frame)[0]
@@ -112,7 +112,7 @@ enabling the continuous following of the object's motion path across different f
 
     model = YOLO("yolov8n.pt")
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
         results = model(frame)[0]
@@ -136,7 +136,7 @@ enabling the continuous following of the object's motion path across different f
 
     model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
         results = model.infer(frame)[0]
@@ -168,7 +168,7 @@ offering a clear visual representation of each object's class and unique identif
 
     model = YOLO("yolov8n.pt")
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
@@ -203,7 +203,7 @@ offering a clear visual representation of each object's class and unique identif
 
     model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
 
     def callback(frame: np.ndarray, _: int) -> np.ndarray:
@@ -250,7 +250,7 @@ movement patterns and interactions between objects in the video.
 
     model = YOLO("yolov8n.pt")
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
     trace_annotator = sv.TraceAnnotator()
 
@@ -288,7 +288,7 @@ movement patterns and interactions between objects in the video.
 
     model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
     trace_annotator = sv.TraceAnnotator()
 

diff --git a/examples/count_people_in_zone/inference_example.py b/examples/count_people_in_zone/inference_example.py
@@ -35,9 +35,7 @@ def load_zones_config(file_path: str) -> List[np.ndarray]:
 
 def initiate_annotators(
     polygons: List[np.ndarray], resolution_wh: Tuple[int, int]
-) -> Tuple[
-    List[sv.PolygonZone], List[sv.PolygonZoneAnnotator], List[sv.BoundingBoxAnnotator]
-]:
+) -> Tuple[List[sv.PolygonZone], List[sv.PolygonZoneAnnotator], List[sv.BoxAnnotator]]:
     line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
 
@@ -54,7 +52,7 @@ def initiate_annotators(
             text_thickness=line_thickness * 2,
             text_scale=text_scale * 2,
         )
-        box_annotator = sv.BoundingBoxAnnotator(
+        box_annotator = sv.BoxAnnotator(
             color=COLORS.by_idx(index), thickness=line_thickness
         )
         zones.append(zone)
@@ -97,7 +95,7 @@ def annotate(
     frame: np.ndarray,
     zones: List[sv.PolygonZone],
     zone_annotators: List[sv.PolygonZoneAnnotator],
-    box_annotators: List[sv.BoundingBoxAnnotator],
+    box_annotators: List[sv.BoxAnnotator],
     detections: sv.Detections,
 ) -> np.ndarray:
     """
@@ -108,7 +106,7 @@ def annotate(
         zones (List[sv.PolygonZone]): A list of polygon zones used for detection.
         zone_annotators (List[sv.PolygonZoneAnnotator]): A list of annotators for
             drawing zone annotations.
-        box_annotators (List[sv.BoundingBoxAnnotator]): A list of annotators for
+        box_annotators (List[sv.BoxAnnotator]): A list of annotators for
             drawing box annotations.
         detections (sv.Detections): Detections to be used for annotation.
 

diff --git a/examples/count_people_in_zone/ultralytics_example.py b/examples/count_people_in_zone/ultralytics_example.py
@@ -33,9 +33,7 @@ def load_zones_config(file_path: str) -> List[np.ndarray]:
 
 def initiate_annotators(
     polygons: List[np.ndarray], resolution_wh: Tuple[int, int]
-) -> Tuple[
-    List[sv.PolygonZone], List[sv.PolygonZoneAnnotator], List[sv.BoundingBoxAnnotator]
-]:
+) -> Tuple[List[sv.PolygonZone], List[sv.PolygonZoneAnnotator], List[sv.BoxAnnotator]]:
     line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
     text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
 
@@ -52,7 +50,7 @@ def initiate_annotators(
             text_thickness=line_thickness * 2,
             text_scale=text_scale * 2,
         )
-        box_annotator = sv.BoundingBoxAnnotator(
+        box_annotator = sv.BoxAnnotator(
             color=COLORS.by_idx(index), thickness=line_thickness
         )
         zones.append(zone)
@@ -94,7 +92,7 @@ def annotate(
     frame: np.ndarray,
     zones: List[sv.PolygonZone],
     zone_annotators: List[sv.PolygonZoneAnnotator],
-    box_annotators: List[sv.BoundingBoxAnnotator],
+    box_annotators: List[sv.BoxAnnotator],
     detections: sv.Detections,
 ) -> np.ndarray:
     """
@@ -105,7 +103,7 @@ def annotate(
         zones (List[sv.PolygonZone]): A list of polygon zones used for detection.
         zone_annotators (List[sv.PolygonZoneAnnotator]): A list of annotators for
             drawing zone annotations.
-        box_annotators (List[sv.BoundingBoxAnnotator]): A list of annotators for
+        box_annotators (List[sv.BoxAnnotator]): A list of annotators for
             drawing box annotations.
         detections (sv.Detections): Detections to be used for annotation.
 

diff --git a/examples/tracking/inference_example.py b/examples/tracking/inference_example.py
@@ -18,7 +18,7 @@ def process_video(
     model = get_roboflow_model(model_id=model_id, api_key=roboflow_api_key)
 
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
     frame_generator = sv.get_video_frames_generator(source_path=source_video_path)
     video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)

diff --git a/examples/tracking/ultralytics_example.py b/examples/tracking/ultralytics_example.py
@@ -16,7 +16,7 @@ def process_video(
     model = YOLO(source_weights_path)
 
     tracker = sv.ByteTrack()
-    box_annotator = sv.BoundingBoxAnnotator()
+    box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
     frame_generator = sv.get_video_frames_generator(source_path=source_video_path)
     video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "supervision"
 description = "A set of easy-to-use utils that will come in handy in any Computer Vision project"
 license = { text = "MIT" }
-version = "0.26.0rc3"
+version = "0.26.0rc4"
 readme = "README.md"
 requires-python = ">=3.8"
 authors = [

diff --git a/supervision/__init__.py b/supervision/__init__.py
@@ -9,7 +9,6 @@
 from supervision.annotators.core import (
     BackgroundOverlayAnnotator,
     BlurAnnotator,
-    BoundingBoxAnnotator,
     BoxAnnotator,
     BoxCornerAnnotator,
     CircleAnnotator,
@@ -46,7 +45,6 @@
     LineZoneAnnotator,
     LineZoneAnnotatorMulticlass,
 )
-from supervision.detection.lmm import LMM
 from supervision.detection.overlap_filter import (
     OverlapFilter,
     box_non_max_merge,
@@ -80,6 +78,7 @@
     xyxy_to_polygons,
     xyxy_to_xywh,
 )
+from supervision.detection.vlm import LMM, VLM
 from supervision.draw.color import Color, ColorPalette
 from supervision.draw.utils import (
     calculate_optimal_line_thickness,
@@ -127,7 +126,6 @@
     "BackgroundOverlayAnnotator",
     "BaseDataset",
     "BlurAnnotator",
-    "BoundingBoxAnnotator",
     "BoxAnnotator",
     "BoxCornerAnnotator",
     "ByteTrack",

diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py
@@ -36,7 +36,6 @@
     overlay_image,
     scale_image,
 )
-from supervision.utils.internal import deprecated
 
 CV2_FONT = cv2.FONT_HERSHEY_SIMPLEX
 
@@ -124,92 +123,6 @@ def annotate(
         return scene
 
 
-@deprecated(
-    "`BoundingBoxAnnotator` is deprecated and has been renamed to `BoxAnnotator`."
-    " `BoundingBoxAnnotator` will be removed in supervision-0.26.0."
-)
-class BoundingBoxAnnotator(BaseAnnotator):
-    """
-    A class for drawing bounding boxes on an image using provided detections.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        thickness: int = 2,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating detections.
-            thickness (int): Thickness of the bounding box lines.
-            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-        """
-        self.color: Union[Color, ColorPalette] = color
-        self.thickness: int = thickness
-        self.color_lookup: ColorLookup = color_lookup
-
-    @ensure_cv2_image_for_annotation
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with bounding boxes based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where bounding boxes will be drawn. `ImageType`
-            is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
-
-            bounding_box_annotator = sv.BoundingBoxAnnotator()
-            annotated_frame = bounding_box_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-
-        ![bounding-box-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/bounding-box-annotator-example-purple.png)
-        """
-        assert isinstance(scene, np.ndarray)
-        for detection_idx in range(len(detections)):
-            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            cv2.rectangle(
-                img=scene,
-                pt1=(x1, y1),
-                pt2=(x2, y2),
-                color=color.as_bgr(),
-                thickness=self.thickness,
-            )
-        return scene
-
-
 class OrientedBoxAnnotator(BaseAnnotator):
     """
     A class for drawing oriented bounding boxes on an image using provided detections.

diff --git a/supervision/dataset/core.py b/supervision/dataset/core.py
@@ -705,28 +705,6 @@ def __init__(
                 "a list of paths `List[str]` instead."
             )
 
-    @property
-    @deprecated(
-        "`DetectionDataset.images` property is deprecated and will be removed in "
-        "`supervision-0.26.0`. Iterate with `for path, image, annotation in dataset:` "
-        "instead."
-    )
-    def images(self) -> Dict[str, np.ndarray]:
-        """
-        Load all images to memory and return them as a dictionary.
-
-        !!! warning
-
-            Only use this when you need all images at once.
-            It is much more memory-efficient to initialize dataset with
-            image paths and use `for path, image, annotation in dataset:`.
-        """
-        if self._images_in_memory:
-            return self._images_in_memory
-
-        images = {image_path: cv2.imread(image_path) for image_path in self.image_paths}
-        return images
-
     def _get_image(self, image_path: str) -> np.ndarray:
         """Assumes that image is in dataset"""
         if self._images_in_memory: