Allow users to choose the bbox clamping mode #9128

Merged: 7 commits, Jun 30, 2025
8 changes: 6 additions & 2 deletions test/common_utils.py
@@ -410,6 +410,7 @@ def make_bounding_boxes(
canvas_size=DEFAULT_SIZE,
*,
format=tv_tensors.BoundingBoxFormat.XYXY,
clamping_mode="hard", # TODOBB
num_boxes=1,
dtype=None,
device="cpu",
@@ -474,13 +475,16 @@ def sample_position(values, max_value):
# numerical issues during the testing
buffer = 4
out_boxes = clamp_bounding_boxes(
out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
out_boxes,
format=format,
canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer),
clamping_mode=clamping_mode,
)
if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
out_boxes[:, :2] += buffer // 2
elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
out_boxes[:, :] += buffer // 2
return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode)


def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
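For reference, here is a minimal sketch of how the updated helper might be called from a test; the import path and the concrete values are illustrative assumptions, only the keyword follows the signature shown above.

    from common_utils import make_bounding_boxes  # test helper above; import path is an assumption
    from torchvision import tv_tensors

    # "hard" stays the default; passing "none" propagates to the returned BoundingBoxes.
    boxes = make_bounding_boxes(
        format=tv_tensors.BoundingBoxFormat.XYXY,
        clamping_mode="none",
        num_boxes=4,
    )
    assert boxes.clamping_mode == "none"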
123 changes: 115 additions & 8 deletions test/test_transforms_v2.py
@@ -492,6 +492,7 @@ def adapt_fill(value, *, dtype):
def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
format = bounding_boxes.format
canvas_size = new_canvas_size or bounding_boxes.canvas_size
clamping_mode = bounding_boxes.clamping_mode

def affine_bounding_boxes(bounding_boxes):
dtype = bounding_boxes.dtype
@@ -535,6 +536,7 @@ def affine_bounding_boxes(bounding_boxes):
output,
format=format,
canvas_size=canvas_size,
clamping_mode=clamping_mode,
)
else:
# We leave the bounding box as float64 so the caller gets the full precision to perform any additional
@@ -557,6 +559,7 @@ def reference_affine_rotated_bounding_boxes_helper(
):
format = bounding_boxes.format
canvas_size = new_canvas_size or bounding_boxes.canvas_size
clamping_mode = bounding_boxes.clamping_mode

def affine_rotated_bounding_boxes(bounding_boxes):
dtype = bounding_boxes.dtype
@@ -618,6 +621,7 @@ def affine_rotated_bounding_boxes(bounding_boxes):
output.to(dtype=dtype, device=device),
format=format,
canvas_size=canvas_size,
clamping_mode=clamping_mode,
)
if clamp
else output.to(dtype=output.dtype, device=device)
@@ -831,7 +835,6 @@ def test_functional(self, size, make_input):
(F.resize_image, torch.Tensor),
(F._geometry._resize_image_pil, PIL.Image.Image),
(F.resize_image, tv_tensors.Image),
(F.resize_bounding_boxes, tv_tensors.BoundingBoxes),
(F.resize_mask, tv_tensors.Mask),
(F.resize_video, tv_tensors.Video),
(F.resize_keypoints, tv_tensors.KeyPoints),
@@ -3289,7 +3292,6 @@ def test_functional(self, make_input):
(F.elastic_image, torch.Tensor),
(F._geometry._elastic_image_pil, PIL.Image.Image),
(F.elastic_image, tv_tensors.Image),
(F.elastic_bounding_boxes, tv_tensors.BoundingBoxes),
(F.elastic_mask, tv_tensors.Mask),
(F.elastic_video, tv_tensors.Video),
(F.elastic_keypoints, tv_tensors.KeyPoints),
@@ -5126,6 +5128,7 @@ def test_image_functional_correctness(self, coefficients, interpolation, fill):
def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints):
format = bounding_boxes.format
canvas_size = bounding_boxes.canvas_size
clamping_mode = bounding_boxes.clamping_mode
dtype = bounding_boxes.dtype
device = bounding_boxes.device
is_rotated = tv_tensors.is_rotated_bounding_format(format)
@@ -5226,6 +5229,7 @@ def perspective_bounding_boxes(bounding_boxes):
output,
format=format,
canvas_size=canvas_size,
clamping_mode=clamping_mode,
).to(dtype=dtype, device=device)

return tv_tensors.BoundingBoxes(
@@ -5506,29 +5510,35 @@ def test_correctness_image(self, mean, std, dtype, fn):

class TestClampBoundingBoxes:
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel(self, format, dtype, device):
bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)
def test_kernel(self, format, clamping_mode, dtype, device):
bounding_boxes = make_bounding_boxes(format=format, clamping_mode=clamping_mode, dtype=dtype, device=device)
check_kernel(
F.clamp_bounding_boxes,
bounding_boxes,
format=bounding_boxes.format,
canvas_size=bounding_boxes.canvas_size,
clamping_mode=clamping_mode,
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
def test_functional(self, format):
check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))
@pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft
def test_functional(self, format, clamping_mode):
check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format, clamping_mode=clamping_mode))

def test_errors(self):
input_tv_tensor = make_bounding_boxes()
input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor)
format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size

for format_, canvas_size_ in [(None, None), (format, None), (None, canvas_size)]:
for format_, canvas_size_, clamping_mode_ in itertools.product(
(format, None), (canvas_size, None), (input_tv_tensor.clamping_mode, None)
):
with pytest.raises(
ValueError, match="For pure tensor inputs, `format` and `canvas_size` have to be passed."
ValueError,
match="For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed.",
):
F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_)

@@ -5541,6 +5551,103 @@ def test_errors(self):
def test_transform(self):
check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes())

@pytest.mark.parametrize("rotated", (True, False))
@pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none"))
@pytest.mark.parametrize("clamping_mode", ("hard", "none", None)) # TODOBB add soft here.
@pytest.mark.parametrize("pass_pure_tensor", (True, False))
@pytest.mark.parametrize("fn", [F.clamp_bounding_boxes, transform_cls_to_functional(transforms.ClampBoundingBoxes)])
def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, pass_pure_tensor, fn):
# This test checks 2 things:
# - That passing clamping_mode=None to the clamp_bounding_boxes
# functional (or to the class) relies on the box's `.clamping_mode`
# attribute
# - That clamping happens when it should, and only when it should, i.e.
# when the clamping mode is not "none". It doesn't validate the
# numerical results, only that clamping happened. For that, we create
# a large 100x100 box inside of a small 10x10 image.

if pass_pure_tensor and fn is not F.clamp_bounding_boxes:
# Only the functional supports pure tensors, not the class
return
if pass_pure_tensor and clamping_mode is None:
# cannot leave clamping_mode=None when passing pure tensor
return

if rotated:
boxes = tv_tensors.BoundingBoxes(
[0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
)
expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]])
else:
boxes = tv_tensors.BoundingBoxes(
[0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode
)
expected_clamped_output = torch.tensor([[0, 10, 0, 10]])

if pass_pure_tensor:
out = fn(
boxes.as_subclass(torch.Tensor),
format=boxes.format,
canvas_size=boxes.canvas_size,
clamping_mode=clamping_mode,
)
else:
out = fn(boxes, clamping_mode=clamping_mode)

clamping_mode_prevailing = constructor_clamping_mode if clamping_mode is None else clamping_mode
if clamping_mode_prevailing == "none":
assert_equal(boxes, out) # should be a pass-through
else:
assert_equal(out, expected_clamped_output)


class TestSetClampingMode:
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft
@pytest.mark.parametrize("desired_clamping_mode", ("hard", "none")) # TODOBB add soft
def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode):

in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode)
out_boxes = transforms.SetClampingMode(clamping_mode=desired_clamping_mode)(in_boxes)

assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak
assert out_boxes.clamping_mode == desired_clamping_mode

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft
def test_pipeline_no_leak(self, format, constructor_clamping_mode):
class AssertClampingMode(transforms.Transform):
def __init__(self, expected_clamping_mode):
super().__init__()
self.expected_clamping_mode = expected_clamping_mode

_transformed_types = (tv_tensors.BoundingBoxes,)

def transform(self, inpt, _):
assert inpt.clamping_mode == self.expected_clamping_mode
return inpt

t = transforms.Compose(
[
transforms.SetClampingMode("none"),
AssertClampingMode("none"),
transforms.SetClampingMode("hard"),
AssertClampingMode("hard"),
transforms.SetClampingMode("none"),
AssertClampingMode("none"),
transforms.ClampBoundingBoxes("hard"),
]
)

in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode)
out_boxes = t(in_boxes)

assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak

# assert that the output boxes clamping_mode is the one set by the last SetClampingMode.
# ClampBoundingBoxes doesn't set clamping_mode.
assert out_boxes.clamping_mode == "none"


class TestClampKeyPoints:
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
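The behaviour exercised by test_clamping_mode above boils down to the following sketch; it reuses the oversized box and the 10x10 canvas from the test, and the clamped values quoted in the comments follow the test's expected output rather than an independent check.

    import torch
    from torchvision import tv_tensors
    from torchvision.transforms.v2 import functional as F

    boxes = tv_tensors.BoundingBoxes(
        [0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode="hard"
    )

    # clamping_mode=None falls back to the box's own .clamping_mode ("hard" here),
    # so the coordinates are clamped into the canvas: tensor([[0, 10, 0, 10]]) per the test.
    clamped = F.clamp_bounding_boxes(boxes, clamping_mode=None)

    # An explicit "none" overrides the attribute and turns the call into a pass-through.
    untouched = F.clamp_bounding_boxes(boxes, clamping_mode="none")

    # Pure tensors are supported too, but then format, canvas_size and clamping_mode
    # must all be passed explicitly (see test_errors above).
    clamped_tensor = F.clamp_bounding_boxes(
        boxes.as_subclass(torch.Tensor),
        format=boxes.format,
        canvas_size=boxes.canvas_size,
        clamping_mode="hard",
    )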
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/__init__.py
@@ -41,7 +41,7 @@
ScaleJitter,
TenCrop,
)
from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat
from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat, SetClampingMode
from ._misc import (
ConvertImageDtype,
GaussianBlur,
28 changes: 26 additions & 2 deletions torchvision/transforms/v2/_meta.py
@@ -1,7 +1,8 @@
from typing import Any, Union
from typing import Any, Optional, Union

from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F, Transform
from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE


class ConvertBoundingBoxFormat(Transform):
@@ -28,12 +29,19 @@ class ClampBoundingBoxes(Transform):

The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.

Args:
clamping_mode: TODOBB more docs. Default is None, which relies on the input box's ``clamping_mode`` attribute.

"""

def __init__(self, clamping_mode: Optional[CLAMPING_MODE_TYPE] = None) -> None:
super().__init__()
self.clamping_mode = clamping_mode

_transformed_types = (tv_tensors.BoundingBoxes,)

def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
return F.clamp_bounding_boxes(inpt) # type: ignore[return-value]
return F.clamp_bounding_boxes(inpt, clamping_mode=self.clamping_mode) # type: ignore[return-value]


class ClampKeyPoints(Transform):
@@ -46,3 +54,19 @@ class ClampKeyPoints(Transform):

def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_tensors.KeyPoints:
return F.clamp_keypoints(inpt) # type: ignore[return-value]


class SetClampingMode(Transform):
"""TODOBB"""

def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
super().__init__()
# TODOBB validate mode
self.clamping_mode = clamping_mode

_transformed_types = (tv_tensors.BoundingBoxes,)

def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
out: tv_tensors.BoundingBoxes = inpt.clone() # type: ignore[assignment]
out.clamping_mode = self.clamping_mode
return out
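Taken together, the new transforms might be combined as in the sketch below; it is assembled from the classes in this diff and mirrors the pipeline used in test_pipeline_no_leak, so the asserted attributes follow that test rather than any additional guarantee.

    from torchvision import tv_tensors
    from torchvision.transforms import v2 as transforms

    boxes = tv_tensors.BoundingBoxes(
        [[2, 2, 15, 15]], format="XYXY", canvas_size=(10, 10), clamping_mode="hard"
    )

    pipeline = transforms.Compose(
        [
            transforms.SetClampingMode("none"),  # returns a clone with clamping disabled downstream
            transforms.ClampBoundingBoxes(),  # clamping_mode=None respects the box's "none": pass-through
            transforms.ClampBoundingBoxes(clamping_mode="hard"),  # explicit override: clamps to the canvas
        ]
    )
    out = pipeline(boxes)

    assert boxes.clamping_mode == "hard"  # the input is untouched: SetClampingMode clones
    assert out.clamping_mode == "none"  # ClampBoundingBoxes does not modify clamping_mode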