Added adaptive max pooling 3D to Ivy backend

arshPratap · Oct 24, 2023 · 7bb1683 · 7bb1683
1 parent bb0b201
commit 7bb1683
Show file tree

Hide file tree

Showing 4 changed files with 236 additions and 0 deletions.
diff --git a/ivy/data_classes/array/experimental/layers.py b/ivy/data_classes/array/experimental/layers.py
@@ -1328,3 +1328,30 @@ def max_unpool1d(
             padding=padding,
             data_format=data_format,
         )
+
+    def adaptive_max_pool3d(
+        self: ivy.Array,
+        output_size: Union[Sequence[int], int],
+    ) -> ivy.Array:
+        """
+        Apply a 3D adaptive maximum pooling over an input signal composed of several
+        input planes.
+
+        ivy.Array instance method variant of ivy.adaptive_max_pool3d: the call is
+        forwarded to that function with this array's underlying data as the input.
+
+        Parameters
+        ----------
+        self
+            Input array. Must have shape (N, C, D_in, H_in, W_in)
+            or (C, D_in, H_in, W_in) where N is the batch dimension,
+            C is the feature dimension, and D_in, H_in and W_in are
+            the 3 spatial dimensions.
+        output_size
+            Spatial output size, given as a single int or a sequence of ints.
+
+        Returns
+        -------
+        ret
+            The result of the pooling operation.
+            Will have shape (N, C, S_0, S_1, S_2) or (C, S_0, S_1, S_2),
+            where S = `output_size`
+        """
+        return ivy.adaptive_max_pool3d(
+            self._data,
+            output_size,
+        )
diff --git a/ivy/data_classes/container/experimental/layers.py b/ivy/data_classes/container/experimental/layers.py
@@ -2854,3 +2854,73 @@ def max_unpool1d(
             padding=padding,
             data_format=data_format,
         )
+
+    @staticmethod
+    def static_adaptive_max_pool3d(
+        input: Union[ivy.Array, ivy.NativeArray, ivy.Container],
+        output_size: Union[Sequence[int], int, ivy.Container],
+        *,
+        key_chains: Optional[Union[List[str], Dict[str, str], ivy.Container]] = None,
+        to_apply: Union[bool, ivy.Container] = True,
+        prune_unapplied: Union[bool, ivy.Container] = False,
+        map_sequences: Union[bool, ivy.Container] = False,
+    ) -> ivy.Container:
+        """
+        ivy.Container static method variant of ivy.adaptive_max_pool3d. This method
+        simply wraps the function, and so the docstring for ivy.adaptive_max_pool3d also
+        applies to this method with minimal changes.
+
+        Parameters
+        ----------
+        input
+            Input array or container. Arrays must have shape
+            (N, C, D_in, H_in, W_in) or (C, D_in, H_in, W_in) where N is the
+            batch dimension, C is the feature dimension, and D_in, H_in and
+            W_in are the 3 spatial dimensions.
+        output_size
+            Spatial output size.
+        key_chains
+            Passed through unchanged to ContainerBase.cont_multi_map_in_function.
+            Default is ``None``.
+        to_apply
+            Passed through unchanged to ContainerBase.cont_multi_map_in_function.
+            Default is ``True``.
+        prune_unapplied
+            Passed through unchanged to ContainerBase.cont_multi_map_in_function.
+            Default is ``False``.
+        map_sequences
+            Passed through unchanged to ContainerBase.cont_multi_map_in_function.
+            Default is ``False``.
+
+        Returns
+        -------
+        ret
+            The result of the pooling operation.
+            Will have shape (N, C, S_0, S_1, S_2) or (C, S_0, S_1, S_2),
+            where S = `output_size`
+        """
+        # Dispatch by function name so the mapping helper applies
+        # "adaptive_max_pool3d" across the container arguments.
+        return ContainerBase.cont_multi_map_in_function(
+            "adaptive_max_pool3d",
+            input,
+            output_size,
+            key_chains=key_chains,
+            to_apply=to_apply,
+            prune_unapplied=prune_unapplied,
+            map_sequences=map_sequences,
+        )
+
+    def adaptive_max_pool3d(
+        self: ivy.Container,
+        output_size: Union[Sequence[int], int, ivy.Container],
+        *,
+        key_chains: Optional[Union[List[str], Dict[str, str], ivy.Container]] = None,
+        to_apply: Union[bool, ivy.Container] = True,
+        prune_unapplied: Union[bool, ivy.Container] = False,
+        map_sequences: Union[bool, ivy.Container] = False,
+    ) -> ivy.Container:
+        """
+        Apply a 3D adaptive maximum pooling over an input signal composed of several
+        input planes.
+
+        ivy.Container instance method variant of ivy.adaptive_max_pool3d: the call
+        is delegated to the static variant with this container as the input.
+
+        Parameters
+        ----------
+        self
+            Input container.
+        output_size
+            Spatial output size, given as a single int, a sequence of ints, or a
+            container.
+        key_chains
+            Passed through unchanged to the static variant. Default is ``None``.
+        to_apply
+            Passed through unchanged to the static variant. Default is ``True``.
+        prune_unapplied
+            Passed through unchanged to the static variant. Default is ``False``.
+        map_sequences
+            Passed through unchanged to the static variant. Default is ``False``.
+
+        Returns
+        -------
+        ret
+            The result of the pooling operation.
+        """
+        return self.static_adaptive_max_pool3d(
+            self,
+            output_size,
+            key_chains=key_chains,
+            to_apply=to_apply,
+            prune_unapplied=prune_unapplied,
+            map_sequences=map_sequences,
+        )
diff --git a/ivy/functional/ivy/experimental/layers.py b/ivy/functional/ivy/experimental/layers.py
@@ -3221,3 +3221,104 @@ def max_unpool1d(
     ),
     "to_skip": ("inputs_to_ivy_arrays", "handle_partial_mixed_function"),
 }
+
+
+@handle_nestable
+@inputs_to_ivy_arrays
+def adaptive_max_pool3d(
+    input: Union[ivy.Array, ivy.NativeArray],
+    output_size: Union[Sequence[int], int],
+) -> ivy.Array:
+    """
+    Apply a 3D adaptive maximum pooling over an input signal composed of several input
+    planes.
+
+    Parameters
+    ----------
+    input
+        Input array. Must have shape (N, C, D_in, H_in, W_in)
+        or (C, D_in, H_in, W_in) where N is the batch dimension,
+        C is the feature dimension, and D_in, H_in and W_in are
+        the 3 spatial dimensions.
+    output_size
+        Spatial output size. A single int is used for all three spatial
+        dimensions; otherwise a sequence of exactly three ints is expected.
+
+    Returns
+    -------
+    ret
+        The result of the pooling operation. Will have shape (N, C, S_0, S_1, S_2) or
+        (C, S_0, S_1, S_2), where S = `output_size`
+
+    Raises
+    ------
+    IvyException
+        If `input` is neither 4D nor 5D, or if `output_size` is a sequence whose
+        length is not 3.
+    """
+    squeeze = False
+    if input.ndim == 4:
+        # Unbatched input: add a leading batch axis now and strip it on return.
+        input = ivy.expand_dims(input, axis=0)
+        squeeze = True
+    elif input.ndim != 5:
+        raise ivy.utils.exceptions.IvyException(
+            f"Got {len(input.shape)}D input, but only 4D and 5D inputs are supported.",
+        )
+
+    if isinstance(output_size, int):
+        output_size = (output_size, output_size, output_size)
+    elif len(output_size) != 3:
+        # The code below zips from the front and indexes from the back, so any
+        # other length would be interpreted inconsistently.
+        raise ivy.utils.exceptions.IvyException(
+            f"output_size must be an int or a sequence of 3 ints, got {output_size}.",
+        )
+
+    # Fast path: every spatial size is an exact multiple of its output size, so
+    # the result equals a regular max pool with kernel == stride.
+    if all(i_s % o_s == 0 for i_s, o_s in zip(input.shape[-3:], output_size)):
+        stride = tuple(i_s // o_s for i_s, o_s in zip(input.shape[-3:], output_size))
+        kernel_size = stride
+        # The input is channels-first with three spatial axes, hence "NCDHW"
+        # (the 2D layout string "NCHW" does not describe a 5D tensor).
+        pooled_output = ivy.max_pool3d(
+            input, kernel_size, stride, "VALID", data_format="NCDHW"
+        )
+        if squeeze:
+            return ivy.squeeze(pooled_output, axis=0)
+        return pooled_output
+
+    # General path: gather per-window indices for each spatial dimension.
+    idxd, length_d, range_max_d, adaptive_d = _compute_idx(
+        input.shape[-3], output_size[-3], input.device
+    )
+    idxh, length_h, range_max_h, adaptive_h = _compute_idx(
+        input.shape[-2], output_size[-2], input.device
+    )
+    idxw, length_w, range_max_w, adaptive_w = _compute_idx(
+        input.shape[-1], output_size[-1], input.device
+    )
+
+    # to numpy and back in order to bypass a slicing error in tensorflow
+    # NOTE(review): idxh and idxw are passed here as index arrays of the same
+    # rank, so NumPy will broadcast them against each other; confirm against
+    # `_compute_idx` / `_expand_to_dim` that this yields separate output and
+    # window axes for each spatial dimension, and that the reduction axes
+    # used below match the resulting layout.
+    vals = ivy.array(
+        input.to_numpy()[..., _expand_to_dim(idxd, 5), idxh, idxw], device=input.device
+    )
+
+    if not adaptive_d and not adaptive_h and not adaptive_w:
+        ret = ivy.max(vals, axis=(-4, -2, -1))
+        ret = ivy.squeeze(ret, axis=0) if squeeze else ret
+        return ret
+
+    # Mask with -inf so that masked entries can never win the maximum.
+    vals, length_d = _mask(
+        vals, length_d, range_max_d, dim=-3, mask_value=float("-inf")
+    )
+    vals, length_h = _mask(
+        vals, length_h, range_max_h, dim=-2, mask_value=float("-inf")
+    )
+    vals, length_w = _mask(
+        vals, length_w, range_max_w, dim=-1, mask_value=float("-inf")
+    )
+
+    # Fold an elementwise maximum over axes -4, -2 and -1 of `vals`.
+    ret = None
+    for i, (j, k) in itertools.product(
+        range(vals.shape[-4]),
+        itertools.product(range(vals.shape[-2]), range(vals.shape[-1])),
+    ):
+        if ret is None:
+            ret = vals[..., i, :, j, k]
+        else:
+            ret = ivy.maximum(ret, vals[..., i, :, j, k])
+    pooled_output = ret.astype(vals.dtype)
+
+    pooled_output = ivy.squeeze(pooled_output, axis=0) if squeeze else pooled_output
+    return pooled_output
+
+
+# Names of wrappers to add to ("to_add") and to skip for ("to_skip") this
+# function, attached as a function attribute.
+# NOTE(review): presumably read by ivy's backend-setting machinery for
+# functions that have both a compositional and a backend-specific
+# implementation; confirm against the code that consumes this attribute.
+adaptive_max_pool3d.mixed_backend_wrappers = {
+    "to_add": (
+        "handle_backend_invalid",
+        "inputs_to_native_arrays",
+        "outputs_to_ivy_arrays",
+        "handle_device_shifting",
+    ),
+    "to_skip": ("inputs_to_ivy_arrays",),
+}
diff --git a/ivy_tests/test_ivy/test_functional/test_experimental/test_nn/test_layers.py b/ivy_tests/test_ivy/test_functional/test_experimental/test_nn/test_layers.py
@@ -1437,3 +1437,41 @@ def test_stft(
         window_fn=None,
         pad_end=True,
     )
+
+
+# Property-based test for ivy.adaptive_max_pool3d, checked against the torch
+# ground-truth backend.
+@handle_test(
+    fn_tree="functional.ivy.experimental.adaptive_max_pool3d",
+    # 4D (unbatched) and 5D (batched) float inputs.
+    dtype_and_x=helpers.dtype_and_values(
+        available_dtypes=helpers.get_dtypes("float"),
+        min_num_dims=4,
+        max_num_dims=5,
+        min_dim_size=1,
+        # Setting max and min value because this operation in paddle is not
+        # numerically stable
+        max_value=100,
+        min_value=-100,
+    ),
+    # Either a per-dimension 3-tuple or a single int shared by all dimensions.
+    output_size=st.one_of(
+        st.tuples(
+            helpers.ints(min_value=1, max_value=5),
+            helpers.ints(min_value=1, max_value=5),
+            helpers.ints(min_value=1, max_value=5),
+        ),
+        helpers.ints(min_value=1, max_value=5),
+    ),
+    # The function takes no `out` argument, so never test with one.
+    test_with_out=st.just(False),
+    ground_truth_backend="torch",
+)
+def test_adaptive_max_pool3d(
+    *, dtype_and_x, output_size, test_flags, backend_fw, fn_name, on_device
+):
+    input_dtype, x = dtype_and_x
+    helpers.test_function(
+        input_dtypes=input_dtype,
+        test_flags=test_flags,
+        backend_to_test=backend_fw,
+        on_device=on_device,
+        fn_name=fn_name,
+        input=x[0],
+        output_size=output_size,
+    )