feat(tidy3d): FXC-3961-faster-convolutions-for-tidy-3-d-plugins-autograd-filters

marcorudolphflex · marcorudolphflex · commit 4df0cd54bdd3 · 2025-11-05T17:21:52.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -53,7 +53,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Unified run submission API: `web.run(...)` is now a container-aware wrapper that accepts a single simulation or arbitrarily nested containers (`list`, `tuple`, `dict` values) and returns results in the same shape.
 - `web.Batch(ComponentModeler)` and `web.Job(ComponentModeler)` native support
 - Simulation data of batch jobs are now automatically downloaded upon their individual completion in `Batch.run()`, avoiding waiting for the entire batch to reach completion.
-
+- Improved speed of autograd tracing for convolutions.
+
 ### Fixed
 - Ensured the legacy `Env` proxy mirrors `config.web` profile switches and preserves API URL.
 - More robust `Sellmeier` and `Debye` material model, and prevent very large pole parameters in `PoleResidue` material model.
diff --git a/tests/test_plugins/autograd/test_functions.py b/tests/test_plugins/autograd/test_functions.py
@@ -168,6 +168,52 @@ def test_kernel_array_dimension_mismatch(self):
             convolve(self.array, kernel_mismatch)
 
 
+class TestConvolveAxes:
+    """Checks for ``convolve`` when the convolution axes are given explicitly."""
+
+    @pytest.mark.parametrize("mode", ["valid", "same", "full"])
+    @pytest.mark.parametrize("padding", ["constant", "edge"])
+    def test_convolve_axes_val(self, rng, mode, padding):
+        """Compare axis-restricted convolution against row-by-row ``np.convolve`` results."""
+        array = rng.random((2, 5))
+        kernel = rng.random((3, 3))
+
+        conv_td = convolve(array, kernel, padding=padding, mode=mode, axes=([1], [1]))
+
+        # Reference input: "same"/"full" pad axis 1 by half the kernel width first; "same" is
+        # then a "valid" convolution of the padded array, while "full" stays "full".
+        if mode not in ("same", "full"):
+            reference_input, reference_mode = array, mode
+        else:
+            half_width = kernel.shape[1] // 2
+            reference_input = pad(array, (half_width, half_width), mode=padding, axis=1)
+            reference_mode = "valid" if mode == "same" else mode
+
+        rows = np.asarray(reference_input)
+        taps = np.asarray(kernel)
+        # Pair every array row with every kernel row: (array rows, kernel rows, length).
+        expected = np.array(
+            [[np.convolve(row, tap, mode=reference_mode) for tap in taps] for row in rows]
+        )
+
+        npt.assert_allclose(conv_td, expected, atol=1e-12)
+
+    def test_convolve_axes_grad(self, rng):
+        """Check reverse-mode gradients up to second order with explicit axes."""
+        array = rng.random((2, 5))
+        kernel = rng.random((3, 3))
+        conv_axes = ([1], [1])
+
+        grad_checker = check_grads(convolve, modes=["rev"], order=2)
+        grad_checker(
+            array,
+            kernel,
+            padding="constant",
+            mode="valid",
+            axes=conv_axes,
+        )
+
+
 @pytest.mark.parametrize(
     "op,sp_op",
     [
diff --git a/tidy3d/plugins/autograd/functions.py b/tidy3d/plugins/autograd/functions.py
@@ -7,7 +7,7 @@
 import numpy as onp
 from autograd import jacobian
 from autograd.extend import defvjp, primitive
-from autograd.scipy.signal import convolve as convolve_ag
+from autograd.numpy.fft import fftn, ifftn
 from autograd.scipy.special import logsumexp
 from autograd.tracer import getval
 from numpy.lib.stride_tricks import sliding_window_view
@@ -37,6 +37,128 @@
 ]
 
 
+def _normalize_axes(
+    ndim_array: int,
+    ndim_kernel: int,
+    axes: Union[tuple[Iterable[int], Iterable[int]], None],
+) -> tuple[tuple[int, ...], tuple[int, ...]]:
+    """Validate convolution axes and return them as non-negative (array, kernel) tuples.
+
+    ``axes=None`` selects every axis and requires equal ranks. Raises ``ValueError`` for a
+    malformed pair, unequal lengths, an out-of-range axis, or a repeated axis.
+    """
+    if axes is None:
+        if ndim_array != ndim_kernel:
+            raise ValueError(
+                "Kernel dimensions must match array dimensions when 'axes' is not provided, "
+                f"got array ndim {ndim_array} and kernel ndim {ndim_kernel}."
+            )
+        return tuple(range(ndim_array)), tuple(range(ndim_kernel))
+
+    if len(axes) != 2:
+        raise ValueError("'axes' must be a tuple of two iterable collections of axis indices.")
+
+    raw_array, raw_kernel = (tuple(group) for group in axes)
+    if len(raw_array) != len(raw_kernel):
+        raise ValueError(
+            "The number of convolution axes for the array and kernel must be the same, "
+            f"got {len(raw_array)} and {len(raw_kernel)}."
+        )
+
+    # Range-check BEFORE wrapping: a bare modulo silently aliases an invalid axis (e.g. 5 -> 1
+    # for ndim 2) and divides by zero for 0-d input, so a post-modulo check can never fire.
+    if any(not -ndim_array <= ax < ndim_array for ax in raw_array):
+        raise ValueError(
+            f"Array axes out of bounds for array with {ndim_array} dimensions: {raw_array}."
+        )
+    if any(not -ndim_kernel <= ax < ndim_kernel for ax in raw_kernel):
+        raise ValueError(
+            f"Kernel axes out of bounds for kernel with {ndim_kernel} dimensions: {raw_kernel}."
+        )
+
+    axes_array = tuple(ax % ndim_array for ax in raw_array)
+    axes_kernel = tuple(ax % ndim_kernel for ax in raw_kernel)
+    if len(set(axes_array)) != len(axes_array) or len(set(axes_kernel)) != len(axes_kernel):
+        raise ValueError("Convolution axes must be unique for both the array and the kernel.")
+
+    return axes_array, axes_kernel
+
+
+def _fft_convolve_general(
+    array: NDArray,
+    kernel: NDArray,
+    axes_array: tuple[int, ...],
+    axes_kernel: tuple[int, ...],
+    mode: Literal["full", "valid"],
+) -> NDArray:
+    """Convolve ``array`` with ``kernel`` along paired axes using FFTs.
+
+    Unlisted axes act as batch axes; output axes are (array batch, kernel batch, convolved).
+
+    Parameters
+    ----------
+    array, kernel : NDArray
+        Operands of the convolution.
+    axes_array, axes_kernel : tuple[int, ...]
+        Axes to convolve, paired entry by entry (assumed already normalized to non-negative).
+    mode : Literal["full", "valid"]
+        ``"full"`` keeps the entire result; ``"valid"`` crops it to the full-overlap region.
+
+    Returns
+    -------
+    NDArray
+        The convolution; real-valued when neither operand is complex.
+
+    Raises
+    ------
+    ValueError
+        If ``mode`` is neither ``"full"`` nor ``"valid"``.
+    """
+    n_conv = len(axes_array)
+    if n_conv == 0:
+        # Nothing to convolve: the result is the outer product of the two operands.
+        outer = np.multiply(
+            array.reshape(array.shape + (1,) * kernel.ndim),
+            kernel.reshape((1,) * array.ndim + kernel.shape),
+        )
+        return outer.reshape(array.shape + kernel.shape)
+
+    batch_axes_a = tuple(ax for ax in range(array.ndim) if ax not in axes_array)
+    batch_axes_k = tuple(ax for ax in range(kernel.ndim) if ax not in axes_kernel)
+    n_batch_a, n_batch_k = len(batch_axes_a), len(batch_axes_k)
+
+    # Move the convolution axes to the end so they can be addressed as trailing axes.
+    array_t = np.transpose(array, batch_axes_a + axes_array) if array.ndim else array
+    kernel_t = np.transpose(kernel, batch_axes_k + axes_kernel) if kernel.ndim else kernel
+    conv_shape_a = array_t.shape[n_batch_a:]
+    conv_shape_k = kernel_t.shape[n_batch_k:]
+    dim_pairs = tuple(zip(conv_shape_a, conv_shape_k))
+
+    # Singleton axes let the two batch blocks broadcast against each other.
+    array_b = np.reshape(array_t, array_t.shape[:n_batch_a] + (1,) * n_batch_k + conv_shape_a)
+    kernel_b = np.reshape(kernel_t, (1,) * n_batch_a + kernel_t.shape[:n_batch_k] + conv_shape_k)
+
+    # Transform at the full linear-convolution length (n + m - 1) along each convolved axis.
+    fft_axes = tuple(range(-n_conv, 0))
+    fft_shape = tuple(int(dim_a + dim_k - 1) for dim_a, dim_k in dim_pairs)
+    spectrum = fftn(array_b, fft_shape, axes=fft_axes) * fftn(kernel_b, fft_shape, axes=fft_axes)
+    result = ifftn(spectrum, fft_shape, axes=fft_axes)
+
+    if mode == "valid":
+        window = [slice(None)] * (result.ndim - n_conv)
+        for dim_a, dim_k in dim_pairs:
+            start = int(min(dim_a, dim_k) - 1)
+            window.append(slice(start, start + int(abs(dim_a - dim_k) + 1)))
+        result = result[tuple(window)]
+    elif mode != "full":
+        raise ValueError(f"Unsupported convolution mode '{mode}'.")
+
+    if not np.iscomplexobj(array) and not np.iscomplexobj(kernel):
+        result = np.real(result)
+
+    return result
+
+
 def _get_pad_indices(
     n: int,
     pad_width: tuple[int, int],
@@ -189,19 +311,21 @@ def convolve(
     if any(k % 2 == 0 for k in kernel.shape):
         raise ValueError(f"All kernel dimensions must be odd, got {kernel.shape}.")
 
-    if kernel.ndim != array.ndim and axes is None:
-        raise ValueError(
-            f"Kernel dimensions must match array dimensions, got kernel {kernel.shape} and array {array.shape}."
-        )
+    axes_array, axes_kernel = _normalize_axes(array.ndim, kernel.ndim, axes)
 
-    if mode in ("same", "full"):
-        kernel_dims = kernel.shape if axes is None else [kernel.shape[d] for d in axes[1]]
-        pad_widths = [(ks // 2, ks // 2) for ks in kernel_dims]
-        for axis, pad_width in enumerate(pad_widths):
-            array = pad(array, pad_width, mode=padding, axis=axis)
-        mode = "valid" if mode == "same" else mode
+    working_array = array
+    effective_mode = mode
 
-    return convolve_ag(array, kernel, axes=axes, mode=mode)
+    if mode in ("same", "full"):
+        for ax_array, ax_kernel in zip(axes_array, axes_kernel):
+            pad_width = kernel.shape[ax_kernel] // 2
+            if pad_width > 0:
+                working_array = pad(
+                    working_array, (pad_width, pad_width), mode=padding, axis=ax_array
+                )
+        effective_mode = "valid" if mode == "same" else mode
+
+    return _fft_convolve_general(working_array, kernel, axes_array, axes_kernel, effective_mode)
 
 
 def _get_footprint(size, structure, maxval):