Lightning-AI · VijayVignesh1 · Sep 2, 2025 · Sep 2, 2025 · Sep 3, 2025 · Sep 3, 2025
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
--
+- Added `Upper Face Dynamics Deviation (FDD)` metric to multimodal domain. ([#3097](https://github.com/Lightning-AI/torchmetrics/issues/3097))
 
 
 ### Changed

@@ -150,6 +150,7 @@
 .. _CLIP-IQA: https://arxiv.org/abs/2207.12396
 .. _CLIP: https://arxiv.org/abs/2103.00020
 .. _LVE: https://openaccess.thecvf.com/content/ICCV2021/papers/Richard_MeshTalk_3D_Face_Animation_From_Speech_Using_Cross-Modality_Disentanglement_ICCV_2021_paper.pdf
+.. _FDD: https://openaccess.thecvf.com/content/CVPR2023/papers/Xing_CodeTalker_Speech-Driven_3D_Facial_Animation_With_Discrete_Motion_Prior_CVPR_2023_paper.pdf
 .. _PPL : https://arxiv.org/abs/1812.04948
 .. _CIOU: https://arxiv.org/abs/2005.03572
 .. _DIOU: https://arxiv.org/abs/1911.08287v1

@@ -0,0 +1,20 @@
+.. customcarditem::
+   :header: Upper Face Dynamics Deviation (FDD)
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/image_classification.svg
+   :tags: Multimodal
+
+.. include:: ../links.rst
+
+###################################
+Upper Face Dynamics Deviation (FDD)
+###################################
+
+Module Interface
+________________
+
+.. autoclass:: torchmetrics.multimodal.fdd.UpperFaceDynamicsDeviation
+
+Functional Interface
+____________________
+
+.. autofunction:: torchmetrics.functional.multimodal.fdd.upper_face_dynamics_deviation
@@ -74,7 +74,7 @@
 )
 from torchmetrics.functional.image._deprecated import _total_variation as total_variation
 from torchmetrics.functional.image._deprecated import _universal_image_quality_index as universal_image_quality_index
-from torchmetrics.functional.multimodal import lip_vertex_error
+from torchmetrics.functional.multimodal import lip_vertex_error, upper_face_dynamics_deviation
 from torchmetrics.functional.nominal import (
     cramers_v,
     cramers_v_matrix,
@@ -246,6 +246,7 @@
     "tschuprows_t_matrix",
     "tweedie_deviance_score",
     "universal_image_quality_index",
+    "upper_face_dynamics_deviation",
     "weighted_mean_absolute_percentage_error",
     "word_error_rate",
     "word_information_lost",

@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from torchmetrics.functional.multimodal.fdd import upper_face_dynamics_deviation
 from torchmetrics.functional.multimodal.lve import lip_vertex_error
 from torchmetrics.utilities.imports import _TRANSFORMERS_GREATER_EQUAL_4_10
 
-__all__ = ["lip_vertex_error"]
+__all__ = ["lip_vertex_error", "upper_face_dynamics_deviation"]
 
 if _TRANSFORMERS_GREATER_EQUAL_4_10:
     from torchmetrics.functional.multimodal.clip_iqa import clip_image_quality_assessment

@@ -0,0 +1,116 @@
+# Copyright The Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+
+import torch
+from torch import Tensor
+
+
+def upper_face_dynamics_deviation(
+    vertices_pred: Tensor,
+    vertices_gt: Tensor,
+    template: Tensor,
+    upper_face_map: List[int],
+) -> Tensor:
+    r"""Compute Upper Face Dynamics Deviation (FDD) for 3D talking head evaluation.
+
+    The Upper Face Dynamics Deviation (FDD) metric evaluates the quality of facial expressions in the upper
+    face region for 3D talking head models. It quantifies the deviation in vertex motion dynamics between the
+    predicted and ground truth sequences by comparing the temporal variation (standard deviation) of per-vertex
+    squared displacements relative to a neutral template. Lower values of FDD indicate closer alignment of the
+    predicted upper-face motion dynamics with the ground truth.
+
+    The metric is defined as:
+
+    .. math::
+        \text{FDD} = \frac{1}{|S_U|} \sum_{v \in S_U} \Big( \text{std}(\| x_{1:T,v} -
+        \text{template}_v \|_2^2) - \text{std}(\| \hat{x}_{1:T,v} - \text{template}_v \|_2^2) \Big)
+
+    where :math:`T` is the number of frames, :math:`S_U` is the set of upper-face vertices with :math:`M = |S_U|`,
+    :math:`x_{t,v} \in \mathbb{R}^3` are the coordinates of vertex :math:`v` at frame :math:`t` in the ground truth
+    sequence, and :math:`\hat{x}_{t,v} \in \mathbb{R}^3` are the corresponding predicted coordinates. The neutral
+    template coordinate of vertex :math:`v` is :math:`\text{template}_v \in \mathbb{R}^3`. The operator
+    :math:`\text{std}(\cdot)` is the population (biased) standard deviation taken over the :math:`T` frames.
+
+    Args:
+        vertices_pred: Predicted vertices tensor of shape (T, V, 3) where T is number of frames,
+            V is number of vertices, and 3 represents XYZ coordinates.
+        vertices_gt: Ground truth vertices tensor of shape (T, V, 3). If the frame counts of the two sequences
+            differ, both are truncated to the shorter length before comparison.
+        template: Template mesh tensor of shape (V, 3) for the neutral face; moved to the device of ``vertices_pred``.
+        upper_face_map: List of vertex indices corresponding to the upper face region.
+
+    Returns:
+        torch.Tensor: Scalar tensor with the mean over upper-face vertices of (ground truth std - predicted std).
+
+    Raises:
+        ValueError:
+            If the number of dimensions of `vertices_pred` or `vertices_gt` is not 3.
+            If `template` does not have shape (No_of_vertices, 3).
+            If `vertices_pred` and `vertices_gt` do not have the same vertex and coordinate dimensions.
+            If `template` shape does not match the vertex-coordinate dimensions of `vertices_pred` (and `vertices_gt`).
+            If ``upper_face_map`` is empty or contains invalid vertex indices.
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.functional.multimodal import upper_face_dynamics_deviation
+        >>> vertices_pred = torch.randn(10, 100, 3, generator=torch.manual_seed(41))
+        >>> vertices_gt = torch.randn(10, 100, 3, generator=torch.manual_seed(42))
+        >>> upper_face_map = [10, 11, 12, 13, 14]
+        >>> template = torch.randn(100, 3, generator=torch.manual_seed(43))
+        >>> upper_face_dynamics_deviation(vertices_pred, vertices_gt, template, upper_face_map)
+        tensor(1.0385)
+
+    """
+    if vertices_pred.ndim != 3 or vertices_gt.ndim != 3:
+        raise ValueError(
+            f"Expected both vertices_pred and vertices_gt to have 3 dimensions but got "
+            f"{vertices_pred.ndim} and {vertices_gt.ndim} dimensions respectively."
+        )
+    if template.ndim != 2 or template.shape[1] != 3:
+        raise ValueError(f"Expected template to have shape (V, 3) but got {template.shape}.")
+    if vertices_pred.shape[1:] != vertices_gt.shape[1:]:  # frame counts (dim 0) are allowed to differ
+        raise ValueError(
+            f"Expected vertices_pred and vertices_gt to have same vertex and coordinate dimensions but got "
+            f"shapes {vertices_pred.shape} and {vertices_gt.shape}."
+        )
+    if vertices_pred.shape[1:] != template.shape:  # template must cover the same (V, 3) layout as the sequences
+        raise ValueError(
+            f"Shape mismatch: expected template shape {template.shape} to match "
+            f"vertex-coordinate dimensions of predictions {vertices_pred.shape[1:]}, "
+            f"but got template shape {template.shape} instead."
+        )
+    if not upper_face_map:
+        raise ValueError("upper_face_map cannot be empty.")
+    if min(upper_face_map) < 0 or max(upper_face_map) >= template.shape[0]:  # negative indices are rejected too
+        raise ValueError(
+            f"upper_face_map contains out-of-range vertex indices. "
+            f"Valid index range is [0, {template.shape[0] - 1}], "
+            f"but received indices in range [{min(upper_face_map)}, {max(upper_face_map)}]."
+        )
+    min_frames = min(vertices_pred.shape[0], vertices_gt.shape[0])  # compare only the frames both sequences have
+    pred = vertices_pred[:min_frames, upper_face_map, :]  # (T, M, 3), upper-face vertices only
+    gt = vertices_gt[:min_frames, upper_face_map, :]  # (T, M, 3)
+    template = template.to(pred.device)[upper_face_map, :]  # (M, 3), moved to pred's device
+
+    pred_disp = pred - template  # (T, M, 3), template broadcasts over the frame axis
+    gt_disp = gt - template  # (T, M, 3)
+
+    pred_norm_sq = torch.sum(pred_disp**2, dim=-1)  # (T, M) squared L2 displacement from the template
+    gt_norm_sq = torch.sum(gt_disp**2, dim=-1)  # (T, M)
+
+    pred_dyn = torch.std(pred_norm_sq, dim=0, unbiased=False)  # (M,) population std over the frame axis
+    gt_dyn = torch.std(gt_norm_sq, dim=0, unbiased=False)  # (M,)
+
+    return torch.mean(gt_dyn - pred_dyn)  # scalar; signed (ground truth minus prediction)
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from torchmetrics.multimodal.fdd import UpperFaceDynamicsDeviation
 from torchmetrics.multimodal.lve import LipVertexError
 from torchmetrics.utilities.imports import _TRANSFORMERS_GREATER_EQUAL_4_10
 
-__all__ = ["LipVertexError"]
+__all__ = ["LipVertexError", "UpperFaceDynamicsDeviation"]
 
 if _TRANSFORMERS_GREATER_EQUAL_4_10:
     from torchmetrics.multimodal.clip_iqa import CLIPImageQualityAssessment
-Original file line number
+Diff line change
@@ Expand Up @@
     ### Added
-    -
+    - Added `Upper Face Dynamics Deviation (FDD)` metric to multimodal domain. ([#3097](https://github.com/Lightning-AI/torchmetrics/issues/3097))
     ### Changed
@@ Expand Down @@