Add vertical offset option for training

torzdf · May 26, 2024 · 760bcaa · 760bcaa
1 parent 7e2f861
commit 760bcaa
Show file tree

Hide file tree

Showing 7 changed files with 56 additions and 10 deletions.
diff --git a/lib/align/aligned_face.py b/lib/align/aligned_face.py
@@ -85,7 +85,8 @@ def transform_image(image: np.ndarray,
 def get_adjusted_center(image_size: int,
                         source_offset: np.ndarray,
                         target_offset: np.ndarray,
-                        source_centering: CenteringType) -> np.ndarray:
+                        source_centering: CenteringType,
+                        y_offset: float) -> np.ndarray:
     """ Obtain the correct center of a face extracted image to translate between two different
     extract centerings.
 
@@ -99,20 +100,22 @@ def get_adjusted_center(image_size: int,
         The pose offset to translate a base extracted face to target centering
     source_centering: ["face", "head", "legacy"]
         The centering of the source image
+    y_offset: float
+        Amount to additionally offset the center of the image along the y-axis
 
     Returns
     -------
     :class:`numpy.ndarray`
         The center point of the image at the given size for the target centering
     """
     source_size = image_size - (image_size * EXTRACT_RATIOS[source_centering])
-    offset = target_offset - source_offset
+    offset = target_offset - source_offset - [0., y_offset]
     offset *= source_size
     center = np.rint(offset + image_size / 2).astype("int32")
     logger.trace(  # type:ignore[attr-defined]
         "image_size: %s, source_offset: %s, target_offset: %s, source_centering: '%s', "
-        "adjusted_offset: %s, center: %s",
-        image_size, source_offset, target_offset, source_centering, offset, center)
+        "y_offset: %s, adjusted_offset: %s, center: %s",
+        image_size, source_offset, target_offset, source_centering, y_offset, offset, center)
     return center
 
 
@@ -154,6 +157,7 @@ def get_centered_size(source_centering: CenteringType,
         ratio
     """
     if source_centering == target_centering and coverage_ratio == 1.0:
+        src_size: float | int = size
         retval = size
     else:
         src_size = size - (size * EXTRACT_RATIOS[source_centering])
@@ -263,6 +267,8 @@ class AlignedFace():
         The amount of the aligned image to return. A ratio of 1.0 will return the full contents of
         the aligned image. A ratio of 0.5 will return an image of the given size, but will crop to
         the central 50%% of the image.
+    y_offset: float, optional
+        Amount to adjust the aligned face along the y-axis in the range -1. to 1. Default: 0.0
     dtype: str, optional
         Set a data type for the final face to be returned as. Passing ``None`` will return a face
         with the same data type as the original :attr:`image`. Default: ``None``
@@ -279,6 +285,7 @@ def __init__(self,
                  centering: CenteringType = "face",
                  size: int = 64,
                  coverage_ratio: float = 1.0,
+                 y_offset: float = 0.0,
                  dtype: str | None = None,
                  is_aligned: bool = False,
                  is_legacy: bool = False) -> None:
@@ -288,6 +295,7 @@ def __init__(self,
         self._centering = centering
         self._size = size
         self._coverage_ratio = coverage_ratio
+        self._y_offset = y_offset
         self._dtype = dtype
         self._is_aligned = is_aligned
         self._source_centering: CenteringType = "legacy" if is_legacy and is_aligned else "head"
@@ -320,6 +328,11 @@ def padding(self) -> int:
         extracted face image for the selected extract type. """
         return self._padding[self._centering]
 
+    @property
+    def y_offset(self) -> float:
+        """ float: Additional offset applied to the face along the y-axis in -1. to 1. range """
+        return self._y_offset
+
     @property
     def matrix(self) -> np.ndarray:
         """ :class:`numpy.ndarray`: The 3x2 transformation matrix for extracting and aligning the
@@ -532,8 +545,7 @@ def extract_face(self, image: np.ndarray | None) -> np.ndarray | None:
                          "image. Returning empty face.")
             return None
 
-        if self._is_aligned and (self._centering != self._source_centering or
-                                 self._coverage_ratio != 1.0):
+        if self._is_aligned:
             # Crop out the sub face from full head
             image = self._convert_centering(image)
 
@@ -648,7 +660,8 @@ def get_cropped_roi(self,
                 center = get_adjusted_center(image_size,
                                              self.pose.offset[self._source_centering],
                                              self.pose.offset[centering],
-                                             self._source_centering)
+                                             self._source_centering,
+                                             self.y_offset)
                 padding = target_size // 2
                 roi = np.array([center - padding, center + padding]).ravel()
                 logger.trace(  # type:ignore[attr-defined]

diff --git a/lib/align/aligned_mask.py b/lib/align/aligned_mask.py
@@ -258,7 +258,8 @@ def set_sub_crop(self,
                      source_offset: np.ndarray,
                      target_offset: np.ndarray,
                      centering: CenteringType,
-                     coverage_ratio: float = 1.0) -> None:
+                     coverage_ratio: float = 1.0,
+                     y_offset: float = 0.0) -> None:
         """ Set the internal crop area of the mask to be returned.
 
         This impacts the returned mask from :attr:`mask` if the requested mask is required for
@@ -275,14 +276,17 @@ def set_sub_crop(self,
         coverage_ratio: float, optional
             The coverage ratio to be applied to the target image. ``None`` for default (1.0).
             Default: ``None``
+        y_offset: float, optional
+            Amount to additionally adjust the mask's offset along the y-axis. Default: 0.0
         """
         if centering == self.stored_centering and coverage_ratio == 1.0:
             return
 
         center = get_adjusted_center(self.stored_size,
                                      source_offset,
                                      target_offset,
-                                     self.stored_centering)
+                                     self.stored_centering,
+                                     y_offset)
         crop_size = get_centered_size(self.stored_centering,
                                       centering,
                                       self.stored_size,

diff --git a/lib/align/detected_face.py b/lib/align/detected_face.py
@@ -431,6 +431,7 @@ def load_aligned(self,
                      dtype: str | None = None,
                      centering: CenteringType = "head",
                      coverage_ratio: float = 1.0,
+                     y_offset: float = 0.0,
                      force: bool = False,
                      is_aligned: bool = False,
                      is_legacy: bool = False) -> None:
@@ -463,6 +464,9 @@ def load_aligned(self,
             The amount of the aligned image to return. A ratio of 1.0 will return the full contents
             of the aligned image. A ratio of 0.5 will return an image of the given size, but will
             crop to the central 50%% of the image. Default: `1.0`
+        y_offset: float, optional
+            The amount to adjust the aligned face along the y-axis in the range -1. to 1.
+            Default: `0.0`
         force: bool, optional
             Force an update of the aligned face, even if it is already loaded. Default: ``False``
         is_aligned: bool, optional
@@ -487,6 +491,7 @@ def load_aligned(self,
                                         centering=centering,
                                         size=size,
                                         coverage_ratio=coverage_ratio,
+                                        y_offset=y_offset,
                                         dtype=dtype,
                                         is_aligned=is_aligned,
                                         is_legacy=is_aligned and is_legacy)

diff --git a/lib/training/cache.py b/lib/training/cache.py
@@ -381,10 +381,13 @@ def _load_detected_face(self,
         """
         detected_face = DetectedFace()
         detected_face.from_png_meta(alignments)
+        y_offset = self._config["vertical_offset"] 
+        assert isinstance(y_offset, int)
         detected_face.load_aligned(None,
                                    size=self._size,
                                    centering=self._centering,
                                    coverage_ratio=self._coverage_ratio,
+                                   y_offset=y_offset / 100.,
                                    is_aligned=True,
                                    is_legacy=self._extract_version == 1.0)
         logger.trace("Cached aligned face for: %s", filename)  # type: ignore
@@ -453,7 +456,8 @@ def _get_face_mask(self, filename: str, detected_face: DetectedFace) -> np.ndarr
         mask.set_sub_crop(pose.offset[mask.stored_centering],
                           pose.offset[self._centering],
                           self._centering,
-                          self._coverage_ratio)
+                          self._coverage_ratio,
+                          detected_face.aligned.y_offset)
         face_mask = mask.mask
         if self._size != face_mask.shape[0]:
             interpolator = cv2.INTER_CUBIC if mask.stored_size < self._size else cv2.INTER_AREA

diff --git a/lib/training/generator.py b/lib/training/generator.py
@@ -663,11 +663,14 @@ def _create_samples(self,
 
         assert self._config["centering"] in T.get_args(CenteringType)
         retval = np.empty((full_size, full_size, 3), dtype="float32")
+        y_offset = self._config["vertical_offset"]
+        assert isinstance(y_offset, int)
         retval = self._to_float32(np.array([
             AlignedFace(face.landmarks_xy,
                         image=images[idx],
                         centering=T.cast(CenteringType,
                                          self._config["centering"]),
+                        y_offset=y_offset / 100.,
                         size=full_size,
                         dtype="uint8",
                         is_aligned=True).face

diff --git a/plugins/train/_config.py b/plugins/train/_config.py
@@ -161,6 +161,20 @@ def _set_globals(self) -> None:
                 "\n\t75.0% spans from temple to temple."
                 "\n\t87.5% spans from ear to ear."
                 "\n\t100.0% is a mugshot."))
+        self.add_item(
+            section=section,
+            title="vertical_offset",
+            datatype=int,
+            default=0,
+            min_max=(-25, 25),
+            rounding=1,
+            fixed=True,
+            group=_("face"),
+            info=_(
+                "How much to adjust the vertical position of the aligned face as a percentage of "
+                "face image size. Negative values move the face up (expose more chin and less "
+                "forehead). Positive values move the face down (expose less chin and more "
+                "forehead)"))
         self.add_item(
             section=section,
             title="icnr_init",

diff --git a/scripts/convert.py b/scripts/convert.py
@@ -747,6 +747,7 @@ def __init__(self, queue_size: int, arguments: Namespace) -> None:
         self._batchsize = self._get_batchsize(queue_size)
         self._sizes = self._get_io_sizes()
         self._coverage_ratio = self._model.coverage_ratio
+        self._y_offset = self._model.config["vertical_offset"] / 100.
         self._centering = self._model.config["centering"]
 
         self._thread: MultiThread | None = None
@@ -1000,6 +1001,7 @@ def load_aligned(self, item: ConvertItem) -> None:
                                     centering=self._centering,
                                     size=self._sizes["input"],
                                     coverage_ratio=self._coverage_ratio,
+                                    y_offset=self._y_offset,
                                     dtype="float32")
             if self._sizes["input"] == self._sizes["output"]:
                 reference_faces.append(feed_face)
@@ -1009,6 +1011,7 @@ def load_aligned(self, item: ConvertItem) -> None:
                                                    centering=self._centering,
                                                    size=self._sizes["output"],
                                                    coverage_ratio=self._coverage_ratio,
+                                                   y_offset=self._y_offset,
                                                    dtype="float32"))
             feed_faces.append(feed_face)
         item.feed_faces = feed_faces