From 0b37496c57abdc07b0c18e1d797df75d9a29d048 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 10:30:29 -0400
Subject: [PATCH 01/10] Move IdealSizeInvocation to its own file. No functional changes.

---
 invokeai/app/invocations/ideal_size.py | 65 ++++++++++++++++++++++++
 invokeai/app/invocations/latent.py     | 56 ----------------------
 2 files changed, 65 insertions(+), 56 deletions(-)
 create mode 100644 invokeai/app/invocations/ideal_size.py

diff --git a/invokeai/app/invocations/ideal_size.py b/invokeai/app/invocations/ideal_size.py
new file mode 100644
index 00000000000..120f8c1ba01
--- /dev/null
+++ b/invokeai/app/invocations/ideal_size.py
@@ -0,0 +1,65 @@
+import math
+from typing import Tuple
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
+from invokeai.app.invocations.model import UNetField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.config import BaseModelType
+
+
+@invocation_output("ideal_size_output")
+class IdealSizeOutput(BaseInvocationOutput):
+    """The ideal width and height of an image (in pixels)."""
+
+    width: int = OutputField(description="The ideal width of the image (in pixels)")
+    height: int = OutputField(description="The ideal height of the image (in pixels)")
+
+
+@invocation(
+    "ideal_size",
+    title="Ideal Size",
+    tags=["latents", "math", "ideal_size"],
+    version="1.0.3",
+)
+class IdealSizeInvocation(BaseInvocation):
+    """Calculates the ideal size for generation to avoid duplication"""
+
+    width: int = InputField(default=1024, description="Final image width")
+    height: int = InputField(default=576, description="Final image height")
+    unet: UNetField = InputField(default=None, description=FieldDescriptions.unet)
+    multiplier: float = InputField(
+        default=1.0,
+        description="Amount to multiply the model's dimensions by when calculating the ideal size (may result in "
+        "initial generation artifacts if too large)",
+    )
+
+    def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR) -> Tuple[int, ...]:
+        return tuple((x - x % multiple_of) for x in args)
+
+    def invoke(self, context: InvocationContext) -> IdealSizeOutput:
+        unet_config = context.models.get_config(self.unet.unet.key)
+        aspect = self.width / self.height
+        dimension: float = 512
+        if unet_config.base == BaseModelType.StableDiffusion2:
+            dimension = 768
+        elif unet_config.base == BaseModelType.StableDiffusionXL:
+            dimension = 1024
+        dimension = dimension * self.multiplier
+        min_dimension = math.floor(dimension * 0.5)
+        model_area = dimension * dimension  # hardcoded for now since all models are trained on square images
+
+        if aspect > 1.0:
+            init_height = max(min_dimension, math.sqrt(model_area / aspect))
+            init_width = init_height * aspect
+        else:
+            init_width = max(min_dimension, math.sqrt(model_area * aspect))
+            init_height = init_width / aspect
+
+        scaled_width, scaled_height = self.trim_to_multiple_of(
+            math.floor(init_width),
+            math.floor(init_height),
+        )
+
+        return IdealSizeOutput(width=scaled_width, height=scaled_height)
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 8fb9b93f4c4..6bb5b6882fa 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
 import inspect
-import math
 from contextlib import ExitStack
 from functools import singledispatchmethod
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
@@ -1448,58 +1447,3 @@ def invoke(self, context: InvocationContext) -> LatentsOutput:
         name = context.tensors.save(tensor=cropped_latents)
 
         return LatentsOutput.build(latents_name=name, latents=cropped_latents)
-
-
-@invocation_output("ideal_size_output")
-class IdealSizeOutput(BaseInvocationOutput):
-    """Base class for invocations that output an image"""
-
-    width: int = OutputField(description="The ideal width of the image (in pixels)")
-    height: int = OutputField(description="The ideal height of the image (in pixels)")
-
-
-@invocation(
-    "ideal_size",
-    title="Ideal Size",
-    tags=["latents", "math", "ideal_size"],
-    version="1.0.3",
-)
-class IdealSizeInvocation(BaseInvocation):
-    """Calculates the ideal size for generation to avoid duplication"""
-
-    width: int = InputField(default=1024, description="Final image width")
-    height: int = InputField(default=576, description="Final image height")
-    unet: UNetField = InputField(default=None, description=FieldDescriptions.unet)
-    multiplier: float = InputField(
-        default=1.0,
-        description="Amount to multiply the model's dimensions by when calculating the ideal size (may result in initial generation artifacts if too large)",
-    )
-
-    def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR) -> Tuple[int, ...]:
-        return tuple((x - x % multiple_of) for x in args)
-
-    def invoke(self, context: InvocationContext) -> IdealSizeOutput:
-        unet_config = context.models.get_config(self.unet.unet.key)
-        aspect = self.width / self.height
-        dimension: float = 512
-        if unet_config.base == BaseModelType.StableDiffusion2:
-            dimension = 768
-        elif unet_config.base == BaseModelType.StableDiffusionXL:
-            dimension = 1024
-        dimension = dimension * self.multiplier
-        min_dimension = math.floor(dimension * 0.5)
-        model_area = dimension * dimension  # hardcoded for now since all models are trained on square images
-
-        if aspect > 1.0:
-            init_height = max(min_dimension, math.sqrt(model_area / aspect))
-            init_width = init_height * aspect
-        else:
-            init_width = max(min_dimension, math.sqrt(model_area * aspect))
-            init_height = init_width / aspect
-
-        scaled_width, scaled_height = self.trim_to_multiple_of(
-            math.floor(init_width),
-            math.floor(init_height),
-        )
-
-        return IdealSizeOutput(width=scaled_width, height=scaled_height)

From ed03d281e683ad6a9fdf373679008b7c6ec805f5 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 10:53:24 -0400
Subject: [PATCH 02/10] Move CropLatentsCoreInvocation to its own file. No functional changes.
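
Note for reviewers: the crop math in the node being moved boils down to floor-dividing
image-space pixel coordinates by LATENT_SCALE_FACTOR (8 for Stable Diffusion VAEs),
which is why all four inputs must be multiples of 8. A minimal standalone sketch of the
same arithmetic (the function and tensor names here are illustrative, not part of the node):

    import torch

    LATENT_SCALE_FACTOR = 8  # SD VAEs downsample each spatial dimension by 8x

    def crop_latents(latents: torch.Tensor, x: int, y: int, width: int, height: int) -> torch.Tensor:
        # Convert image-space pixel coordinates to latent-space indices.
        x1, y1 = x // LATENT_SCALE_FACTOR, y // LATENT_SCALE_FACTOR
        x2 = x1 + width // LATENT_SCALE_FACTOR
        y2 = y1 + height // LATENT_SCALE_FACTOR
        # Latents are laid out as (batch, channels, height, width).
        return latents[..., y1:y2, x1:x2]

    # e.g. a 512x512 crop at (64, 128) of a 1024x1024 image's latents:
    cropped = crop_latents(torch.randn(1, 4, 128, 128), x=64, y=128, width=512, height=512)
    assert cropped.shape == (1, 4, 64, 64)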
---
 invokeai/app/invocations/crop_latents.py | 61 ++++++++++++++++++++++++
 invokeai/app/invocations/latent.py       | 56 ----------------------
 2 files changed, 61 insertions(+), 56 deletions(-)
 create mode 100644 invokeai/app/invocations/crop_latents.py

diff --git a/invokeai/app/invocations/crop_latents.py b/invokeai/app/invocations/crop_latents.py
new file mode 100644
index 00000000000..258049fd2c1
--- /dev/null
+++ b/invokeai/app/invocations/crop_latents.py
@@ -0,0 +1,61 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+# The Crop Latents node was copied from @skunkworxdark's implementation here:
+# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80
+@invocation(
+    "crop_latents",
+    title="Crop Latents",
+    tags=["latents", "crop"],
+    category="latents",
+    version="1.0.2",
+)
+# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`.
+# Currently, if the class names conflict then 'GET /openapi.json' fails.
+class CropLatentsCoreInvocation(BaseInvocation):
+    """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be
+    divisible by the latent scale factor of 8.
+    """
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    x: int = InputField(
+        ge=0,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    y: int = InputField(
+        ge=0,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    width: int = InputField(
+        ge=1,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    height: int = InputField(
+        ge=1,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+
+        x1 = self.x // LATENT_SCALE_FACTOR
+        y1 = self.y // LATENT_SCALE_FACTOR
+        x2 = x1 + (self.width // LATENT_SCALE_FACTOR)
+        y2 = y1 + (self.height // LATENT_SCALE_FACTOR)
+
+        cropped_latents = latents[..., y1:y2, x1:x2]
+
+        name = context.tensors.save(tensor=cropped_latents)
+
+        return LatentsOutput.build(latents_name=name, latents=cropped_latents)
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 6bb5b6882fa..747e0ddc655 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1391,59 +1391,3 @@ def slerp(
 
         name = context.tensors.save(tensor=blended_latents)
         return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)
-
-
-# The Crop Latents node was copied from @skunkworxdark's implementation here:
-# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80
-@invocation(
-    "crop_latents",
-    title="Crop Latents",
-    tags=["latents", "crop"],
-    category="latents",
-    version="1.0.2",
-)
-# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`.
-# Currently, if the class names conflict then 'GET /openapi.json' fails.
-class CropLatentsCoreInvocation(BaseInvocation):
-    """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be
-    divisible by the latent scale factor of 8.
-    """
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    x: int = InputField(
-        ge=0,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
-    )
-    y: int = InputField(
-        ge=0,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
-    )
-    width: int = InputField(
-        ge=1,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
-    )
-    height: int = InputField(
-        ge=1,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
-    )
-
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-
-        x1 = self.x // LATENT_SCALE_FACTOR
-        y1 = self.y // LATENT_SCALE_FACTOR
-        x2 = x1 + (self.width // LATENT_SCALE_FACTOR)
-        y2 = y1 + (self.height // LATENT_SCALE_FACTOR)
-
-        cropped_latents = latents[..., y1:y2, x1:x2]
-
-        name = context.tensors.save(tensor=cropped_latents)
-
-        return LatentsOutput.build(latents_name=name, latents=cropped_latents)

From 595096bdcf07c343e566ccdcece7431c896ce916 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 11:04:17 -0400
Subject: [PATCH 03/10] Move BlendLatentsInvocation to its own file. No functional changes.
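
Note for reviewers: the slerp() helper being moved implements standard spherical linear
interpolation, falling back to plain lerp when the vectors are nearly collinear. A
NumPy-only sketch of the same math (names here are illustrative):

    import numpy as np

    def slerp(t: float, v0: np.ndarray, v1: np.ndarray, dot_threshold: float = 0.9995) -> np.ndarray:
        # Cosine of the angle between the two (flattened) vectors.
        dot = np.sum(v0 * v1) / (np.linalg.norm(v0) * np.linalg.norm(v1))
        if np.abs(dot) > dot_threshold:
            # Nearly collinear: plain linear interpolation is numerically safer.
            return (1 - t) * v0 + t * v1
        theta_0 = np.arccos(dot)  # angle between v0 and v1
        theta_t = theta_0 * t     # angle between v0 and the result
        s0 = np.sin(theta_0 - theta_t) / np.sin(theta_0)
        s1 = np.sin(theta_t) / np.sin(theta_0)
        return s0 * v0 + s1 * v1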
---
 invokeai/app/invocations/blend_latents.py | 98 +++++++++++++++++++++++
 invokeai/app/invocations/latent.py        | 88 --------------------
 2 files changed, 98 insertions(+), 88 deletions(-)
 create mode 100644 invokeai/app/invocations/blend_latents.py

diff --git a/invokeai/app/invocations/blend_latents.py b/invokeai/app/invocations/blend_latents.py
new file mode 100644
index 00000000000..9238f4b34c5
--- /dev/null
+++ b/invokeai/app/invocations/blend_latents.py
@@ -0,0 +1,98 @@
+from typing import Any, Union
+
+import numpy as np
+import numpy.typing as npt
+import torch
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "lblend",
+    title="Blend Latents",
+    tags=["latents", "blend"],
+    category="latents",
+    version="1.0.3",
+)
+class BlendLatentsInvocation(BaseInvocation):
+    """Blend two latents using a given alpha. Latents must have the same size."""
+
+    latents_a: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    latents_b: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    alpha: float = InputField(default=0.5, description=FieldDescriptions.blend_alpha)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents_a = context.tensors.load(self.latents_a.latents_name)
+        latents_b = context.tensors.load(self.latents_b.latents_name)
+
+        if latents_a.shape != latents_b.shape:
+            raise Exception("Latents to blend must be the same size.")
+
+        device = TorchDevice.choose_torch_device()
+
+        def slerp(
+            t: Union[float, npt.NDArray[Any]],  # FIXME: maybe use np.float32 here?
+            v0: Union[torch.Tensor, npt.NDArray[Any]],
+            v1: Union[torch.Tensor, npt.NDArray[Any]],
+            DOT_THRESHOLD: float = 0.9995,
+        ) -> Union[torch.Tensor, npt.NDArray[Any]]:
+            """
+            Spherical linear interpolation
+            Args:
+                t (float/np.ndarray): Float value between 0.0 and 1.0
+                v0 (np.ndarray): Starting vector
+                v1 (np.ndarray): Final vector
+                DOT_THRESHOLD (float): Threshold for considering the two vectors as
+                                       collinear. Not recommended to alter this.
+            Returns:
+                v2 (np.ndarray): Interpolation vector between v0 and v1
+            """
+            inputs_are_torch = False
+            if not isinstance(v0, np.ndarray):
+                inputs_are_torch = True
+                v0 = v0.detach().cpu().numpy()
+            if not isinstance(v1, np.ndarray):
+                inputs_are_torch = True
+                v1 = v1.detach().cpu().numpy()
+
+            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
+            if np.abs(dot) > DOT_THRESHOLD:
+                v2 = (1 - t) * v0 + t * v1
+            else:
+                theta_0 = np.arccos(dot)
+                sin_theta_0 = np.sin(theta_0)
+                theta_t = theta_0 * t
+                sin_theta_t = np.sin(theta_t)
+                s0 = np.sin(theta_0 - theta_t) / sin_theta_0
+                s1 = sin_theta_t / sin_theta_0
+                v2 = s0 * v0 + s1 * v1
+
+            if inputs_are_torch:
+                v2_torch: torch.Tensor = torch.from_numpy(v2).to(device)
+                return v2_torch
+            else:
+                assert isinstance(v2, np.ndarray)
+                return v2
+
+        # blend
+        bl = slerp(self.alpha, latents_a, latents_b)
+        assert isinstance(bl, torch.Tensor)
+        blended_latents: torch.Tensor = bl  # for type checking convenience
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        blended_latents = blended_latents.to("cpu")
+
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=blended_latents)
+        return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 747e0ddc655..240a52517b3 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -6,7 +6,6 @@
 
 import einops
 import numpy as np
-import numpy.typing as npt
 import torch
 import torchvision
 import torchvision.transforms as T
@@ -1304,90 +1303,3 @@ def _(vae: AutoencoderTiny, image_tensor: torch.FloatTensor) -> torch.FloatTenso
         assert isinstance(vae, torch.nn.Module)
         latents: torch.FloatTensor = vae.encode(image_tensor).latents
         return latents
-
-
-@invocation(
-    "lblend",
-    title="Blend Latents",
-    tags=["latents", "blend"],
-    category="latents",
-    version="1.0.3",
-)
-class BlendLatentsInvocation(BaseInvocation):
-    """Blend two latents using a given alpha. Latents must have same size."""
-
-    latents_a: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    latents_b: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    alpha: float = InputField(default=0.5, description=FieldDescriptions.blend_alpha)
-
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents_a = context.tensors.load(self.latents_a.latents_name)
-        latents_b = context.tensors.load(self.latents_b.latents_name)
-
-        if latents_a.shape != latents_b.shape:
-            raise Exception("Latents to blend must be the same size.")
-
-        device = TorchDevice.choose_torch_device()
-
-        def slerp(
-            t: Union[float, npt.NDArray[Any]],  # FIXME: maybe use np.float32 here?
-            v0: Union[torch.Tensor, npt.NDArray[Any]],
-            v1: Union[torch.Tensor, npt.NDArray[Any]],
-            DOT_THRESHOLD: float = 0.9995,
-        ) -> Union[torch.Tensor, npt.NDArray[Any]]:
-            """
-            Spherical linear interpolation
-            Args:
-                t (float/np.ndarray): Float value between 0.0 and 1.0
-                v0 (np.ndarray): Starting vector
-                v1 (np.ndarray): Final vector
-                DOT_THRESHOLD (float): Threshold for considering the two vectors as
-                                       colineal. Not recommended to alter this.
-            Returns:
-                v2 (np.ndarray): Interpolation vector between v0 and v1
-            """
-            inputs_are_torch = False
-            if not isinstance(v0, np.ndarray):
-                inputs_are_torch = True
-                v0 = v0.detach().cpu().numpy()
-            if not isinstance(v1, np.ndarray):
-                inputs_are_torch = True
-                v1 = v1.detach().cpu().numpy()
-
-            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
-            if np.abs(dot) > DOT_THRESHOLD:
-                v2 = (1 - t) * v0 + t * v1
-            else:
-                theta_0 = np.arccos(dot)
-                sin_theta_0 = np.sin(theta_0)
-                theta_t = theta_0 * t
-                sin_theta_t = np.sin(theta_t)
-                s0 = np.sin(theta_0 - theta_t) / sin_theta_0
-                s1 = sin_theta_t / sin_theta_0
-                v2 = s0 * v0 + s1 * v1
-
-            if inputs_are_torch:
-                v2_torch: torch.Tensor = torch.from_numpy(v2).to(device)
-                return v2_torch
-            else:
-                assert isinstance(v2, np.ndarray)
-                return v2
-
-        # blend
-        bl = slerp(self.alpha, latents_a, latents_b)
-        assert isinstance(bl, torch.Tensor)
-        blended_latents: torch.Tensor = bl  # for type checking convenience
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        blended_latents = blended_latents.to("cpu")
-
-        TorchDevice.empty_cache()
-
-        name = context.tensors.save(tensor=blended_latents)
-        return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)

From 5e419dbb562b6bedaffeeb44b7d302747c81f135 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 11:05:44 -0400
Subject: [PATCH 04/10] Move ScaleLatentsInvocation and ResizeLatentsInvocation to their own file. No functional changes.

---
 invokeai/app/invocations/latent.py         |  89 ------------------
 invokeai/app/invocations/resize_latents.py | 103 +++++++++++++++++++++
 2 files changed, 103 insertions(+), 89 deletions(-)
 create mode 100644 invokeai/app/invocations/resize_latents.py

diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 240a52517b3..79d23199c58 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1117,95 +1117,6 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
 
         return ImageOutput.build(image_dto)
 
-
-LATENTS_INTERPOLATION_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"]
-
-
-@invocation(
-    "lresize",
-    title="Resize Latents",
-    tags=["latents", "resize"],
-    category="latents",
-    version="1.0.2",
-)
-class ResizeLatentsInvocation(BaseInvocation):
-    """Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8."""
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    width: int = InputField(
-        ge=64,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description=FieldDescriptions.width,
-    )
-    height: int = InputField(
-        ge=64,
-        multiple_of=LATENT_SCALE_FACTOR,
-        description=FieldDescriptions.width,
-    )
-    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
-    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
-
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-        device = TorchDevice.choose_torch_device()
-
-        resized_latents = torch.nn.functional.interpolate(
-            latents.to(device),
-            size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR),
-            mode=self.mode,
-            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
-        )
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        resized_latents = resized_latents.to("cpu")
-
-        TorchDevice.empty_cache()
-
-        name = context.tensors.save(tensor=resized_latents)
-        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
-
-
-@invocation(
-    "lscale",
-    title="Scale Latents",
-    tags=["latents", "resize"],
-    category="latents",
-    version="1.0.2",
-)
-class ScaleLatentsInvocation(BaseInvocation):
-    """Scales latents by a given factor."""
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    scale_factor: float = InputField(gt=0, description=FieldDescriptions.scale_factor)
-    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
-    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
-
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-
-        device = TorchDevice.choose_torch_device()
-
-        # resizing
-        resized_latents = torch.nn.functional.interpolate(
-            latents.to(device),
-            scale_factor=self.scale_factor,
-            mode=self.mode,
-            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
-        )
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        resized_latents = resized_latents.to("cpu")
-        TorchDevice.empty_cache()
-
-        name = context.tensors.save(tensor=resized_latents)
-        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
-
-
 @invocation(
     "i2l",
     title="Image to Latents",
     tags=["latents", "image", "vae", "i2l"],
     category="latents",
     version="1.0.2",
 )
diff --git a/invokeai/app/invocations/resize_latents.py b/invokeai/app/invocations/resize_latents.py
new file mode 100644
index 00000000000..90253e52e83
--- /dev/null
+++ b/invokeai/app/invocations/resize_latents.py
@@ -0,0 +1,103 @@
+from typing import Literal
+
+import torch
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+)
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.util.devices import TorchDevice
+
+LATENTS_INTERPOLATION_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"]
+
+
+@invocation(
+    "lresize",
+    title="Resize Latents",
+    tags=["latents", "resize"],
+    category="latents",
+    version="1.0.2",
+)
+class ResizeLatentsInvocation(BaseInvocation):
+    """Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    width: int = InputField(
+        ge=64,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description=FieldDescriptions.width,
+    )
+    height: int = InputField(
+        ge=64,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description=FieldDescriptions.height,
+    )
+    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
+    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+        device = TorchDevice.choose_torch_device()
+
+        resized_latents = torch.nn.functional.interpolate(
+            latents.to(device),
+            size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR),
+            mode=self.mode,
+            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
+        )
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        resized_latents = resized_latents.to("cpu")
+
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=resized_latents)
+        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
+
+
+@invocation(
+    "lscale",
+    title="Scale Latents",
+    tags=["latents", "resize"],
+    category="latents",
+    version="1.0.2",
+)
+class ScaleLatentsInvocation(BaseInvocation):
+    """Scales latents by a given factor."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    scale_factor: float = InputField(gt=0, description=FieldDescriptions.scale_factor)
+    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
+    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+
+        device = TorchDevice.choose_torch_device()
+
+        # resizing
+        resized_latents = torch.nn.functional.interpolate(
+            latents.to(device),
+            scale_factor=self.scale_factor,
+            mode=self.mode,
+            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
+        )
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        resized_latents = resized_latents.to("cpu")
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=resized_latents)
+        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)

From 58697141bfe57a7a293cc4c1dd8d20b116be3fa3 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 13:47:38 -0400
Subject: [PATCH 05/10] Move ImageToLatentsInvocation to its own file. No functional changes.
---
 invokeai/app/invocations/image_to_latents.py | 125 +++++++++++++++++++
 invokeai/app/invocations/latent.py           | 101 ---------------
 2 files changed, 125 insertions(+), 101 deletions(-)
 create mode 100644 invokeai/app/invocations/image_to_latents.py

diff --git a/invokeai/app/invocations/image_to_latents.py b/invokeai/app/invocations/image_to_latents.py
new file mode 100644
index 00000000000..53c64d8b4eb
--- /dev/null
+++ b/invokeai/app/invocations/image_to_latents.py
@@ -0,0 +1,125 @@
+from functools import singledispatchmethod
+
+import einops
+import torch
+from diffusers.models.attention_processor import (
+    AttnProcessor2_0,
+    LoRAAttnProcessor2_0,
+    LoRAXFormersAttnProcessor,
+    XFormersAttnProcessor,
+)
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+)
+from invokeai.app.invocations.latent import DEFAULT_PRECISION
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager import LoadedModel
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+
+
+@invocation(
+    "i2l",
+    title="Image to Latents",
+    tags=["latents", "image", "vae", "i2l"],
+    category="latents",
+    version="1.0.2",
+)
+class ImageToLatentsInvocation(BaseInvocation):
+    """Encodes an image into latents."""
+
+    image: ImageField = InputField(
+        description="The image to encode",
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
+    fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
+
+    @staticmethod
+    def vae_encode(vae_info: LoadedModel, upcast: bool, tiled: bool, image_tensor: torch.Tensor) -> torch.Tensor:
+        with vae_info as vae:
+            assert isinstance(vae, torch.nn.Module)
+            orig_dtype = vae.dtype
+            if upcast:
+                vae.to(dtype=torch.float32)
+
+                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
+                    vae.decoder.mid_block.attentions[0].processor,
+                    (
+                        AttnProcessor2_0,
+                        XFormersAttnProcessor,
+                        LoRAXFormersAttnProcessor,
+                        LoRAAttnProcessor2_0,
+                    ),
+                )
+                # if xformers or torch_2_0 is used attention block does not need
+                # to be in float32 which can save lots of memory
+                if use_torch_2_0_or_xformers:
+                    vae.post_quant_conv.to(orig_dtype)
+                    vae.decoder.conv_in.to(orig_dtype)
+                    vae.decoder.mid_block.to(orig_dtype)
+                # else:
+                #    latents = latents.float()
+
+            else:
+                vae.to(dtype=torch.float16)
+                # latents = latents.half()
+
+            if tiled:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()
+
+            # non_noised_latents_from_image
+            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
+            with torch.inference_mode():
+                latents = ImageToLatentsInvocation._encode_to_tensor(vae, image_tensor)
+
+            latents = vae.config.scaling_factor * latents
+            latents = latents.to(dtype=orig_dtype)
+
+        return latents
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        image = context.images.get_pil(self.image.image_name)
+
+        vae_info = context.models.load(self.vae.vae)
+
+        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+        if image_tensor.dim() == 3:
+            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
+
+        latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)
+
+        latents = latents.to("cpu")
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
+
+    @singledispatchmethod
+    @staticmethod
+    def _encode_to_tensor(vae: AutoencoderKL, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
+        assert isinstance(vae, torch.nn.Module)
+        image_tensor_dist = vae.encode(image_tensor).latent_dist
+        latents: torch.Tensor = image_tensor_dist.sample().to(
+            dtype=vae.dtype
+        )  # FIXME: uses torch.randn. make reproducible!
+        return latents
+
+    @_encode_to_tensor.register
+    @staticmethod
+    def _(vae: AutoencoderTiny, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
+        assert isinstance(vae, torch.nn.Module)
+        latents: torch.FloatTensor = vae.encode(image_tensor).latents
+        return latents
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 79d23199c58..c41dca195e7 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1,10 +1,8 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
 import inspect
 from contextlib import ExitStack
-from functools import singledispatchmethod
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
 
-import einops
 import numpy as np
 import torch
 import torchvision
@@ -1115,102 +1113,3 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         image_dto = context.images.save(image=image)
 
         return ImageOutput.build(image_dto)
-
-
-@invocation(
-    "i2l",
-    title="Image to Latents",
-    tags=["latents", "image", "vae", "i2l"],
-    category="latents",
-    version="1.0.2",
-)
-class ImageToLatentsInvocation(BaseInvocation):
-    """Encodes an image into latents."""
-
-    image: ImageField = InputField(
-        description="The image to encode",
-    )
-    vae: VAEField = InputField(
-        description=FieldDescriptions.vae,
-        input=Input.Connection,
-    )
-    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
-    fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
-
-    @staticmethod
-    def vae_encode(vae_info: LoadedModel, upcast: bool, tiled: bool, image_tensor: torch.Tensor) -> torch.Tensor:
-        with vae_info as vae:
-            assert isinstance(vae, torch.nn.Module)
-            orig_dtype = vae.dtype
-            if upcast:
-                vae.to(dtype=torch.float32)
-
-                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
-                    vae.decoder.mid_block.attentions[0].processor,
-                    (
-                        AttnProcessor2_0,
-                        XFormersAttnProcessor,
-                        LoRAXFormersAttnProcessor,
-                        LoRAAttnProcessor2_0,
-                    ),
-                )
-                # if xformers or torch_2_0 is used attention block does not need
-                # to be in float32 which can save lots of memory
-                if use_torch_2_0_or_xformers:
-                    vae.post_quant_conv.to(orig_dtype)
-                    vae.decoder.conv_in.to(orig_dtype)
-                    vae.decoder.mid_block.to(orig_dtype)
-                # else:
-                #    latents = latents.float()
-
-            else:
-                vae.to(dtype=torch.float16)
-                # latents = latents.half()
-
-            if tiled:
-                vae.enable_tiling()
-            else:
-                vae.disable_tiling()
-
-            # non_noised_latents_from_image
-            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
-            with torch.inference_mode():
-                latents = ImageToLatentsInvocation._encode_to_tensor(vae, image_tensor)
-
-            latents = vae.config.scaling_factor * latents
-            latents = latents.to(dtype=orig_dtype)
-
-        return latents
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        image = context.images.get_pil(self.image.image_name)
-
-        vae_info = context.models.load(self.vae.vae)
-
-        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
-        if image_tensor.dim() == 3:
-            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
-
-        latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)
-
-        latents = latents.to("cpu")
-        name = context.tensors.save(tensor=latents)
-        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
-
-    @singledispatchmethod
-    @staticmethod
-    def _encode_to_tensor(vae: AutoencoderKL, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
-        assert isinstance(vae, torch.nn.Module)
-        image_tensor_dist = vae.encode(image_tensor).latent_dist
-        latents: torch.Tensor = image_tensor_dist.sample().to(
-            dtype=vae.dtype
-        )  # FIXME: uses torch.randn. make reproducible!
-        return latents
-
-    @_encode_to_tensor.register
-    @staticmethod
-    def _(vae: AutoencoderTiny, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
-        assert isinstance(vae, torch.nn.Module)
-        latents: torch.FloatTensor = vae.encode(image_tensor).latents
-        return latents

From 854bca668a2258e8fbccde427bb8b1e69fcafba9 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 13:53:53 -0400
Subject: [PATCH 06/10] Move LatentsToImageInvocation to its own file. No functional changes.

---
 invokeai/app/invocations/latent.py           |  93 +---------------
 invokeai/app/invocations/latents_to_image.py | 107 +++++++++++++++++++
 2 files changed, 108 insertions(+), 92 deletions(-)
 create mode 100644 invokeai/app/invocations/latents_to_image.py

diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index c41dca195e7..7102a0a4eb9 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -8,16 +8,7 @@ import torchvision
 import torchvision.transforms as T
 from diffusers.configuration_utils import ConfigMixin
-from diffusers.image_processor import VaeImageProcessor
 from diffusers.models.adapter import T2IAdapter
-from diffusers.models.attention_processor import (
-    AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
-    XFormersAttnProcessor,
-)
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
 from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
 from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
 from diffusers.schedulers.scheduling_tcd import TCDScheduler
@@ -38,11 +29,9 @@
     LatentsField,
     OutputField,
     UIType,
-    WithBoard,
-    WithMetadata,
 )
 from invokeai.app.invocations.ip_adapter import IPAdapterField
-from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput
+from invokeai.app.invocations.primitives import DenoiseMaskOutput, LatentsOutput
 from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
@@ -1033,83 +1022,3 @@ def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
 
         name = context.tensors.save(tensor=result_latents)
         return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
-
-
-@invocation(
-    "l2i",
-    title="Latents to Image",
-    tags=["latents", "image", "vae", "l2i"],
-    category="latents",
-    version="1.2.2",
-)
-class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates an image from latents."""
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    vae: VAEField = InputField(
-        description=FieldDescriptions.vae,
-        input=Input.Connection,
-    )
-    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
-    fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> ImageOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-
-        vae_info = context.models.load(self.vae.vae)
-        assert isinstance(vae_info.model, (UNet2DConditionModel, AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
-            assert isinstance(vae, torch.nn.Module)
-            latents = latents.to(vae.device)
-            if self.fp32:
-                vae.to(dtype=torch.float32)
-
-                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
-                    vae.decoder.mid_block.attentions[0].processor,
-                    (
-                        AttnProcessor2_0,
-                        XFormersAttnProcessor,
-                        LoRAXFormersAttnProcessor,
-                        LoRAAttnProcessor2_0,
-                    ),
-                )
-                # if xformers or torch_2_0 is used attention block does not need
-                # to be in float32 which can save lots of memory
-                if use_torch_2_0_or_xformers:
-                    vae.post_quant_conv.to(latents.dtype)
-                    vae.decoder.conv_in.to(latents.dtype)
-                    vae.decoder.mid_block.to(latents.dtype)
-                else:
-                    latents = latents.float()
-
-            else:
-                vae.to(dtype=torch.float16)
-                latents = latents.half()
-
-            if self.tiled or context.config.get().force_tiled_decode:
-                vae.enable_tiling()
-            else:
-                vae.disable_tiling()
-
-            # clear memory as vae decode can request a lot
-            TorchDevice.empty_cache()
-
-            with torch.inference_mode():
-                # copied from diffusers pipeline
-                latents = latents / vae.config.scaling_factor
-                image = vae.decode(latents, return_dict=False)[0]
-                image = (image / 2 + 0.5).clamp(0, 1)  # denormalize
-                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
-                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()
-
-            image = VaeImageProcessor.numpy_to_pil(np_image)[0]
-
-            TorchDevice.empty_cache()
-
-            image_dto = context.images.save(image=image)
-
-        return ImageOutput.build(image_dto)
diff --git a/invokeai/app/invocations/latents_to_image.py b/invokeai/app/invocations/latents_to_image.py
new file mode 100644
index 00000000000..e5038869dd2
--- /dev/null
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -0,0 +1,107 @@
+import torch
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.models.attention_processor import (
+    AttnProcessor2_0,
+    LoRAAttnProcessor2_0,
+    LoRAXFormersAttnProcessor,
+    XFormersAttnProcessor,
+)
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.latent import DEFAULT_PRECISION
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "l2i",
+    title="Latents to Image",
+    tags=["latents", "image", "vae", "l2i"],
+    category="latents",
+    version="1.2.2",
+)
+class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an image from latents."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
+    fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+
+        vae_info = context.models.load(self.vae.vae)
+        assert isinstance(vae_info.model, (UNet2DConditionModel, AutoencoderKL, AutoencoderTiny))
+        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+            assert isinstance(vae, torch.nn.Module)
+            latents = latents.to(vae.device)
+            if self.fp32:
+                vae.to(dtype=torch.float32)
+
+                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
+                    vae.decoder.mid_block.attentions[0].processor,
+                    (
+                        AttnProcessor2_0,
+                        XFormersAttnProcessor,
+                        LoRAXFormersAttnProcessor,
+                        LoRAAttnProcessor2_0,
+                    ),
+                )
+                # if xformers or torch_2_0 is used attention block does not need
+                # to be in float32 which can save lots of memory
+                if use_torch_2_0_or_xformers:
+                    vae.post_quant_conv.to(latents.dtype)
+                    vae.decoder.conv_in.to(latents.dtype)
+                    vae.decoder.mid_block.to(latents.dtype)
+                else:
+                    latents = latents.float()
+
+            else:
+                vae.to(dtype=torch.float16)
+                latents = latents.half()
+
+            if self.tiled or context.config.get().force_tiled_decode:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()
+
+            # clear memory as vae decode can request a lot
+            TorchDevice.empty_cache()
+
+            with torch.inference_mode():
+                # copied from diffusers pipeline
+                latents = latents / vae.config.scaling_factor
+                image = vae.decode(latents, return_dict=False)[0]
+                image = (image / 2 + 0.5).clamp(0, 1)  # denormalize
+                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
+                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()
+
+            image = VaeImageProcessor.numpy_to_pil(np_image)[0]
+
+            TorchDevice.empty_cache()
+
+            image_dto = context.images.save(image=image)
+
+        return ImageOutput.build(image_dto)

From fea9013cad127c82cbbf3a91a97121dcc9db263a Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 14:48:32 -0400
Subject: [PATCH 07/10] Move CreateGradientMaskInvocation to its own file. No functional changes.
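
Note for reviewers: the gradient-mask tensor math in this node is compact but easy to
misread, so here is a standalone sketch of the transform (the function name is
illustrative; the PIL blur step is omitted):

    import torch

    def redistribute(blurred: torch.Tensor, minimum_denoise: float, staged: bool) -> torch.Tensor:
        # `blurred` holds mask values in [0, 1]. Shift/scale so the original mask
        # edges sit at 0 and the blur ramps outward toward 1.
        t = (blurred - 0.5) * 2
        threshold = 1 - minimum_denoise
        if staged:
            # Staged: every partially-masked pixel is set to the threshold.
            return torch.where((t < 1) & (t > 0), threshold, t)
        # Blur modes: values between the threshold and 1 are capped at the threshold.
        return torch.where((t > threshold) & (t < 1), threshold, t)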
---
 .../app/invocations/create_gradient_mask.py | 138 ++++++++++++++++++
 invokeai/app/invocations/latent.py          | 122 +----------------
 2 files changed, 141 insertions(+), 119 deletions(-)
 create mode 100644 invokeai/app/invocations/create_gradient_mask.py

diff --git a/invokeai/app/invocations/create_gradient_mask.py b/invokeai/app/invocations/create_gradient_mask.py
new file mode 100644
index 00000000000..5d3212caf80
--- /dev/null
+++ b/invokeai/app/invocations/create_gradient_mask.py
@@ -0,0 +1,138 @@
+from typing import Literal, Optional
+
+import numpy as np
+import torch
+import torchvision.transforms as T
+from PIL import Image, ImageFilter
+from torchvision.transforms.functional import resize as tv_resize
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.fields import (
+    DenoiseMaskField,
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+    OutputField,
+)
+from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
+from invokeai.app.invocations.latent import DEFAULT_PRECISION
+from invokeai.app.invocations.model import UNetField, VAEField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager import LoadedModel
+from invokeai.backend.model_manager.config import MainConfigBase, ModelVariantType
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+
+
+@invocation_output("gradient_mask_output")
+class GradientMaskOutput(BaseInvocationOutput):
+    """Outputs a denoise mask and an image representing the total gradient of the mask."""
+
+    denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
+    expanded_mask_area: ImageField = OutputField(
+        description="Image representing the total gradient area of the mask. For paste-back purposes."
+    )
+
+
+@invocation(
+    "create_gradient_mask",
+    title="Create Gradient Mask",
+    tags=["mask", "denoise"],
+    category="latents",
+    version="1.1.0",
+)
+class CreateGradientMaskInvocation(BaseInvocation):
+    """Creates mask for denoising model run."""
+
+    mask: ImageField = InputField(default=None, description="Image which will be masked", ui_order=1)
+    edge_radius: int = InputField(
+        default=16, ge=0, description="How far to blur/expand the edges of the mask", ui_order=2
+    )
+    coherence_mode: Literal["Gaussian Blur", "Box Blur", "Staged"] = InputField(default="Gaussian Blur", ui_order=3)
+    minimum_denoise: float = InputField(
+        default=0.0, ge=0, le=1, description="Minimum denoise level for the coherence region", ui_order=4
+    )
+    image: Optional[ImageField] = InputField(
+        default=None,
+        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
+        title="[OPTIONAL] Image",
+        ui_order=6,
+    )
+    unet: Optional[UNetField] = InputField(
+        description="OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE",
+        default=None,
+        input=Input.Connection,
+        title="[OPTIONAL] UNet",
+        ui_order=5,
+    )
+    vae: Optional[VAEField] = InputField(
+        default=None,
+        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
+        title="[OPTIONAL] VAE",
+        input=Input.Connection,
+        ui_order=7,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=8)
+    fp32: bool = InputField(
+        default=DEFAULT_PRECISION == "float32",
+        description=FieldDescriptions.fp32,
+        ui_order=9,
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> GradientMaskOutput:
+        mask_image = context.images.get_pil(self.mask.image_name, mode="L")
+        if self.edge_radius > 0:
+            if self.coherence_mode == "Box Blur":
+                blur_mask = mask_image.filter(ImageFilter.BoxBlur(self.edge_radius))
+            else:  # Gaussian Blur OR Staged
+                # Gaussian Blur uses standard deviation. 1/2 radius is a good approximation
+                blur_mask = mask_image.filter(ImageFilter.GaussianBlur(self.edge_radius / 2))
+
+            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(blur_mask, normalize=False)
+
+            # redistribute blur so that the original edges are 0 and blur outwards to 1
+            blur_tensor = (blur_tensor - 0.5) * 2
+
+            threshold = 1 - self.minimum_denoise
+
+            if self.coherence_mode == "Staged":
+                # wherever the blur_tensor is less than fully masked, convert it to threshold
+                blur_tensor = torch.where((blur_tensor < 1) & (blur_tensor > 0), threshold, blur_tensor)
+            else:
+                # wherever the blur_tensor is above threshold but less than 1, drop it to threshold
+                blur_tensor = torch.where((blur_tensor > threshold) & (blur_tensor < 1), threshold, blur_tensor)
+
+        else:
+            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
+
+        mask_name = context.tensors.save(tensor=blur_tensor.unsqueeze(1))
+
+        # compute a [0, 1] mask from the blur_tensor
+        expanded_mask = torch.where((blur_tensor < 1), 0, 1)
+        expanded_mask_image = Image.fromarray((expanded_mask.squeeze(0).numpy() * 255).astype(np.uint8), mode="L")
+        expanded_image_dto = context.images.save(expanded_mask_image)
+
+        masked_latents_name = None
+        if self.unet is not None and self.vae is not None and self.image is not None:
+            # all three fields must be present at the same time
+            main_model_config = context.models.get_config(self.unet.unet.key)
+            assert isinstance(main_model_config, MainConfigBase)
+            if main_model_config.variant is ModelVariantType.Inpaint:
+                mask = blur_tensor
+                vae_info: LoadedModel = context.models.load(self.vae.vae)
+                image = context.images.get_pil(self.image.image_name)
+                image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+                if image_tensor.dim() == 3:
+                    image_tensor = image_tensor.unsqueeze(0)
+                img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
+                masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
+                masked_latents = ImageToLatentsInvocation.vae_encode(
+                    vae_info, self.fp32, self.tiled, masked_image.clone()
+                )
+                masked_latents_name = context.tensors.save(tensor=masked_latents)
+
+        return GradientMaskOutput(
+            denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=masked_latents_name, gradient=True),
+            expanded_mask_area=ImageField(image_name=expanded_image_dto.image_name),
+        )
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 7102a0a4eb9..5359d7f92a3 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1,9 +1,8 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
 import inspect
 from contextlib import ExitStack
-from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
-import numpy as np
 import torch
 import torchvision
 import torchvision.transforms as T
@@ -13,7 +12,7 @@
 from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
 from diffusers.schedulers.scheduling_tcd import TCDScheduler
 from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler
-from PIL import Image, ImageFilter
+from PIL import Image
 from pydantic import field_validator
 from torchvision.transforms.functional import resize as tv_resize
 from transformers import CLIPVisionModelWithProjection
@@ -37,8 +36,7 @@
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType, LoadedModel
-from invokeai.backend.model_manager.config import MainConfigBase, ModelVariantType
+from invokeai.backend.model_manager import BaseModelType
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
@@ -158,120 +156,6 @@ def invoke(self, context: InvocationContext) -> DenoiseMaskOutput:
         )
 
-
-@invocation_output("gradient_mask_output")
-class GradientMaskOutput(BaseInvocationOutput):
-    """Outputs a denoise mask and an image representing the total gradient of the mask."""
-
-    denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
-    expanded_mask_area: ImageField = OutputField(
-        description="Image representing the total gradient area of the mask. For paste-back purposes."
-    )
-
-
-@invocation(
-    "create_gradient_mask",
-    title="Create Gradient Mask",
-    tags=["mask", "denoise"],
-    category="latents",
-    version="1.1.0",
-)
-class CreateGradientMaskInvocation(BaseInvocation):
-    """Creates mask for denoising model run."""
-
-    mask: ImageField = InputField(default=None, description="Image which will be masked", ui_order=1)
-    edge_radius: int = InputField(
-        default=16, ge=0, description="How far to blur/expand the edges of the mask", ui_order=2
-    )
-    coherence_mode: Literal["Gaussian Blur", "Box Blur", "Staged"] = InputField(default="Gaussian Blur", ui_order=3)
-    minimum_denoise: float = InputField(
-        default=0.0, ge=0, le=1, description="Minimum denoise level for the coherence region", ui_order=4
-    )
-    image: Optional[ImageField] = InputField(
-        default=None,
-        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
-        title="[OPTIONAL] Image",
-        ui_order=6,
-    )
-    unet: Optional[UNetField] = InputField(
-        description="OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE",
-        default=None,
-        input=Input.Connection,
-        title="[OPTIONAL] UNet",
-        ui_order=5,
-    )
-    vae: Optional[VAEField] = InputField(
-        default=None,
-        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
-        title="[OPTIONAL] VAE",
-        input=Input.Connection,
-        ui_order=7,
-    )
-    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=8)
-    fp32: bool = InputField(
-        default=DEFAULT_PRECISION == "float32",
-        description=FieldDescriptions.fp32,
-        ui_order=9,
-    )
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> GradientMaskOutput:
-        mask_image = context.images.get_pil(self.mask.image_name, mode="L")
-        if self.edge_radius > 0:
-            if self.coherence_mode == "Box Blur":
-                blur_mask = mask_image.filter(ImageFilter.BoxBlur(self.edge_radius))
-            else:  # Gaussian Blur OR Staged
-                # Gaussian Blur uses standard deviation. 1/2 radius is a good approximation
-                blur_mask = mask_image.filter(ImageFilter.GaussianBlur(self.edge_radius / 2))
-
-            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(blur_mask, normalize=False)
-
-            # redistribute blur so that the original edges are 0 and blur outwards to 1
-            blur_tensor = (blur_tensor - 0.5) * 2
-
-            threshold = 1 - self.minimum_denoise
-
-            if self.coherence_mode == "Staged":
-                # wherever the blur_tensor is less than fully masked, convert it to threshold
-                blur_tensor = torch.where((blur_tensor < 1) & (blur_tensor > 0), threshold, blur_tensor)
-            else:
-                # wherever the blur_tensor is above threshold but less than 1, drop it to threshold
-                blur_tensor = torch.where((blur_tensor > threshold) & (blur_tensor < 1), threshold, blur_tensor)
-
-        else:
-            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
-
-        mask_name = context.tensors.save(tensor=blur_tensor.unsqueeze(1))
-
-        # compute a [0, 1] mask from the blur_tensor
-        expanded_mask = torch.where((blur_tensor < 1), 0, 1)
-        expanded_mask_image = Image.fromarray((expanded_mask.squeeze(0).numpy() * 255).astype(np.uint8), mode="L")
-        expanded_image_dto = context.images.save(expanded_mask_image)
-
-        masked_latents_name = None
-        if self.unet is not None and self.vae is not None and self.image is not None:
-            # all three fields must be present at the same time
-            main_model_config = context.models.get_config(self.unet.unet.key)
-            assert isinstance(main_model_config, MainConfigBase)
-            if main_model_config.variant is ModelVariantType.Inpaint:
-                mask = blur_tensor
-                vae_info: LoadedModel = context.models.load(self.vae.vae)
-                image = context.images.get_pil(self.image.image_name)
-                image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
-                if image_tensor.dim() == 3:
-                    image_tensor = image_tensor.unsqueeze(0)
-                img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
-                masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
-                masked_latents = ImageToLatentsInvocation.vae_encode(
-                    vae_info, self.fp32, self.tiled, masked_image.clone()
-                )
-                masked_latents_name = context.tensors.save(tensor=masked_latents)
-
-        return GradientMaskOutput(
-            denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=masked_latents_name, gradient=True),
-            expanded_mask_area=ImageField(image_name=expanded_image_dto.image_name),
-        )
-
-
 def get_scheduler(
     context: InvocationContext,
     scheduler_info: ModelIdentifierField,

From 045caddee134d358f0f4f1b1f95b67ac56520879 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 5 Jun 2024 14:59:45 -0400
Subject: [PATCH 08/10] Move CreateDenoiseMaskInvocation to its own file. No functional changes.
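
Note for reviewers: before VAE-encoding masked latents, the node resizes the mask to the
image and gates the image with a binarized copy of it. A minimal sketch of that step
(the helper name is illustrative):

    import torch
    import torchvision.transforms as T
    from torchvision.transforms.functional import resize as tv_resize

    def mask_image_for_encode(image: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        # image: (1, 3, H, W); mask: (1, 1, h, w) with values in [0, 1].
        img_mask = tv_resize(mask, image.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
        # Binarize at 0.5: pixels where the resized mask is < 0.5 are zeroed before encoding.
        return image * torch.where(img_mask < 0.5, 0.0, 1.0)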
--- .../app/invocations/create_denoise_mask.py | 80 +++++++++++++++++++ invokeai/app/invocations/latent.py | 72 +---------------- 2 files changed, 82 insertions(+), 70 deletions(-) create mode 100644 invokeai/app/invocations/create_denoise_mask.py diff --git a/invokeai/app/invocations/create_denoise_mask.py b/invokeai/app/invocations/create_denoise_mask.py new file mode 100644 index 00000000000..d6763a88c3a --- /dev/null +++ b/invokeai/app/invocations/create_denoise_mask.py @@ -0,0 +1,80 @@ +from typing import Optional + +import torch +import torchvision.transforms as T +from PIL import Image +from torchvision.transforms.functional import resize as tv_resize + +from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField +from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation +from invokeai.app.invocations.latent import DEFAULT_PRECISION +from invokeai.app.invocations.model import VAEField +from invokeai.app.invocations.primitives import DenoiseMaskOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor + + +@invocation( + "create_denoise_mask", + title="Create Denoise Mask", + tags=["mask", "denoise"], + category="latents", + version="1.0.2", +) +class CreateDenoiseMaskInvocation(BaseInvocation): + """Creates mask for denoising model run.""" + + vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0) + image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1) + mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2) + tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3) + fp32: bool = InputField( + default=DEFAULT_PRECISION == "float32", + description=FieldDescriptions.fp32, + ui_order=4, + ) + + def prep_mask_tensor(self, mask_image: Image.Image) -> torch.Tensor: + if mask_image.mode != "L": + mask_image = mask_image.convert("L") + mask_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False) + if mask_tensor.dim() == 3: + mask_tensor = mask_tensor.unsqueeze(0) + # if shape is not None: + # mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR) + return mask_tensor + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> DenoiseMaskOutput: + if self.image is not None: + image = context.images.get_pil(self.image.image_name) + image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB")) + if image_tensor.dim() == 3: + image_tensor = image_tensor.unsqueeze(0) + else: + image_tensor = None + + mask = self.prep_mask_tensor( + context.images.get_pil(self.mask.image_name), + ) + + if image_tensor is not None: + vae_info = context.models.load(self.vae.vae) + + img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False) + masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0) + # TODO: + masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone()) + + masked_latents_name = context.tensors.save(tensor=masked_latents) + else: + masked_latents_name = None + + mask_name = context.tensors.save(tensor=mask) + + return DenoiseMaskOutput.build( + mask_name=mask_name, + masked_latents_name=masked_latents_name, + 
gradient=False, + ) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 5359d7f92a3..deeb484aead 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -12,7 +12,6 @@ from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler from diffusers.schedulers.scheduling_tcd import TCDScheduler from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler -from PIL import Image from pydantic import field_validator from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPVisionModelWithProjection @@ -22,7 +21,6 @@ ConditioningField, DenoiseMaskField, FieldDescriptions, - ImageField, Input, InputField, LatentsField, @@ -30,7 +28,7 @@ UIType, ) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import DenoiseMaskOutput, LatentsOutput +from invokeai.app.invocations.primitives import LatentsOutput from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -55,13 +53,12 @@ ControlNetData, StableDiffusionGeneratorPipeline, T2IAdapterData, - image_resized_to_grid_as_tensor, ) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import TorchDevice from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output from .controlnet_image_processors import ControlField -from .model import ModelIdentifierField, UNetField, VAEField +from .model import ModelIdentifierField, UNetField DEFAULT_PRECISION = TorchDevice.choose_torch_dtype() @@ -91,71 +88,6 @@ def invoke(self, context: InvocationContext) -> SchedulerOutput: return SchedulerOutput(scheduler=self.scheduler) -@invocation( - "create_denoise_mask", - title="Create Denoise Mask", - tags=["mask", "denoise"], - category="latents", - version="1.0.2", -) -class CreateDenoiseMaskInvocation(BaseInvocation): - """Creates mask for denoising model run.""" - - vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0) - image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1) - mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2) - tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3) - fp32: bool = InputField( - default=DEFAULT_PRECISION == "float32", - description=FieldDescriptions.fp32, - ui_order=4, - ) - - def prep_mask_tensor(self, mask_image: Image.Image) -> torch.Tensor: - if mask_image.mode != "L": - mask_image = mask_image.convert("L") - mask_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False) - if mask_tensor.dim() == 3: - mask_tensor = mask_tensor.unsqueeze(0) - # if shape is not None: - # mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR) - return mask_tensor - - @torch.no_grad() - def invoke(self, context: InvocationContext) -> DenoiseMaskOutput: - if self.image is not None: - image = context.images.get_pil(self.image.image_name) - image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB")) - if image_tensor.dim() == 3: - image_tensor = image_tensor.unsqueeze(0) - else: - image_tensor = None - - mask = self.prep_mask_tensor( - context.images.get_pil(self.mask.image_name), - ) - - if image_tensor is not None: - 
vae_info = context.models.load(self.vae.vae) - - img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False) - masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0) - # TODO: - masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone()) - - masked_latents_name = context.tensors.save(tensor=masked_latents) - else: - masked_latents_name = None - - mask_name = context.tensors.save(tensor=mask) - - return DenoiseMaskOutput.build( - mask_name=mask_name, - masked_latents_name=masked_latents_name, - gradient=False, - ) - - def get_scheduler( context: InvocationContext, scheduler_info: ModelIdentifierField, From 8f1afc032a5496186ebd7e5e97464f94c62313b8 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 5 Jun 2024 17:18:39 -0400 Subject: [PATCH 09/10] Move SchedulerInvocation to a new file. No functional changes. --- invokeai/app/invocations/latent.py | 28 +--------------------- invokeai/app/invocations/scheduler.py | 34 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 27 deletions(-) create mode 100644 invokeai/app/invocations/scheduler.py diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index deeb484aead..3851caa6474 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -24,7 +24,6 @@ Input, InputField, LatentsField, - OutputField, UIType, ) from invokeai.app.invocations.ip_adapter import IPAdapterField @@ -56,38 +55,13 @@ ) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import TorchDevice -from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from .baseinvocation import BaseInvocation, invocation from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField DEFAULT_PRECISION = TorchDevice.choose_torch_dtype() -@invocation_output("scheduler_output") -class SchedulerOutput(BaseInvocationOutput): - scheduler: SCHEDULER_NAME_VALUES = OutputField(description=FieldDescriptions.scheduler, ui_type=UIType.Scheduler) - - -@invocation( - "scheduler", - title="Scheduler", - tags=["scheduler"], - category="latents", - version="1.0.0", -) -class SchedulerInvocation(BaseInvocation): - """Selects a scheduler.""" - - scheduler: SCHEDULER_NAME_VALUES = InputField( - default="euler", - description=FieldDescriptions.scheduler, - ui_type=UIType.Scheduler, - ) - - def invoke(self, context: InvocationContext) -> SchedulerOutput: - return SchedulerOutput(scheduler=self.scheduler) - - def get_scheduler( context: InvocationContext, scheduler_info: ModelIdentifierField, diff --git a/invokeai/app/invocations/scheduler.py b/invokeai/app/invocations/scheduler.py new file mode 100644 index 00000000000..52af20378ef --- /dev/null +++ b/invokeai/app/invocations/scheduler.py @@ -0,0 +1,34 @@ +from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES +from invokeai.app.invocations.fields import ( + FieldDescriptions, + InputField, + OutputField, + UIType, +) +from invokeai.app.services.shared.invocation_context import InvocationContext + + +@invocation_output("scheduler_output") +class SchedulerOutput(BaseInvocationOutput): + scheduler: SCHEDULER_NAME_VALUES = OutputField(description=FieldDescriptions.scheduler, ui_type=UIType.Scheduler) + + +@invocation( + "scheduler", + 
title="Scheduler", + tags=["scheduler"], + category="latents", + version="1.0.0", +) +class SchedulerInvocation(BaseInvocation): + """Selects a scheduler.""" + + scheduler: SCHEDULER_NAME_VALUES = InputField( + default="euler", + description=FieldDescriptions.scheduler, + ui_type=UIType.Scheduler, + ) + + def invoke(self, context: InvocationContext) -> SchedulerOutput: + return SchedulerOutput(scheduler=self.scheduler) From 52c0c4a32f989e1d25a244c5ad094b7ec45cca47 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 6 Jun 2024 13:43:58 -0400 Subject: [PATCH 10/10] Rename latent.py -> denoise_latents.py. --- invokeai/app/invocations/create_denoise_mask.py | 2 +- invokeai/app/invocations/create_gradient_mask.py | 2 +- invokeai/app/invocations/{latent.py => denoise_latents.py} | 0 invokeai/app/invocations/image_to_latents.py | 2 +- invokeai/app/invocations/latents_to_image.py | 2 +- invokeai/invocation_api/__init__.py | 4 ++-- pyproject.toml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) rename invokeai/app/invocations/{latent.py => denoise_latents.py} (100%) diff --git a/invokeai/app/invocations/create_denoise_mask.py b/invokeai/app/invocations/create_denoise_mask.py index d6763a88c3a..d128e0efec6 100644 --- a/invokeai/app/invocations/create_denoise_mask.py +++ b/invokeai/app/invocations/create_denoise_mask.py @@ -6,9 +6,9 @@ from torchvision.transforms.functional import resize as tv_resize from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.denoise_latents import DEFAULT_PRECISION from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation -from invokeai.app.invocations.latent import DEFAULT_PRECISION from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import DenoiseMaskOutput from invokeai.app.services.shared.invocation_context import InvocationContext diff --git a/invokeai/app/invocations/create_gradient_mask.py b/invokeai/app/invocations/create_gradient_mask.py index 5d3212caf80..2d2b13fdcc2 100644 --- a/invokeai/app/invocations/create_gradient_mask.py +++ b/invokeai/app/invocations/create_gradient_mask.py @@ -7,6 +7,7 @@ from torchvision.transforms.functional import resize as tv_resize from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from invokeai.app.invocations.denoise_latents import DEFAULT_PRECISION from invokeai.app.invocations.fields import ( DenoiseMaskField, FieldDescriptions, @@ -16,7 +17,6 @@ OutputField, ) from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation -from invokeai.app.invocations.latent import DEFAULT_PRECISION from invokeai.app.invocations.model import UNetField, VAEField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.model_manager import LoadedModel diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/denoise_latents.py similarity index 100% rename from invokeai/app/invocations/latent.py rename to invokeai/app/invocations/denoise_latents.py diff --git a/invokeai/app/invocations/image_to_latents.py b/invokeai/app/invocations/image_to_latents.py index 53c64d8b4eb..bf2eb414e17 100644 --- a/invokeai/app/invocations/image_to_latents.py +++ b/invokeai/app/invocations/image_to_latents.py @@ -12,13 +12,13 @@ from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny from 
invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.denoise_latents import DEFAULT_PRECISION from invokeai.app.invocations.fields import ( FieldDescriptions, ImageField, Input, InputField, ) -from invokeai.app.invocations.latent import DEFAULT_PRECISION from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import LatentsOutput from invokeai.app.services.shared.invocation_context import InvocationContext diff --git a/invokeai/app/invocations/latents_to_image.py b/invokeai/app/invocations/latents_to_image.py index e5038869dd2..648ee7ac68d 100644 --- a/invokeai/app/invocations/latents_to_image.py +++ b/invokeai/app/invocations/latents_to_image.py @@ -11,6 +11,7 @@ from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.denoise_latents import DEFAULT_PRECISION from invokeai.app.invocations.fields import ( FieldDescriptions, Input, @@ -19,7 +20,6 @@ WithBoard, WithMetadata, ) -from invokeai.app.invocations.latent import DEFAULT_PRECISION from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext diff --git a/invokeai/invocation_api/__init__.py b/invokeai/invocation_api/__init__.py index 4eb78cf1eea..8f7452bb4b5 100644 --- a/invokeai/invocation_api/__init__.py +++ b/invokeai/invocation_api/__init__.py @@ -12,6 +12,7 @@ invocation_output, ) from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES +from invokeai.app.invocations.denoise_latents import SchedulerOutput from invokeai.app.invocations.fields import ( BoardField, ColorField, @@ -31,7 +32,6 @@ WithMetadata, WithWorkflow, ) -from invokeai.app.invocations.latent import SchedulerOutput from invokeai.app.invocations.metadata import MetadataItemField, MetadataItemOutput, MetadataOutput from invokeai.app.invocations.model import ( CLIPField, @@ -108,7 +108,7 @@ "WithBoard", "WithMetadata", "WithWorkflow", - # invokeai.app.invocations.latent + # invokeai.app.invocations.scheduler "SchedulerOutput", # invokeai.app.invocations.metadata "MetadataItemField", diff --git a/pyproject.toml b/pyproject.toml index bb30747ba84..fcc0aff60cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,7 +224,7 @@ follow_imports = "skip" # skips type checking of the modules listed below module = [ "invokeai.app.api.routers.models", "invokeai.app.invocations.compel", - "invokeai.app.invocations.latent", + "invokeai.app.invocations.denoise_latents", "invokeai.app.services.invocation_stats.invocation_stats_default", "invokeai.app.services.model_manager.model_manager_base", "invokeai.app.services.model_manager.model_manager_default",