From 5256f2650d7ab939a563c2091c7a8188cc18a920 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 6 Aug 2024 11:52:05 -0400 Subject: [PATCH 001/113] Bump diffusers version to include FLUX support. --- pyproject.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 37ff1936edf..23325a59ab7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,11 +33,12 @@ classifiers = [ ] dependencies = [ # Core generation dependencies, pinned for reproducible builds. - "accelerate==0.30.1", + "accelerate==0.33.0", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", "controlnet-aux==0.0.7", - "diffusers[torch]==0.27.2", + # TODO(ryand): Bump this once the next diffusers release is ready. + "diffusers[torch] @ git+https://github.com/huggingface/diffusers.git@4c6152c2fb0ade468aadb417102605a07a8635d3", "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal() @@ -57,7 +58,7 @@ dependencies = [ # Core application dependencies, pinned for reproducible builds. "fastapi-events==0.11.1", "fastapi==0.111.0", - "huggingface-hub==0.23.1", + "huggingface-hub==0.24.5", "pydantic-settings==2.2.1", "pydantic==2.7.2", "python-socketio==5.11.1", From 562c2cce993fc4afd0e5d8bb2587ed9a17136423 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 6 Aug 2024 17:56:36 +0000 Subject: [PATCH 002/113] Update imports for compatibility with bumped diffusers version. --- invokeai/backend/util/hotfixes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/backend/util/hotfixes.py b/invokeai/backend/util/hotfixes.py index 7e362fe9589..db9d19cf48c 100644 --- a/invokeai/backend/util/hotfixes.py +++ b/invokeai/backend/util/hotfixes.py @@ -3,7 +3,7 @@ import diffusers import torch from diffusers.configuration_utils import ConfigMixin, register_to_config -from diffusers.loaders import FromOriginalControlNetMixin +from diffusers.loaders.single_file_model import FromOriginalModelMixin from diffusers.models.attention_processor import AttentionProcessor, AttnProcessor from diffusers.models.controlnet import ControlNetConditioningEmbedding, ControlNetOutput, zero_module from diffusers.models.embeddings import ( @@ -32,7 +32,7 @@ logger = InvokeAILogger.get_logger(__name__) -class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin): +class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalModelMixin): """ A ControlNet model. From b617631ee5805b739cbeeff81ee604d53083c95e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 6 Aug 2024 19:34:49 +0000 Subject: [PATCH 003/113] Update HF download logic to work for black-forest-labs/FLUX.1-schnell. --- .../model_manager/util/select_hf_files.py | 24 +++++- .../util/test_hf_model_select.py | 77 +++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/invokeai/backend/model_manager/util/select_hf_files.py b/invokeai/backend/model_manager/util/select_hf_files.py index b0a95514378..2e86d9a62e7 100644 --- a/invokeai/backend/model_manager/util/select_hf_files.py +++ b/invokeai/backend/model_manager/util/select_hf_files.py @@ -54,6 +54,7 @@ def filter_files( "lora_weights.safetensors", "weights.pb", "onnx_data", + "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`. 
) ): paths.append(file) @@ -62,7 +63,7 @@ def filter_files( # downloading random checkpoints that might also be in the repo. However there is no guarantee # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models # will adhere to this naming convention, so this is an area to be careful of. - elif re.search(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name): + elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name): paths.append(file) # limit search to subfolder if requested @@ -97,7 +98,9 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path if variant == ModelRepoVariant.Flax: result.add(path) - elif path.suffix in [".json", ".txt"]: + # Note: '.model' was added to support: + # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model + elif path.suffix in [".json", ".txt", ".model"]: result.add(path) elif variant in [ @@ -140,6 +143,23 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path continue for candidate_list in subfolder_weights.values(): + # Check if at least one of the files has the explicit fp16 variant. + at_least_one_fp16 = False + for candidate in candidate_list: + if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16": + at_least_one_fp16 = True + break + + if not at_least_one_fp16: + # If none of the candidates in this candidate_list have the explicit fp16 variant label, then this + # candidate_list probably doesn't adhere to the variant naming convention that we expected. In this case, + # we'll simply keep all the candidates. An example of a model that hits this case is + # `black-forest-labs/FLUX.1-schnell` (as of commit 012d2fd). + for candidate in candidate_list: + result.add(candidate.path) + + # The candidate_list seems to have the expected variant naming convention. We'll select the highest scoring + # candidate. 
highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score) if highest_score_candidate: result.add(highest_score_candidate.path) diff --git a/tests/backend/model_manager/util/test_hf_model_select.py b/tests/backend/model_manager/util/test_hf_model_select.py index a29827e8c43..8b5a395fdbe 100644 --- a/tests/backend/model_manager/util/test_hf_model_select.py +++ b/tests/backend/model_manager/util/test_hf_model_select.py @@ -326,3 +326,80 @@ def test_select_multiple_weights( ) -> None: filtered_files = filter_files(sd15_test_files, variant) assert set(filtered_files) == {Path(f) for f in expected_files} + + +@pytest.fixture +def flux_schnell_test_files() -> list[Path]: + return [ + Path(f) + for f in [ + "FLUX.1-schnell/.gitattributes", + "FLUX.1-schnell/README.md", + "FLUX.1-schnell/ae.safetensors", + "FLUX.1-schnell/flux1-schnell.safetensors", + "FLUX.1-schnell/model_index.json", + "FLUX.1-schnell/scheduler/scheduler_config.json", + "FLUX.1-schnell/schnell_grid.jpeg", + "FLUX.1-schnell/text_encoder/config.json", + "FLUX.1-schnell/text_encoder/model.safetensors", + "FLUX.1-schnell/text_encoder_2/config.json", + "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors", + "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors", + "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json", + "FLUX.1-schnell/tokenizer/merges.txt", + "FLUX.1-schnell/tokenizer/special_tokens_map.json", + "FLUX.1-schnell/tokenizer/tokenizer_config.json", + "FLUX.1-schnell/tokenizer/vocab.json", + "FLUX.1-schnell/tokenizer_2/special_tokens_map.json", + "FLUX.1-schnell/tokenizer_2/spiece.model", + "FLUX.1-schnell/tokenizer_2/tokenizer.json", + "FLUX.1-schnell/tokenizer_2/tokenizer_config.json", + "FLUX.1-schnell/transformer/config.json", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json", + "FLUX.1-schnell/vae/config.json", + "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors", + ] + ] + + +@pytest.mark.parametrize( + ["variant", "expected_files"], + [ + ( + ModelRepoVariant.Default, + [ + "FLUX.1-schnell/model_index.json", + "FLUX.1-schnell/scheduler/scheduler_config.json", + "FLUX.1-schnell/text_encoder/config.json", + "FLUX.1-schnell/text_encoder/model.safetensors", + "FLUX.1-schnell/text_encoder_2/config.json", + "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors", + "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors", + "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json", + "FLUX.1-schnell/tokenizer/merges.txt", + "FLUX.1-schnell/tokenizer/special_tokens_map.json", + "FLUX.1-schnell/tokenizer/tokenizer_config.json", + "FLUX.1-schnell/tokenizer/vocab.json", + "FLUX.1-schnell/tokenizer_2/special_tokens_map.json", + "FLUX.1-schnell/tokenizer_2/spiece.model", + "FLUX.1-schnell/tokenizer_2/tokenizer.json", + "FLUX.1-schnell/tokenizer_2/tokenizer_config.json", + "FLUX.1-schnell/transformer/config.json", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors", + "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json", + "FLUX.1-schnell/vae/config.json", + 
"FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors", + ], + ), + ], +) +def test_select_flux_schnell_files( + flux_schnell_test_files: list[Path], variant: ModelRepoVariant, expected_files: list[str] +) -> None: + filtered_files = filter_files(flux_schnell_test_files, variant) + assert set(filtered_files) == {Path(f) for f in expected_files} From 6a068cca1bce2614849a4f55aa2f6b5321eac9fd Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 6 Aug 2024 21:51:22 +0000 Subject: [PATCH 004/113] First draft of FluxTextToImageInvocation. --- .../app/invocations/flux_text_to_image.py | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 invokeai/app/invocations/flux_text_to_image.py diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py new file mode 100644 index 00000000000..f229542a9a9 --- /dev/null +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -0,0 +1,201 @@ +from pathlib import Path +from typing import Literal + +import torch +from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from diffusers.pipelines.flux import FluxPipeline +from PIL import Image +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast + +from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata +from invokeai.app.invocations.primitives import ImageOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.util.devices import TorchDevice + +TFluxModelKeys = Literal["flux-schnell"] +FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} + + +@invocation( + "flux_text_to_image", + title="FLUX Text to Image", + tags=["image"], + category="image", + version="1.0.0", +) +class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): + """Text-to-image generation using a FLUX model.""" + + model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") + positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") + width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") + height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") + num_steps: int = InputField(default=4, description="Number of diffusion steps.") + guidance: float = InputField( + default=4.0, + description="The guidance strength. 
Higher values adhere more strictly to the prompt, and will produce less diverse images.", + ) + seed: int = InputField(default=0, description="Randomness seed for reproducibility.") + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> ImageOutput: + model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) + + clip_embeddings = self._run_clip_text_encoder(context, model_path) + t5_embeddings = self._run_t5_text_encoder(context, model_path) + latents = self._run_diffusion(context, model_path, clip_embeddings, t5_embeddings) + image = self._run_vae_decoding(context, model_path, latents) + image_dto = context.images.save(image=image) + return ImageOutput.build(image_dto) + + def _run_clip_text_encoder(self, context: InvocationContext, flux_model_dir: Path) -> torch.Tensor: + """Run the CLIP text encoder.""" + tokenizer_path = flux_model_dir / "tokenizer" + tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path, local_files_only=True) + assert isinstance(tokenizer, CLIPTokenizer) + + text_encoder_path = flux_model_dir / "text_encoder" + with context.models.load_local_model( + model_path=text_encoder_path, loader=self._load_flux_text_encoder + ) as text_encoder: + assert isinstance(text_encoder, CLIPTextModel) + flux_pipeline_with_te = FluxPipeline( + scheduler=None, + vae=None, + text_encoder=text_encoder, + tokenizer=tokenizer, + text_encoder_2=None, + tokenizer_2=None, + transformer=None, + ) + + return flux_pipeline_with_te._get_clip_prompt_embeds( + prompt=self.positive_prompt, device=TorchDevice.choose_torch_device() + ) + + def _run_t5_text_encoder(self, context: InvocationContext, flux_model_dir: Path) -> torch.Tensor: + """Run the T5 text encoder.""" + + if self.model == "flux-schnell": + max_seq_len = 256 + # elif self.model == "flux-dev": + # max_seq_len = 512 + else: + raise ValueError(f"Unknown model: {self.model}") + + tokenizer_path = flux_model_dir / "tokenizer_2" + tokenizer_2 = T5TokenizerFast.from_pretrained(tokenizer_path, local_files_only=True) + assert isinstance(tokenizer_2, T5TokenizerFast) + + text_encoder_path = flux_model_dir / "text_encoder_2" + with context.models.load_local_model( + model_path=text_encoder_path, loader=self._load_flux_text_encoder_2 + ) as text_encoder_2: + flux_pipeline_with_te2 = FluxPipeline( + scheduler=None, + vae=None, + text_encoder=None, + tokenizer=None, + text_encoder_2=text_encoder_2, + tokenizer_2=tokenizer_2, + transformer=None, + ) + + return flux_pipeline_with_te2._get_t5_prompt_embeds( + prompt=self.positive_prompt, max_sequence_length=max_seq_len, device=TorchDevice.choose_torch_device() + ) + + def _run_diffusion( + self, + context: InvocationContext, + flux_model_dir: Path, + clip_embeddings: torch.Tensor, + t5_embeddings: torch.Tensor, + ): + scheduler = FlowMatchEulerDiscreteScheduler() + + transformer_path = flux_model_dir / "transformer" + with context.models.load_local_model( + model_path=transformer_path, loader=self._load_flux_transformer + ) as transformer: + assert isinstance(transformer, FluxTransformer2DModel) + + flux_pipeline_with_transformer = FluxPipeline( + scheduler=scheduler, + vae=None, + text_encoder=None, + tokenizer=None, + text_encoder_2=None, + tokenizer_2=None, + transformer=transformer, + ) + + return flux_pipeline_with_transformer( + height=self.height, + width=self.width, + num_inference_steps=self.num_steps, + guidance_scale=self.guidance, + generator=torch.Generator().manual_seed(self.seed), + prompt_embeds=t5_embeddings, + pooled_prompt_embeds=clip_embeddings, 
+ output_type="latent", + return_dict=False, + )[0] + + def _run_vae_decoding( + self, + context: InvocationContext, + flux_model_dir: Path, + latent: torch.Tensor, + ) -> Image.Image: + vae_path = flux_model_dir / "vae" + with context.models.load_local_model(model_path=vae_path, loader=self._load_flux_vae) as vae: + assert isinstance(vae, AutoencoderKL) + + flux_pipeline_with_vae = FluxPipeline( + scheduler=None, + vae=vae, + text_encoder=None, + tokenizer=None, + text_encoder_2=None, + tokenizer_2=None, + transformer=None, + ) + + latents = flux_pipeline_with_vae._unpack_latents( + latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor + ) + latents = ( + latents / flux_pipeline_with_vae.vae.config.scaling_factor + ) + flux_pipeline_with_vae.vae.config.shift_factor + image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0] + image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil") + + assert isinstance(image, Image.Image) + return image + + @staticmethod + def _load_flux_text_encoder(path: Path) -> CLIPTextModel: + model = CLIPTextModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, CLIPTextModel) + return model + + @staticmethod + def _load_flux_text_encoder_2(path: Path) -> T5EncoderModel: + model = T5EncoderModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, T5EncoderModel) + return model + + @staticmethod + def _load_flux_transformer(path: Path) -> FluxTransformer2DModel: + model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, FluxTransformer2DModel) + return model + + @staticmethod + def _load_flux_vae(path: Path) -> AutoencoderKL: + model = AutoencoderKL.from_pretrained(path, local_files_only=True) + assert isinstance(model, AutoencoderKL) + return model From 5149a3ef2507a3d6f92c72b9addf0841ffb4253f Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 7 Aug 2024 14:18:19 +0000 Subject: [PATCH 005/113] Add sentencepiece dependency for the T5 tokenizer. --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 23325a59ab7..1c4e087e544 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ dependencies = [ "opencv-python==4.9.0.80", "pytorch-lightning==2.1.3", "safetensors==0.4.3", + # sentencepiece is required to load T5TokenizerFast (used by FLUX). + "sentencepiece==0.2.0", "spandrel==0.3.4", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "torch==2.2.2", From 761e49fb836b3d23005af56ae400b17a05d8e6ae Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 7 Aug 2024 15:12:01 +0000 Subject: [PATCH 006/113] Use the FluxPipeline.encode_prompt() api rather than trying to run the two text encoders separately. 
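For reference, a minimal sketch of the intended call pattern (illustrative only;
`positive_prompt` and the `clip_tokenizer` / `clip_text_encoder` / `t5_tokenizer` /
`t5_text_encoder` objects stand in for the locally loaded models, which are loaded
exactly as in the diff below):

    from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
    from invokeai.backend.util.devices import TorchDevice

    # Build a FluxPipeline that carries only the tokenizers and text encoders.
    pipeline = FluxPipeline(
        scheduler=None,
        vae=None,
        text_encoder=clip_text_encoder,
        tokenizer=clip_tokenizer,
        text_encoder_2=t5_text_encoder,
        tokenizer_2=t5_tokenizer,
        transformer=None,
    )
    # encode_prompt() runs both encoders in a single call: prompt_embeds are the
    # T5 embeddings, pooled_prompt_embeds are the CLIP embeddings.
    prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
        prompt=positive_prompt,
        prompt_2=positive_prompt,
        device=TorchDevice.choose_torch_device(),
        max_sequence_length=256,  # flux-schnell
    )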
--- .../app/invocations/flux_text_to_image.py | 86 +++++++++---------- 1 file changed, 41 insertions(+), 45 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index f229542a9a9..0b992909ab6 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -43,41 +43,14 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): def invoke(self, context: InvocationContext) -> ImageOutput: model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) - clip_embeddings = self._run_clip_text_encoder(context, model_path) - t5_embeddings = self._run_t5_text_encoder(context, model_path) + t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path) latents = self._run_diffusion(context, model_path, clip_embeddings, t5_embeddings) image = self._run_vae_decoding(context, model_path, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) - def _run_clip_text_encoder(self, context: InvocationContext, flux_model_dir: Path) -> torch.Tensor: - """Run the CLIP text encoder.""" - tokenizer_path = flux_model_dir / "tokenizer" - tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path, local_files_only=True) - assert isinstance(tokenizer, CLIPTokenizer) - - text_encoder_path = flux_model_dir / "text_encoder" - with context.models.load_local_model( - model_path=text_encoder_path, loader=self._load_flux_text_encoder - ) as text_encoder: - assert isinstance(text_encoder, CLIPTextModel) - flux_pipeline_with_te = FluxPipeline( - scheduler=None, - vae=None, - text_encoder=text_encoder, - tokenizer=tokenizer, - text_encoder_2=None, - tokenizer_2=None, - transformer=None, - ) - - return flux_pipeline_with_te._get_clip_prompt_embeds( - prompt=self.positive_prompt, device=TorchDevice.choose_torch_device() - ) - - def _run_t5_text_encoder(self, context: InvocationContext, flux_model_dir: Path) -> torch.Tensor: - """Run the T5 text encoder.""" - + def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]: + # Determine the T5 max sequence lenght based on the model. if self.model == "flux-schnell": max_seq_len = 256 # elif self.model == "flux-dev": @@ -85,28 +58,51 @@ def _run_t5_text_encoder(self, context: InvocationContext, flux_model_dir: Path) else: raise ValueError(f"Unknown model: {self.model}") - tokenizer_path = flux_model_dir / "tokenizer_2" - tokenizer_2 = T5TokenizerFast.from_pretrained(tokenizer_path, local_files_only=True) - assert isinstance(tokenizer_2, T5TokenizerFast) - - text_encoder_path = flux_model_dir / "text_encoder_2" - with context.models.load_local_model( - model_path=text_encoder_path, loader=self._load_flux_text_encoder_2 - ) as text_encoder_2: - flux_pipeline_with_te2 = FluxPipeline( + # Load the CLIP tokenizer. + clip_tokenizer_path = flux_model_dir / "tokenizer" + clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True) + assert isinstance(clip_tokenizer, CLIPTokenizer) + + # Load the T5 tokenizer. 
+ t5_tokenizer_path = flux_model_dir / "tokenizer_2" + t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True) + assert isinstance(t5_tokenizer, T5TokenizerFast) + + clip_text_encoder_path = flux_model_dir / "text_encoder" + t5_text_encoder_path = flux_model_dir / "text_encoder_2" + with ( + context.models.load_local_model( + model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder + ) as clip_text_encoder, + context.models.load_local_model( + model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2 + ) as t5_text_encoder, + ): + assert isinstance(clip_text_encoder, CLIPTextModel) + assert isinstance(t5_text_encoder, T5EncoderModel) + pipeline = FluxPipeline( scheduler=None, vae=None, - text_encoder=None, - tokenizer=None, - text_encoder_2=text_encoder_2, - tokenizer_2=tokenizer_2, + text_encoder=clip_text_encoder, + tokenizer=clip_tokenizer, + text_encoder_2=t5_text_encoder, + tokenizer_2=t5_tokenizer, transformer=None, ) - return flux_pipeline_with_te2._get_t5_prompt_embeds( - prompt=self.positive_prompt, max_sequence_length=max_seq_len, device=TorchDevice.choose_torch_device() + # prompt_embeds: T5 embeddings + # pooled_prompt_embeds: CLIP embeddings + prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt( + prompt=self.positive_prompt, + prompt_2=self.positive_prompt, + device=TorchDevice.choose_torch_device(), + max_sequence_length=max_seq_len, ) + assert isinstance(prompt_embeds, torch.Tensor) + assert isinstance(pooled_prompt_embeds, torch.Tensor) + return prompt_embeds, pooled_prompt_embeds + def _run_diffusion( self, context: InvocationContext, From 5e1b3e924eb85d1abd17559122045d48008c933e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 7 Aug 2024 19:50:03 +0000 Subject: [PATCH 007/113] Got FLUX schnell working with 8-bit quantization. Still lots of rough edges to clean up. --- .../app/invocations/flux_text_to_image.py | 53 ++++++++++++++++--- pyproject.toml | 5 +- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 0b992909ab6..2f78713b0cb 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,11 +1,14 @@ +import json from pathlib import Path from typing import Literal import torch from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from diffusers.pipelines.flux import FluxPipeline +from diffusers.pipelines.flux.pipeline_flux import FluxPipeline +from optimum.quanto import freeze, qfloat8, quantization_map, quantize, requantize from PIL import Image +from safetensors.torch import load_file, save_file from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -29,6 +32,9 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") + use_8bit: bool = InputField( + default=False, description="Whether to quantize the T5 model and transformer model to 8-bit precision." 
+ ) positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") @@ -110,7 +116,10 @@ def _run_diffusion( clip_embeddings: torch.Tensor, t5_embeddings: torch.Tensor, ): - scheduler = FlowMatchEulerDiscreteScheduler() + scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(flux_model_dir / "scheduler", local_files_only=True) + + # HACK(ryand): Manually empty the cache. + context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) transformer_path = flux_model_dir / "transformer" with context.models.load_local_model( @@ -144,7 +153,7 @@ def _run_vae_decoding( self, context: InvocationContext, flux_model_dir: Path, - latent: torch.Tensor, + latents: torch.Tensor, ) -> Image.Image: vae_path = flux_model_dir / "vae" with context.models.load_local_model(model_path=vae_path, loader=self._load_flux_vae) as vae: @@ -166,8 +175,9 @@ def _run_vae_decoding( latents = ( latents / flux_pipeline_with_vae.vae.config.scaling_factor ) + flux_pipeline_with_vae.vae.config.shift_factor + latents = latents.to(dtype=vae.dtype) image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0] - image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil") + image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0] assert isinstance(image, Image.Image) return image @@ -184,9 +194,38 @@ def _load_flux_text_encoder_2(path: Path) -> T5EncoderModel: assert isinstance(model, T5EncoderModel) return model - @staticmethod - def _load_flux_transformer(path: Path) -> FluxTransformer2DModel: - model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True) + def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: + if self.use_8bit: + model_8bit_path = path / "quantized" + model_8bit_weights_path = model_8bit_path / "weights.safetensors" + model_8bit_map_path = model_8bit_path / "quantization_map.json" + if model_8bit_path.exists(): + # The quantized model exists, load it. + with torch.device("meta"): + model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, FluxTransformer2DModel) + + state_dict = load_file(model_8bit_weights_path) + with open(model_8bit_map_path, "r") as f: + quant_map = json.load(f) + requantize(model=model, state_dict=state_dict, quantization_map=quant_map) + else: + # The quantized model does not exist yet, quantize and save it. 
+ model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + assert isinstance(model, FluxTransformer2DModel) + + quantize(model, weights=qfloat8) + freeze(model) + + model_8bit_path.mkdir(parents=True, exist_ok=True) + save_file(model.state_dict(), model_8bit_weights_path) + with open(model_8bit_map_path, "w") as f: + json.dump(quantization_map(model), f) + else: + model = FluxTransformer2DModel.from_pretrained( + path, local_files_only=True, torch_dtype=TorchDevice.choose_torch_dtype() + ) + assert isinstance(model, FluxTransformer2DModel) return model diff --git a/pyproject.toml b/pyproject.toml index 1c4e087e544..c6dc025a001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,16 +45,17 @@ dependencies = [ "onnx==1.15.0", "onnxruntime==1.16.3", "opencv-python==4.9.0.80", + "optimum-quanto==0.2.4", "pytorch-lightning==2.1.3", "safetensors==0.4.3", # sentencepiece is required to load T5TokenizerFast (used by FLUX). "sentencepiece==0.2.0", "spandrel==0.3.4", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 - "torch==2.2.2", + "torch==2.4.0", "torchmetrics==0.11.4", "torchsde==0.2.6", - "torchvision==0.17.2", + "torchvision==0.19.0", "transformers==4.41.1", # Core application dependencies, pinned for reproducible builds. From e71c7d0d051f02b67b6ea94d6333df09a5b1a3e8 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 7 Aug 2024 22:10:09 +0000 Subject: [PATCH 008/113] Minor improvements to FLUX workflow. --- .../app/invocations/flux_text_to_image.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 2f78713b0cb..2efa76b4ec8 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -33,7 +33,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") use_8bit: bool = InputField( - default=False, description="Whether to quantize the T5 model and transformer model to 8-bit precision." + default=False, description="Whether to quantize the transformer model to 8-bit precision." ) positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") @@ -56,7 +56,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput: return ImageOutput.build(image_dto) def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]: - # Determine the T5 max sequence lenght based on the model. + # Determine the T5 max sequence length based on the model. if self.model == "flux-schnell": max_seq_len = 256 # elif self.model == "flux-dev": @@ -118,7 +118,9 @@ def _run_diffusion( ): scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(flux_model_dir / "scheduler", local_files_only=True) - # HACK(ryand): Manually empty the cache. + # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from + # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems + # if the cache is not empty. 
context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) transformer_path = flux_model_dir / "transformer" @@ -137,7 +139,7 @@ def _run_diffusion( transformer=transformer, ) - return flux_pipeline_with_transformer( + latents = flux_pipeline_with_transformer( height=self.height, width=self.width, num_inference_steps=self.num_steps, @@ -149,6 +151,9 @@ def _run_diffusion( return_dict=False, )[0] + assert isinstance(latents, torch.Tensor) + return latents + def _run_vae_decoding( self, context: InvocationContext, @@ -201,9 +206,14 @@ def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: model_8bit_map_path = model_8bit_path / "quantization_map.json" if model_8bit_path.exists(): # The quantized model exists, load it. - with torch.device("meta"): - model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True) - assert isinstance(model, FluxTransformer2DModel) + # TODO(ryand): Make loading from quantized model work properly. + # Reference: https://gist.github.com/AmericanPresidentJimmyCarter/873985638e1f3541ba8b00137e7dacd9?permalink_comment_id=5141210#gistcomment-5141210 + model = FluxTransformer2DModel.from_pretrained( + path, + local_files_only=True, + ) + assert isinstance(model, FluxTransformer2DModel) + model = model.to(device=torch.device("meta")) state_dict = load_file(model_8bit_weights_path) with open(model_8bit_map_path, "r") as f: @@ -211,6 +221,9 @@ def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: requantize(model=model, state_dict=state_dict, quantization_map=quant_map) else: # The quantized model does not exist yet, quantize and save it. + # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on + # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it + # here. model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) assert isinstance(model, FluxTransformer2DModel) @@ -222,9 +235,7 @@ def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: with open(model_8bit_map_path, "w") as f: json.dump(quantization_map(model), f) else: - model = FluxTransformer2DModel.from_pretrained( - path, local_files_only=True, torch_dtype=TorchDevice.choose_torch_dtype() - ) + model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) assert isinstance(model, FluxTransformer2DModel) return model From f7753be223b78b4544b3f9a6c38fef68d6a1b12f Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 8 Aug 2024 16:40:11 +0000 Subject: [PATCH 009/113] Make 8-bit quantization save/reload work for the FLUX transformer. Reload is still very slow with the current optimum.quanto implementation. 
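In outline, the save/reload round trip this enables (a sketch, assuming `path` is the
transformer directory of the FLUX checkpoint and that the QuantizedFluxTransformer2DModel
wrapper introduced in this change is importable):

    import torch
    from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
    from optimum.quanto import qfloat8

    quantized_dir = path / "quantized"

    # First run: quantize the bfloat16 checkpoint and cache the result on disk.
    model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
    q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8)
    quantized_dir.mkdir(parents=True, exist_ok=True)
    q_model.save_pretrained(quantized_dir)

    # Later runs: reload the cached quantized weights (still slow with the current
    # optimum.quanto requantize() implementation) and unwrap the plain diffusers model.
    q_model = QuantizedFluxTransformer2DModel.from_pretrained(quantized_dir)
    transformer = q_model._wrapped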
--- .../app/invocations/flux_text_to_image.py | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 2efa76b4ec8..caca495ccdf 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,4 +1,3 @@ -import json from pathlib import Path from typing import Literal @@ -6,9 +5,9 @@ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from optimum.quanto import freeze, qfloat8, quantization_map, quantize, requantize +from optimum.quanto import qfloat8 +from optimum.quanto.models import QuantizedDiffusersModel from PIL import Image -from safetensors.torch import load_file, save_file from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -21,6 +20,10 @@ FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} +class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel): + base_class = FluxTransformer2DModel + + @invocation( "flux_text_to_image", title="FLUX Text to Image", @@ -202,23 +205,16 @@ def _load_flux_text_encoder_2(path: Path) -> T5EncoderModel: def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: if self.use_8bit: model_8bit_path = path / "quantized" - model_8bit_weights_path = model_8bit_path / "weights.safetensors" - model_8bit_map_path = model_8bit_path / "quantization_map.json" if model_8bit_path.exists(): # The quantized model exists, load it. - # TODO(ryand): Make loading from quantized model work properly. - # Reference: https://gist.github.com/AmericanPresidentJimmyCarter/873985638e1f3541ba8b00137e7dacd9?permalink_comment_id=5141210#gistcomment-5141210 - model = FluxTransformer2DModel.from_pretrained( - path, - local_files_only=True, - ) - assert isinstance(model, FluxTransformer2DModel) - model = model.to(device=torch.device("meta")) - - state_dict = load_file(model_8bit_weights_path) - with open(model_8bit_map_path, "r") as f: - quant_map = json.load(f) - requantize(model=model, state_dict=state_dict, quantization_map=quant_map) + # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like + # something that we should be able to make much faster. + q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) + + # Access the underlying wrapped model. + # We access the wrapped model, even though it is private, because it simplifies the type checking by + # always returning a FluxTransformer2DModel from this function. + model = q_model._wrapped else: # The quantized model does not exist yet, quantize and save it. # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. 
In order to run this on @@ -227,13 +223,13 @@ def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) assert isinstance(model, FluxTransformer2DModel) - quantize(model, weights=qfloat8) - freeze(model) + q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) model_8bit_path.mkdir(parents=True, exist_ok=True) - save_file(model.state_dict(), model_8bit_weights_path) - with open(model_8bit_map_path, "w") as f: - json.dump(quantization_map(model), f) + q_model.save_pretrained(model_8bit_path) + + # (See earlier comment about accessing the wrapped model.) + model = q_model._wrapped else: model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) From 28654ec722406a9b56b4c4f08b663c40a3be5bc6 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 8 Aug 2024 18:23:20 +0000 Subject: [PATCH 010/113] Add support for 8-bit quantizatino of the FLUX T5XXL text encoder. --- .../app/invocations/flux_text_to_image.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index caca495ccdf..b059ab23da3 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -6,9 +6,10 @@ from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline from optimum.quanto import qfloat8 -from optimum.quanto.models import QuantizedDiffusersModel +from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel from PIL import Image from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast +from transformers.models.auto import AutoModelForTextEncoding from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata @@ -24,6 +25,10 @@ class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel): base_class = FluxTransformer2DModel +class QuantizedModelForTextEncoding(QuantizedTransformersModel): + auto_class = AutoModelForTextEncoding + + @invocation( "flux_text_to_image", title="FLUX Text to Image", @@ -196,9 +201,35 @@ def _load_flux_text_encoder(path: Path) -> CLIPTextModel: assert isinstance(model, CLIPTextModel) return model - @staticmethod - def _load_flux_text_encoder_2(path: Path) -> T5EncoderModel: - model = T5EncoderModel.from_pretrained(path, local_files_only=True) + def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel: + if self.use_8bit: + model_8bit_path = path / "quantized" + if model_8bit_path.exists(): + # The quantized model exists, load it. + # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like + # something that we should be able to make much faster. + q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path) + + # Access the underlying wrapped model. + # We access the wrapped model, even though it is private, because it simplifies the type checking by + # always returning a T5EncoderModel from this function. + model = q_model._wrapped + else: + # The quantized model does not exist yet, quantize and save it. + # TODO(ryand): dtype? 
+ model = T5EncoderModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, T5EncoderModel) + + q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8) + + model_8bit_path.mkdir(parents=True, exist_ok=True) + q_model.save_pretrained(model_8bit_path) + + # (See earlier comment about accessing the wrapped model.) + model = q_model._wrapped + else: + model = T5EncoderModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, T5EncoderModel) return model From 40e9a4ea003d0d06705048cc1e38c298fe85f751 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 8 Aug 2024 18:12:04 -0400 Subject: [PATCH 011/113] Make float16 inference work with FLUX on 24GB GPU. --- invokeai/app/invocations/flux_text_to_image.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index b059ab23da3..a680908461c 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -147,6 +147,9 @@ def _run_diffusion( transformer=transformer, ) + t5_embeddings = t5_embeddings.to(dtype=transformer.dtype) + clip_embeddings = clip_embeddings.to(dtype=transformer.dtype) + latents = flux_pipeline_with_transformer( height=self.height, width=self.width, From df1ac0748f39049383d711f263c4617b1e16d52c Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 9 Aug 2024 16:23:37 +0000 Subject: [PATCH 012/113] WIP - experimentation --- invokeai/backend/load_flux_model.py | 129 ++++++++++++++++++++++++++++ invokeai/backend/requantize.py | 54 ++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 invokeai/backend/load_flux_model.py create mode 100644 invokeai/backend/requantize.py diff --git a/invokeai/backend/load_flux_model.py b/invokeai/backend/load_flux_model.py new file mode 100644 index 00000000000..92731223963 --- /dev/null +++ b/invokeai/backend/load_flux_model.py @@ -0,0 +1,129 @@ +import json +import os +import time +from pathlib import Path +from typing import Union + +import torch +from diffusers.models.model_loading_utils import load_state_dict +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from diffusers.utils import ( + CONFIG_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFETENSORS_WEIGHTS_NAME, + _get_checkpoint_shard_files, + is_accelerate_available, +) +from optimum.quanto import qfloat8 +from optimum.quanto.models import QuantizedDiffusersModel +from optimum.quanto.models.shared_dict import ShardedStateDict + +from invokeai.backend.requantize import requantize + + +class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel): + base_class = FluxTransformer2DModel + + @classmethod + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): + if cls.base_class is None: + raise ValueError("The `base_class` attribute needs to be configured.") + + if not is_accelerate_available(): + raise ValueError("Reloading a quantized diffusers model requires the accelerate library.") + from accelerate import init_empty_weights + + if os.path.isdir(model_name_or_path): + # Look for a quantization map + qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) + if not os.path.exists(qmap_path): + raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") + + # Look for original model config file. 
+ model_config_path = os.path.join(model_name_or_path, CONFIG_NAME) + if not os.path.exists(model_config_path): + raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.") + + with open(qmap_path, "r", encoding="utf-8") as f: + qmap = json.load(f) + + with open(model_config_path, "r", encoding="utf-8") as f: + original_model_cls_name = json.load(f)["_class_name"] + configured_cls_name = cls.base_class.__name__ + if configured_cls_name != original_model_cls_name: + raise ValueError( + f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})." + ) + + # Create an empty model + config = cls.base_class.load_config(model_name_or_path) + with init_empty_weights(): + model = cls.base_class.from_config(config) + + # Look for the index of a sharded checkpoint + checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) + if os.path.exists(checkpoint_file): + # Convert the checkpoint path to a list of shards + _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file) + # Create a mapping for the sharded safetensor files + state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) + else: + # Look for a single checkpoint file + checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME) + if not os.path.exists(checkpoint_file): + raise ValueError(f"No safetensor weights found in {model_name_or_path}.") + # Get state_dict from model checkpoint + state_dict = load_state_dict(checkpoint_file) + + # Requantize and load quantized weights from state_dict + requantize(model, state_dict=state_dict, quantization_map=qmap) + model.eval() + return cls(model) + else: + raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") + + +def load_flux_transformer(path: Path) -> FluxTransformer2DModel: + # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + model_8bit_path = path / "quantized" + if model_8bit_path.exists(): + # The quantized model exists, load it. + # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like + # something that we should be able to make much faster. + q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) + + # Access the underlying wrapped model. + # We access the wrapped model, even though it is private, because it simplifies the type checking by + # always returning a FluxTransformer2DModel from this function. + model = q_model._wrapped + else: + # The quantized model does not exist yet, quantize and save it. + # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on + # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it + # here. + model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + assert isinstance(model, FluxTransformer2DModel) + + q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) + + model_8bit_path.mkdir(parents=True, exist_ok=True) + q_model.save_pretrained(model_8bit_path) + + # (See earlier comment about accessing the wrapped model.) 
+ model = q_model._wrapped + + assert isinstance(model, FluxTransformer2DModel) + return model + + +def main(): + start = time.time() + model = load_flux_transformer( + Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") + ) + print(f"Time to load: {time.time() - start}s") + print("hi") + + +if __name__ == "__main__": + main() diff --git a/invokeai/backend/requantize.py b/invokeai/backend/requantize.py new file mode 100644 index 00000000000..0e9356b60be --- /dev/null +++ b/invokeai/backend/requantize.py @@ -0,0 +1,54 @@ +from typing import Any, Dict + +import torch +from optimum.quanto.nn import QModuleMixin +from optimum.quanto.quantize import _quantize_submodule, freeze + + +def custom_freeze(model: torch.nn.Module): + for name, m in model.named_modules(): + if isinstance(m, QModuleMixin): + m.freeze() + + +def requantize( + model: torch.nn.Module, + state_dict: Dict[str, Any], + quantization_map: Dict[str, Dict[str, str]], + device: torch.device = None, +): + if device is None: + device = next(model.parameters()).device + if device.type == "meta": + device = torch.device("cpu") + + # Quantize the model with parameters from the quantization map + for name, m in model.named_modules(): + qconfig = quantization_map.get(name, None) + if qconfig is not None: + weights = qconfig["weights"] + if weights == "none": + weights = None + activations = qconfig["activations"] + if activations == "none": + activations = None + _quantize_submodule(model, name, m, weights=weights, activations=activations) + + # Move model parameters and buffers to CPU before materializing quantized weights + for name, m in model.named_modules(): + + def move_tensor(t, device): + if t.device.type == "meta": + return torch.empty_like(t, device=device) + return t.to(device) + + for name, param in m.named_parameters(recurse=False): + setattr(m, name, torch.nn.Parameter(move_tensor(param, "cpu"))) + for name, param in m.named_buffers(recurse=False): + setattr(m, name, move_tensor(param, "cpu")) + # Freeze model and move to target device + freeze(model) + model.to(device) + + # Load the quantized model weights + model.load_state_dict(state_dict, strict=False) From 11279abf24853b442f99182e5b55a041dc87b325 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 9 Aug 2024 16:39:43 +0000 Subject: [PATCH 013/113] Make quantized loading fast. 
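The core idea, as a sketch of the fast load path (`model_dir`, `state_dict`, and `qmap`
are assumed to be the quantized model directory, its safetensors state dict, and its
quantization map, loaded as in the surrounding loader code):

    from accelerate import init_empty_weights
    from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel

    from invokeai.backend.requantize import requantize

    # Build the model skeleton on the meta device so no float weights are materialized,
    # then let requantize() swap in quantized modules and load the pre-quantized state
    # dict directly, rather than quantizing float weights and freezing them a second time.
    config = FluxTransformer2DModel.load_config(model_dir)
    with init_empty_weights():
        model = FluxTransformer2DModel.from_config(config)
    requantize(model, state_dict=state_dict, quantization_map=qmap)
    model.eval()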
--- invokeai/backend/requantize.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/invokeai/backend/requantize.py b/invokeai/backend/requantize.py index 0e9356b60be..5f506f487d1 100644 --- a/invokeai/backend/requantize.py +++ b/invokeai/backend/requantize.py @@ -1,14 +1,13 @@ from typing import Any, Dict import torch -from optimum.quanto.nn import QModuleMixin -from optimum.quanto.quantize import _quantize_submodule, freeze +from optimum.quanto.quantize import _quantize_submodule - -def custom_freeze(model: torch.nn.Module): - for name, m in model.named_modules(): - if isinstance(m, QModuleMixin): - m.freeze() +# def custom_freeze(model: torch.nn.Module): +# for name, m in model.named_modules(): +# if isinstance(m, QModuleMixin): +# m.weight = +# m.freeze() def requantize( @@ -47,8 +46,8 @@ def move_tensor(t, device): for name, param in m.named_buffers(recurse=False): setattr(m, name, move_tensor(param, "cpu")) # Freeze model and move to target device - freeze(model) - model.to(device) + # freeze(model) + # model.to(device) # Load the quantized model weights model.load_state_dict(state_dict, strict=False) From bcb7f8ee47f9bcb379f9ad2dea54724a067e423d Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 9 Aug 2024 19:54:09 +0000 Subject: [PATCH 014/113] Make quantized loading fast for both T5XXL and FLUX transformer. --- .../app/invocations/flux_text_to_image.py | 7 +- .../fast_quantized_diffusion_model.py | 77 +++++++++++++++++++ .../fast_quantized_transformers_model.py | 61 +++++++++++++++ 3 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 invokeai/backend/quantization/fast_quantized_diffusion_model.py create mode 100644 invokeai/backend/quantization/fast_quantized_transformers_model.py diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index a680908461c..0a7290214d5 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -6,7 +6,6 @@ from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline from optimum.quanto import qfloat8 -from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel from PIL import Image from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from transformers.models.auto import AutoModelForTextEncoding @@ -15,17 +14,19 @@ from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel +from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.util.devices import TorchDevice TFluxModelKeys = Literal["flux-schnell"] FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} -class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel): +class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel): base_class = FluxTransformer2DModel -class QuantizedModelForTextEncoding(QuantizedTransformersModel): +class QuantizedModelForTextEncoding(FastQuantizedTransformersModel): auto_class = AutoModelForTextEncoding diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py 
b/invokeai/backend/quantization/fast_quantized_diffusion_model.py new file mode 100644 index 00000000000..0759984bf93 --- /dev/null +++ b/invokeai/backend/quantization/fast_quantized_diffusion_model.py @@ -0,0 +1,77 @@ +import json +import os +from typing import Union + +from diffusers.models.model_loading_utils import load_state_dict +from diffusers.utils import ( + CONFIG_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFETENSORS_WEIGHTS_NAME, + _get_checkpoint_shard_files, + is_accelerate_available, +) +from optimum.quanto.models import QuantizedDiffusersModel +from optimum.quanto.models.shared_dict import ShardedStateDict + +from invokeai.backend.requantize import requantize + + +class FastQuantizedDiffusersModel(QuantizedDiffusersModel): + @classmethod + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): + """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" + if cls.base_class is None: + raise ValueError("The `base_class` attribute needs to be configured.") + + if not is_accelerate_available(): + raise ValueError("Reloading a quantized diffusers model requires the accelerate library.") + from accelerate import init_empty_weights + + if os.path.isdir(model_name_or_path): + # Look for a quantization map + qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) + if not os.path.exists(qmap_path): + raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") + + # Look for original model config file. + model_config_path = os.path.join(model_name_or_path, CONFIG_NAME) + if not os.path.exists(model_config_path): + raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.") + + with open(qmap_path, "r", encoding="utf-8") as f: + qmap = json.load(f) + + with open(model_config_path, "r", encoding="utf-8") as f: + original_model_cls_name = json.load(f)["_class_name"] + configured_cls_name = cls.base_class.__name__ + if configured_cls_name != original_model_cls_name: + raise ValueError( + f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})." 
+ ) + + # Create an empty model + config = cls.base_class.load_config(model_name_or_path) + with init_empty_weights(): + model = cls.base_class.from_config(config) + + # Look for the index of a sharded checkpoint + checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) + if os.path.exists(checkpoint_file): + # Convert the checkpoint path to a list of shards + _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file) + # Create a mapping for the sharded safetensor files + state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) + else: + # Look for a single checkpoint file + checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME) + if not os.path.exists(checkpoint_file): + raise ValueError(f"No safetensor weights found in {model_name_or_path}.") + # Get state_dict from model checkpoint + state_dict = load_state_dict(checkpoint_file) + + # Requantize and load quantized weights from state_dict + requantize(model, state_dict=state_dict, quantization_map=qmap) + model.eval() + return cls(model) + else: + raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py new file mode 100644 index 00000000000..ce5cc7a3a9b --- /dev/null +++ b/invokeai/backend/quantization/fast_quantized_transformers_model.py @@ -0,0 +1,61 @@ +import json +import os +from typing import Union + +from optimum.quanto.models import QuantizedTransformersModel +from optimum.quanto.models.shared_dict import ShardedStateDict +from transformers import AutoConfig +from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict +from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available + +from invokeai.backend.requantize import requantize + + +class FastQuantizedTransformersModel(QuantizedTransformersModel): + @classmethod + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): + """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" + if cls.auto_class is None: + raise ValueError( + "Quantized models cannot be reloaded using {cls}: use a specialized quantized class such as QuantizedModelForCausalLM instead." 
+ ) + if not is_accelerate_available(): + raise ValueError("Reloading a quantized transformers model requires the accelerate library.") + from accelerate import init_empty_weights + + if os.path.isdir(model_name_or_path): + # Look for a quantization map + qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) + if not os.path.exists(qmap_path): + raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") + with open(qmap_path, "r", encoding="utf-8") as f: + qmap = json.load(f) + # Create an empty model + config = AutoConfig.from_pretrained(model_name_or_path) + with init_empty_weights(): + model = cls.auto_class.from_config(config) + # Look for the index of a sharded checkpoint + checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) + if os.path.exists(checkpoint_file): + # Convert the checkpoint path to a list of shards + checkpoint_file, sharded_metadata = get_checkpoint_shard_files(model_name_or_path, checkpoint_file) + # Create a mapping for the sharded safetensor files + state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) + else: + # Look for a single checkpoint file + checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_NAME) + if not os.path.exists(checkpoint_file): + raise ValueError(f"No safetensor weights found in {model_name_or_path}.") + # Get state_dict from model checkpoint + state_dict = load_state_dict(checkpoint_file) + # Requantize and load quantized weights from state_dict + requantize(model, state_dict=state_dict, quantization_map=qmap) + if getattr(model.config, "tie_word_embeddings", True): + # Tie output weight embeddings to input weight embeddings + # Note that if they were quantized they would NOT be tied + model.tie_weights() + # Set model in evaluation mode as it is done in transformers + model.eval() + return cls(model) + else: + raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") From 3f1fbc612db2685e0f1f47d6a79993f3db671937 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 12 Aug 2024 18:23:02 +0000 Subject: [PATCH 015/113] Split a FluxTextEncoderInvocation out from the FluxTextToImageInvocation. This has the advantage that we benfit from automatic caching when the prompt isn't changed. 
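An illustrative sketch of the caching idea (not InvokeAI's actual graph executor; every name below is
invented for the example): if each invocation's output is memoized by a hash of its inputs, keeping text
encoding in its own node means an unchanged prompt becomes a cache hit and the T5/CLIP encoders are skipped
on the next run, while the image node still re-executes for new seeds or sizes.

    import hashlib
    import json
    from typing import Any, Callable, Dict

    _node_cache: Dict[str, Any] = {}

    def run_node(node_type: str, inputs: Dict[str, Any], fn: Callable[[Dict[str, Any]], Any]) -> Any:
        # Key the cache on the node type plus all of its input values.
        key = hashlib.sha256(json.dumps({"type": node_type, "inputs": inputs}, sort_keys=True).encode()).hexdigest()
        if key not in _node_cache:
            _node_cache[key] = fn(inputs)  # the encoders only run when the prompt (or another input) changes
        return _node_cache[key]

    # e.g. run_node("flux_text_encoder", {"positive_prompt": "a cat"}, encode_fn) re-uses the cached
    # conditioning on repeat calls with the same prompt.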
--- invokeai/app/invocations/flux_text_encoder.py | 135 ++++++++++++++++++ .../app/invocations/flux_text_to_image.py | 117 +++------------ .../diffusion/conditioning_data.py | 16 ++- 3 files changed, 165 insertions(+), 103 deletions(-) create mode 100644 invokeai/app/invocations/flux_text_encoder.py diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py new file mode 100644 index 00000000000..582ae6fabcc --- /dev/null +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -0,0 +1,135 @@ +from pathlib import Path + +import torch +from diffusers.pipelines.flux.pipeline_flux import FluxPipeline +from optimum.quanto import qfloat8 +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast + +from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.fields import InputField +from invokeai.app.invocations.flux_text_to_image import FLUX_MODELS, QuantizedModelForTextEncoding, TFluxModelKeys +from invokeai.app.invocations.primitives import ConditioningOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo +from invokeai.backend.util.devices import TorchDevice + + +@invocation( + "flux_text_encoder", + title="FLUX Text Encoding", + tags=["image"], + category="image", + version="1.0.0", +) +class FluxTextEncoderInvocation(BaseInvocation): + model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") + use_8bit: bool = InputField( + default=False, description="Whether to quantize the transformer model to 8-bit precision." + ) + positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") + + # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not + # compatible with other ConditioningOutputs. + @torch.no_grad() + def invoke(self, context: InvocationContext) -> ConditioningOutput: + model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) + + t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path) + conditioning_data = ConditioningFieldData( + conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)] + ) + + conditioning_name = context.conditioning.save(conditioning_data) + return ConditioningOutput.build(conditioning_name) + + def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]: + # Determine the T5 max sequence length based on the model. + if self.model == "flux-schnell": + max_seq_len = 256 + # elif self.model == "flux-dev": + # max_seq_len = 512 + else: + raise ValueError(f"Unknown model: {self.model}") + + # Load the CLIP tokenizer. + clip_tokenizer_path = flux_model_dir / "tokenizer" + clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True) + assert isinstance(clip_tokenizer, CLIPTokenizer) + + # Load the T5 tokenizer. 
+ t5_tokenizer_path = flux_model_dir / "tokenizer_2" + t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True) + assert isinstance(t5_tokenizer, T5TokenizerFast) + + clip_text_encoder_path = flux_model_dir / "text_encoder" + t5_text_encoder_path = flux_model_dir / "text_encoder_2" + with ( + context.models.load_local_model( + model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder + ) as clip_text_encoder, + context.models.load_local_model( + model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2 + ) as t5_text_encoder, + ): + assert isinstance(clip_text_encoder, CLIPTextModel) + assert isinstance(t5_text_encoder, T5EncoderModel) + pipeline = FluxPipeline( + scheduler=None, + vae=None, + text_encoder=clip_text_encoder, + tokenizer=clip_tokenizer, + text_encoder_2=t5_text_encoder, + tokenizer_2=t5_tokenizer, + transformer=None, + ) + + # prompt_embeds: T5 embeddings + # pooled_prompt_embeds: CLIP embeddings + prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt( + prompt=self.positive_prompt, + prompt_2=self.positive_prompt, + device=TorchDevice.choose_torch_device(), + max_sequence_length=max_seq_len, + ) + + assert isinstance(prompt_embeds, torch.Tensor) + assert isinstance(pooled_prompt_embeds, torch.Tensor) + return prompt_embeds, pooled_prompt_embeds + + @staticmethod + def _load_flux_text_encoder(path: Path) -> CLIPTextModel: + model = CLIPTextModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, CLIPTextModel) + return model + + def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel: + if self.use_8bit: + model_8bit_path = path / "quantized" + if model_8bit_path.exists(): + # The quantized model exists, load it. + # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like + # something that we should be able to make much faster. + q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path) + + # Access the underlying wrapped model. + # We access the wrapped model, even though it is private, because it simplifies the type checking by + # always returning a T5EncoderModel from this function. + model = q_model._wrapped + else: + # The quantized model does not exist yet, quantize and save it. + # TODO(ryand): dtype? + model = T5EncoderModel.from_pretrained(path, local_files_only=True) + assert isinstance(model, T5EncoderModel) + + q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8) + + model_8bit_path.mkdir(parents=True, exist_ok=True) + q_model.save_pretrained(model_8bit_path) + + # (See earlier comment about accessing the wrapped model.) 
+ model = q_model._wrapped + else: + model = T5EncoderModel.from_pretrained(path, local_files_only=True) + + assert isinstance(model, T5EncoderModel) + return model diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 0a7290214d5..1b900484172 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -7,16 +7,22 @@ from diffusers.pipelines.flux.pipeline_flux import FluxPipeline from optimum.quanto import qfloat8 from PIL import Image -from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from transformers.models.auto import AutoModelForTextEncoding from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation -from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata +from invokeai.app.invocations.fields import ( + ConditioningField, + FieldDescriptions, + Input, + InputField, + WithBoard, + WithMetadata, +) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel -from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo TFluxModelKeys = Literal["flux-schnell"] FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} @@ -44,7 +50,9 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): use_8bit: bool = InputField( default=False, description="Whether to quantize the transformer model to 8-bit precision." ) - positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") + positive_text_conditioning: ConditioningField = InputField( + description=FieldDescriptions.positive_cond, input=Input.Connection + ) width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") num_steps: int = InputField(default=4, description="Number of diffusion steps.") @@ -58,66 +66,17 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): def invoke(self, context: InvocationContext) -> ImageOutput: model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) - t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path) - latents = self._run_diffusion(context, model_path, clip_embeddings, t5_embeddings) + # Load the conditioning data. + cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) + assert len(cond_data.conditionings) == 1 + flux_conditioning = cond_data.conditionings[0] + assert isinstance(flux_conditioning, FLUXConditioningInfo) + + latents = self._run_diffusion(context, model_path, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) image = self._run_vae_decoding(context, model_path, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) - def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]: - # Determine the T5 max sequence length based on the model. 
- if self.model == "flux-schnell": - max_seq_len = 256 - # elif self.model == "flux-dev": - # max_seq_len = 512 - else: - raise ValueError(f"Unknown model: {self.model}") - - # Load the CLIP tokenizer. - clip_tokenizer_path = flux_model_dir / "tokenizer" - clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True) - assert isinstance(clip_tokenizer, CLIPTokenizer) - - # Load the T5 tokenizer. - t5_tokenizer_path = flux_model_dir / "tokenizer_2" - t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True) - assert isinstance(t5_tokenizer, T5TokenizerFast) - - clip_text_encoder_path = flux_model_dir / "text_encoder" - t5_text_encoder_path = flux_model_dir / "text_encoder_2" - with ( - context.models.load_local_model( - model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder - ) as clip_text_encoder, - context.models.load_local_model( - model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2 - ) as t5_text_encoder, - ): - assert isinstance(clip_text_encoder, CLIPTextModel) - assert isinstance(t5_text_encoder, T5EncoderModel) - pipeline = FluxPipeline( - scheduler=None, - vae=None, - text_encoder=clip_text_encoder, - tokenizer=clip_tokenizer, - text_encoder_2=t5_text_encoder, - tokenizer_2=t5_tokenizer, - transformer=None, - ) - - # prompt_embeds: T5 embeddings - # pooled_prompt_embeds: CLIP embeddings - prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt( - prompt=self.positive_prompt, - prompt_2=self.positive_prompt, - device=TorchDevice.choose_torch_device(), - max_sequence_length=max_seq_len, - ) - - assert isinstance(prompt_embeds, torch.Tensor) - assert isinstance(pooled_prompt_embeds, torch.Tensor) - return prompt_embeds, pooled_prompt_embeds - def _run_diffusion( self, context: InvocationContext, @@ -199,44 +158,6 @@ def _run_vae_decoding( assert isinstance(image, Image.Image) return image - @staticmethod - def _load_flux_text_encoder(path: Path) -> CLIPTextModel: - model = CLIPTextModel.from_pretrained(path, local_files_only=True) - assert isinstance(model, CLIPTextModel) - return model - - def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel: - if self.use_8bit: - model_8bit_path = path / "quantized" - if model_8bit_path.exists(): - # The quantized model exists, load it. - # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like - # something that we should be able to make much faster. - q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path) - - # Access the underlying wrapped model. - # We access the wrapped model, even though it is private, because it simplifies the type checking by - # always returning a T5EncoderModel from this function. - model = q_model._wrapped - else: - # The quantized model does not exist yet, quantize and save it. - # TODO(ryand): dtype? - model = T5EncoderModel.from_pretrained(path, local_files_only=True) - assert isinstance(model, T5EncoderModel) - - q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8) - - model_8bit_path.mkdir(parents=True, exist_ok=True) - q_model.save_pretrained(model_8bit_path) - - # (See earlier comment about accessing the wrapped model.) 
- model = q_model._wrapped - else: - model = T5EncoderModel.from_pretrained(path, local_files_only=True) - - assert isinstance(model, T5EncoderModel) - return model - def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: if self.use_8bit: model_8bit_path = path / "quantized" diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 5fe1483ebc9..c5fda909c72 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -25,11 +25,6 @@ def to(self, device, dtype=None): return self -@dataclass -class ConditioningFieldData: - conditionings: List[BasicConditioningInfo] - - @dataclass class SDXLConditioningInfo(BasicConditioningInfo): """SDXL text conditioning information produced by Compel.""" @@ -43,6 +38,17 @@ def to(self, device, dtype=None): return super().to(device=device, dtype=dtype) +@dataclass +class FLUXConditioningInfo: + clip_embeds: torch.Tensor + t5_embeds: torch.Tensor + + +@dataclass +class ConditioningFieldData: + conditionings: List[BasicConditioningInfo] | List[SDXLConditioningInfo] | List[FLUXConditioningInfo] + + @dataclass class IPAdapterConditioningInfo: cond_image_prompt_embeds: torch.Tensor From 06d35c3ff86472af6a7357d55d9aa530fa799e4d Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 14 Aug 2024 04:06:16 +0000 Subject: [PATCH 016/113] wip --- invokeai/backend/bnb.py | 616 ++++++++++++++++++ .../backend/load_flux_model_bnb_llm_int8.py | 124 ++++ invokeai/backend/load_flux_model_bnb_nf4.py | 100 +++ pyproject.toml | 1 + 4 files changed, 841 insertions(+) create mode 100644 invokeai/backend/bnb.py create mode 100644 invokeai/backend/load_flux_model_bnb_llm_int8.py create mode 100644 invokeai/backend/load_flux_model_bnb_nf4.py diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py new file mode 100644 index 00000000000..766de08f6da --- /dev/null +++ b/invokeai/backend/bnb.py @@ -0,0 +1,616 @@ +from typing import Any, Optional, Set, Tuple, Type + +import accelerate +import bitsandbytes as bnb +import torch + +# The utils in this file take ideas from +# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py + + +# Patterns: +# - Quantize: +# - Initialize model on meta device +# - Replace layers +# - Load state_dict to cpu +# - Load state_dict into model +# - Quantize on GPU +# - Extract state_dict +# - Save + +# - Load: +# - Initialize model on meta device +# - Replace layers +# - Load state_dict to cpu +# - Load state_dict into model on cpu +# - Move to GPU + + +# class InvokeInt8Params(bnb.nn.Int8Params): +# """Overrides `bnb.nn.Int8Params` to add the following functionality: +# - Make it possible to load a quantized state dict without putting the weight on a "cuda" device. 
+# """ + +# def quantize(self, device: Optional[torch.device] = None): +# device = device or torch.device("cuda") +# if device.type != "cuda": +# raise RuntimeError(f"Int8Params quantization is only supported on CUDA devices ({device=}).") + +# # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L291-L302 +# B = self.data.contiguous().half().cuda(device) +# if self.has_fp16_weights: +# self.data = B +# else: +# # we store the 8-bit rows-major weight +# # we convert this weight to the turning/ampere weight during the first inference pass +# CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B) +# del CBt +# del SCBt +# self.data = CB +# self.CB = CB +# self.SCB = SCB + + +class Invoke2Linear8bitLt(torch.nn.Linear): + """This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.""" + + def __init__( + self, + input_features: int, + output_features: int, + bias=True, + has_fp16_weights=True, + memory_efficient_backward=False, + threshold=0.0, + index=None, + device=None, + ): + """ + Initialize Linear8bitLt class. + + Args: + input_features (`int`): + Number of input features of the linear layer. + output_features (`int`): + Number of output features of the linear layer. + bias (`bool`, defaults to `True`): + Whether the linear class uses the bias term as well. + """ + super().__init__(input_features, output_features, bias, device) + assert not memory_efficient_backward, "memory_efficient_backward is no longer required and the argument is deprecated in 0.37.0 and will be removed in 0.39.0" + self.state = bnb.MatmulLtState() + self.index = index + + self.state.threshold = threshold + self.state.has_fp16_weights = has_fp16_weights + self.state.memory_efficient_backward = memory_efficient_backward + if threshold > 0.0 and not has_fp16_weights: + self.state.use_pool = True + + self.weight = Int8Params(self.weight.data, has_fp16_weights=has_fp16_weights, requires_grad=has_fp16_weights) + self._register_load_state_dict_pre_hook(maybe_rearrange_weight) + + def _save_to_state_dict(self, destination, prefix, keep_vars): + super()._save_to_state_dict(destination, prefix, keep_vars) + + # we only need to save SCB as extra data, because CB for quantized weights is already stored in weight.data + scb_name = "SCB" + + # case 1: .cuda was called, SCB is in self.weight + param_from_weight = getattr(self.weight, scb_name) + # case 2: self.init_8bit_state was called, SCB is in self.state + param_from_state = getattr(self.state, scb_name) + # case 3: SCB is in self.state, weight layout reordered after first forward() + layout_reordered = self.state.CxB is not None + + key_name = prefix + f"{scb_name}" + format_name = prefix + "weight_format" + + if not self.state.has_fp16_weights: + if param_from_weight is not None: + destination[key_name] = param_from_weight if keep_vars else param_from_weight.detach() + destination[format_name] = torch.tensor(0, dtype=torch.uint8) + elif param_from_state is not None and not layout_reordered: + destination[key_name] = param_from_state if keep_vars else param_from_state.detach() + destination[format_name] = torch.tensor(0, dtype=torch.uint8) + elif param_from_state is not None: + destination[key_name] = param_from_state if keep_vars else param_from_state.detach() + weights_format = self.state.formatB + # At this point `weights_format` is an str + if weights_format not in LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING: + raise ValueError(f"Unrecognized weights format {weights_format}") + + weights_format = 
LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING[weights_format] + + destination[format_name] = torch.tensor(weights_format, dtype=torch.uint8) + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + super()._load_from_state_dict( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + unexpected_copy = list(unexpected_keys) + + for key in unexpected_copy: + input_name = key[len(prefix) :] + if input_name == "SCB": + if self.weight.SCB is None: + # buffers not yet initialized, can't access them directly without quantizing first + raise RuntimeError( + "Loading a quantized checkpoint into non-quantized Linear8bitLt is " + "not supported. Please call module.cuda() before module.load_state_dict()", + ) + + input_param = state_dict[key] + self.weight.SCB.copy_(input_param) + + if self.state.SCB is not None: + self.state.SCB = self.weight.SCB + + unexpected_keys.remove(key) + + def init_8bit_state(self): + self.state.CB = self.weight.CB + self.state.SCB = self.weight.SCB + self.weight.CB = None + self.weight.SCB = None + + def forward(self, x: torch.Tensor): + self.state.is_training = self.training + if self.weight.CB is not None: + self.init_8bit_state() + + # weights are cast automatically as Int8Params, but the bias has to be cast manually + if self.bias is not None and self.bias.dtype != x.dtype: + self.bias.data = self.bias.data.to(x.dtype) + + out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state) + + if not self.state.has_fp16_weights: + if self.state.CB is not None and self.state.CxB is not None: + # we converted 8-bit row major to turing/ampere format in the first inference pass + # we no longer need the row-major weight + del self.state.CB + self.weight.data = self.state.CxB + return out + + +class InvokeLinear8bitLt(bnb.nn.Linear8bitLt): + """Wraps `bnb.nn.Linear8bitLt` and adds the following functionality: + - enables instantiation directly on the device + - re-quantizaton when loading the state dict + """ + + def __init__( + self, *args: Any, device: Optional[torch.device] = None, threshold: float = 6.0, **kwargs: Any + ) -> None: + super().__init__(*args, device=device, threshold=threshold, **kwargs) + # If the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up + # filling the device memory with float32 weights which could lead to OOM + # if torch.tensor(0, device=device).device.type == "cuda": + # self.quantize_() + # self._register_load_state_dict_pre_hook(partial(_quantize_on_load_hook, self.quantize_)) + # self.register_load_state_dict_post_hook(_ignore_missing_weights_hook) + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + super()._load_from_state_dict( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + unexpected_copy = list(unexpected_keys) + + for key in unexpected_copy: + input_name = key[len(prefix) :] + if input_name == "SCB": + if self.weight.SCB is None: + # buffers not yet initialized, can't access them directly without quantizing first + raise RuntimeError( + "Loading a quantized checkpoint into non-quantized Linear8bitLt is " + "not supported. 
Please call module.cuda() before module.load_state_dict()", + ) + + input_param = state_dict[key] + self.weight.SCB.copy_(input_param) + + if self.state.SCB is not None: + self.state.SCB = self.weight.SCB + + unexpected_keys.remove(key) + + def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torch.device] = None) -> None: + """Inplace quantize.""" + if weight is None: + weight = self.weight.data + if weight.data.dtype == torch.int8: + # already quantized + return + assert isinstance(self.weight, bnb.nn.Int8Params) + self.weight = self.quantize(self.weight, weight, device) + + @staticmethod + def quantize( + int8params: bnb.nn.Int8Params, weight: torch.Tensor, device: Optional[torch.device] + ) -> bnb.nn.Int8Params: + device = device or torch.device("cuda") + if device.type != "cuda": + raise RuntimeError(f"Unexpected device type: {device.type}") + # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L291-L302 + B = weight.contiguous().to(device=device, dtype=torch.float16) + if int8params.has_fp16_weights: + int8params.data = B + else: + CB, CBt, SCB, SCBt, _ = bnb.functional.double_quant(B) + del CBt + del SCBt + int8params.data = CB + int8params.CB = CB + int8params.SCB = SCB + return int8params + + +# class _Linear4bit(bnb.nn.Linear4bit): +# """Wraps `bnb.nn.Linear4bit` to enable: instantiation directly on the device, re-quantizaton when loading the +# state dict, meta-device initialization, and materialization.""" + +# def __init__(self, *args: Any, device: Optional[torch.device] = None, **kwargs: Any) -> None: +# super().__init__(*args, device=device, **kwargs) +# self.weight = cast(bnb.nn.Params4bit, self.weight) # type: ignore[has-type] +# self.bias = cast(Optional[torch.nn.Parameter], self.bias) # type: ignore[has-type] +# # if the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up +# # filling the device memory with float32 weights which could lead to OOM +# if torch.tensor(0, device=device).device.type == "cuda": +# self.quantize_() +# self._register_load_state_dict_pre_hook(partial(_quantize_on_load_hook, self.quantize_)) +# self.register_load_state_dict_post_hook(_ignore_missing_weights_hook) + +# def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torch.device] = None) -> None: +# """Inplace quantize.""" +# if weight is None: +# weight = self.weight.data +# if weight.data.dtype == torch.uint8: +# # already quantized +# return +# assert isinstance(self.weight, bnb.nn.Params4bit) +# self.weight = self.quantize(self.weight, weight, device) + +# @staticmethod +# def quantize( +# params4bit: bnb.nn.Params4bit, weight: torch.Tensor, device: Optional[torch.device] +# ) -> bnb.nn.Params4bit: +# device = device or torch.device("cuda") +# if device.type != "cuda": +# raise RuntimeError(f"Unexpected device type: {device.type}") +# # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L156-L159 +# w = weight.contiguous().to(device=device, dtype=torch.half) +# w_4bit, quant_state = bnb.functional.quantize_4bit( +# w, +# blocksize=params4bit.blocksize, +# compress_statistics=params4bit.compress_statistics, +# quant_type=params4bit.quant_type, +# ) +# return _replace_param(params4bit, w_4bit, quant_state) + +# def to_empty(self, *, device: _DEVICE, recurse: bool = True) -> Self: +# if self.weight.dtype == torch.uint8: # was quantized +# # cannot init the quantized params directly +# weight = torch.empty(self.weight.quant_state.shape, 
device=device, dtype=torch.half) +# else: +# weight = torch.empty_like(self.weight.data, device=device) +# device = torch.device(device) +# if device.type == "cuda": # re-quantize +# self.quantize_(weight, device) +# else: +# self.weight = _replace_param(self.weight, weight) +# if self.bias is not None: +# self.bias = _replace_param(self.bias, torch.empty_like(self.bias, device=device)) +# return self + + +def convert_model_to_bnb_llm_int8(model: torch.nn.Module, ignore_modules: set[str]): + linear_cls = InvokeLinear8bitLt + _convert_linear_layers(model, linear_cls, ignore_modules) + + # TODO(ryand): Is this necessary? + # set the compute dtype if necessary + # for m in model.modules(): + # if isinstance(m, bnb.nn.Linear4bit): + # m.compute_dtype = self.dtype + # m.compute_type_is_set = False + + +# class BitsandbytesPrecision(Precision): +# """Plugin for quantizing weights with `bitsandbytes `__. + +# .. warning:: This is an :ref:`experimental ` feature. + +# .. note:: +# The optimizer is not automatically replaced with ``bitsandbytes.optim.Adam8bit`` or equivalent 8-bit optimizers. + +# Args: +# mode: The quantization mode to use. +# dtype: The compute dtype to use. +# ignore_modules: The submodules whose Linear layers should not be replaced, for example. ``{"lm_head"}``. +# This might be desirable for numerical stability. The string will be checked in as a prefix, so a value like +# "transformer.blocks" will ignore all linear layers in all of the transformer blocks. +# """ + +# def __init__( +# self, +# mode: Literal["nf4", "nf4-dq", "fp4", "fp4-dq", "int8", "int8-training"], +# dtype: Optional[torch.dtype] = None, +# ignore_modules: Optional[Set[str]] = None, +# ) -> None: +# if dtype is None: +# # try to be smart about the default selection +# if mode.startswith("int8"): +# dtype = torch.float16 +# else: +# dtype = ( +# torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16 +# ) +# if mode.startswith("int8") and dtype is not torch.float16: +# # this limitation is mentioned in https://huggingface.co/blog/hf-bitsandbytes-integration#usage +# raise ValueError(f"{mode!r} only works with `dtype=torch.float16`, but you chose `{dtype}`") + +# globals_ = globals() +# mode_to_cls = { +# "nf4": globals_["_NF4Linear"], +# "nf4-dq": globals_["_NF4DQLinear"], +# "fp4": globals_["_FP4Linear"], +# "fp4-dq": globals_["_FP4DQLinear"], +# "int8-training": globals_["_Linear8bitLt"], +# "int8": globals_["_Int8LinearInference"], +# } +# self._linear_cls = mode_to_cls[mode] +# self.dtype = dtype +# self.ignore_modules = ignore_modules or set() + +# @override +# def convert_module(self, module: torch.nn.Module) -> torch.nn.Module: +# # avoid naive users thinking they quantized their model +# if not any(isinstance(m, torch.nn.Linear) for m in module.modules()): +# raise TypeError( +# "You are using the bitsandbytes precision plugin, but your model has no Linear layers. This plugin" +# " won't work for your model." 
+# ) + +# # convert modules if they haven't been converted already +# if not any(isinstance(m, (bnb.nn.Linear8bitLt, bnb.nn.Linear4bit)) for m in module.modules()): +# # this will not quantize the model but only replace the layer classes +# _convert_layers(module, self._linear_cls, self.ignore_modules) + +# # set the compute dtype if necessary +# for m in module.modules(): +# if isinstance(m, bnb.nn.Linear4bit): +# m.compute_dtype = self.dtype +# m.compute_type_is_set = False +# return module + + +# def _quantize_on_load_hook(quantize_fn: Callable[[torch.Tensor], None], state_dict: OrderedDict, *_: Any) -> None: +# # There is only one key that ends with `*.weight`, the other one is the bias +# weight_key = next((name for name in state_dict if name.endswith("weight")), None) +# if weight_key is None: +# return +# # Load the weight from the state dict and re-quantize it +# weight = state_dict.pop(weight_key) +# quantize_fn(weight) + + +# def _ignore_missing_weights_hook(module: torch.nn.Module, incompatible_keys: _IncompatibleKeys) -> None: +# # since we manually loaded the weight in the `_quantize_on_load_hook` hook, we need to avoid this missing key false +# # positive +# for key in reversed(incompatible_keys.missing_keys): +# if key.endswith("weight"): +# incompatible_keys.missing_keys.remove(key) + + +def _replace_param( + param: torch.nn.Parameter, data: torch.Tensor, quant_state: Optional[Tuple] = None +) -> torch.nn.Parameter: + # doing `param.data = weight` raises a RuntimeError if param.data was on meta-device, so + # we need to re-create the parameters instead of overwriting the data + if param.device.type == "meta": + if isinstance(param, bnb.nn.Params4bit): + return bnb.nn.Params4bit( + data, + requires_grad=data.requires_grad, + quant_state=quant_state, + compress_statistics=param.compress_statistics, + quant_type=param.quant_type, + ) + return torch.nn.Parameter(data, requires_grad=data.requires_grad) + param.data = data + if isinstance(param, bnb.nn.Params4bit): + param.quant_state = quant_state + return param + + +def _convert_linear_layers( + module: torch.nn.Module, linear_cls: Type, ignore_modules: Set[str], prefix: str = "" +) -> None: + for name, child in module.named_children(): + fullname = f"{prefix}.{name}" if prefix else name + if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): + has_bias = child.bias is not None + # since we are going to copy over the child's data, the device doesn't matter. 
I chose CPU + # to avoid spiking CUDA memory even though initialization is slower + # 4bit layers support quantizing from meta-device params so this is only relevant for 8-bit + _Linear4bit = globals()["_Linear4bit"] + device = torch.device("meta" if issubclass(linear_cls, _Linear4bit) else "cpu") + replacement = linear_cls( + child.in_features, + child.out_features, + bias=has_bias, + device=device, + ) + if has_bias: + replacement.bias = _replace_param(replacement.bias, child.bias.data.clone()) + state = {"quant_state": replacement.weight.quant_state if issubclass(linear_cls, _Linear4bit) else None} + replacement.weight = _replace_param(replacement.weight, child.weight.data.clone(), **state) + module.__setattr__(name, replacement) + else: + _convert_linear_layers(child, linear_cls, ignore_modules, prefix=fullname) + + +def _convert_linear_layers_to_llm_8bit(module: torch.nn.Module, ignore_modules: Set[str], prefix: str = "") -> None: + for name, child in module.named_children(): + fullname = f"{prefix}.{name}" if prefix else name + if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): + has_bias = child.bias is not None + replacement = InvokeLinear8bitLt( + child.in_features, + child.out_features, + bias=has_bias, + has_fp16_weights=False, + # device=device, + ) + replacement.weight.data = child.weight.data + if has_bias: + replacement.bias.data = child.bias.data + replacement.requires_grad_(False) + module.__setattr__(name, replacement) + else: + _convert_linear_layers_to_llm_8bit(child, ignore_modules, prefix=fullname) + + +def _convert_linear_layers_to_nf4( + module: torch.nn.Module, ignore_modules: Set[str], compute_dtype: torch.dtype, prefix: str = "" +) -> None: + for name, child in module.named_children(): + fullname = f"{prefix}.{name}" if prefix else name + if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): + has_bias = child.bias is not None + replacement = bnb.nn.Linear4bit( + child.in_features, + child.out_features, + bias=has_bias, + compute_dtype=torch.float16, + # TODO(ryand): Test compress_statistics=True. + # compress_statistics=True, + ) + replacement.weight.data = child.weight.data + if has_bias: + replacement.bias.data = child.bias.data + replacement.requires_grad_(False) + module.__setattr__(name, replacement) + else: + _convert_linear_layers_to_nf4(child, ignore_modules, compute_dtype=compute_dtype, prefix=fullname) + + +# def _replace_linear_layers( +# model: torch.nn.Module, +# linear_layer_type: Literal["Linear8bitLt", "Linear4bit"], +# modules_to_not_convert: set[str], +# current_key_name: str | None = None, +# ): +# has_been_replaced = False +# for name, module in model.named_children(): +# if current_key_name is None: +# current_key_name = [] +# current_key_name.append(name) +# if isinstance(module, torch.nn.Linear) and name not in modules_to_not_convert: +# # Check if the current key is not in the `modules_to_not_convert` +# current_key_name_str = ".".join(current_key_name) +# proceed = True +# for key in modules_to_not_convert: +# if ( +# (key in current_key_name_str) and (key + "." 
in current_key_name_str) +# ) or key == current_key_name_str: +# proceed = False +# break +# if proceed: +# # Load bnb module with empty weight and replace ``nn.Linear` module +# if bnb_quantization_config.load_in_8bit: +# bnb_module = bnb.nn.Linear8bitLt( +# module.in_features, +# module.out_features, +# module.bias is not None, +# has_fp16_weights=False, +# threshold=bnb_quantization_config.llm_int8_threshold, +# ) +# elif bnb_quantization_config.load_in_4bit: +# bnb_module = bnb.nn.Linear4bit( +# module.in_features, +# module.out_features, +# module.bias is not None, +# bnb_quantization_config.bnb_4bit_compute_dtype, +# compress_statistics=bnb_quantization_config.bnb_4bit_use_double_quant, +# quant_type=bnb_quantization_config.bnb_4bit_quant_type, +# ) +# else: +# raise ValueError("load_in_8bit and load_in_4bit can't be both False") +# bnb_module.weight.data = module.weight.data +# if module.bias is not None: +# bnb_module.bias.data = module.bias.data +# bnb_module.requires_grad_(False) +# setattr(model, name, bnb_module) +# has_been_replaced = True +# if len(list(module.children())) > 0: +# _, _has_been_replaced = _replace_with_bnb_layers( +# module, bnb_quantization_config, modules_to_not_convert, current_key_name +# ) +# has_been_replaced = has_been_replaced | _has_been_replaced +# # Remove the last key for recursion +# current_key_name.pop(-1) +# return model, has_been_replaced + + +def get_parameter_device(parameter: torch.nn.Module): + return next(parameter.parameters()).device + + +def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str]): + """Apply bitsandbytes LLM.8bit() quantization to the model.""" + model_device = get_parameter_device(model) + if model_device.type != "meta": + # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the + # meta device, so we enforce it for now. + raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") + + with accelerate.init_empty_weights(): + _convert_linear_layers_to_llm_8bit(module=model, ignore_modules=modules_to_not_convert) + + return model + + +def quantize_model_nf4(model: torch.nn.Module, modules_to_not_convert: set[str], compute_dtype: torch.dtype): + """Apply bitsandbytes nf4 quantization to the model.""" + # model_device = get_parameter_device(model) + # if model_device.type != "meta": + # # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the + # # meta device, so we enforce it for now. 
+ # raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") + + # with accelerate.init_empty_weights(): + _convert_linear_layers_to_nf4(module=model, ignore_modules=modules_to_not_convert, compute_dtype=compute_dtype) + + return model diff --git a/invokeai/backend/load_flux_model_bnb_llm_int8.py b/invokeai/backend/load_flux_model_bnb_llm_int8.py new file mode 100644 index 00000000000..f7e1471928e --- /dev/null +++ b/invokeai/backend/load_flux_model_bnb_llm_int8.py @@ -0,0 +1,124 @@ +import time +from pathlib import Path + +import accelerate +import torch +from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model +from accelerate.utils.bnb import get_keys_to_not_convert +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from safetensors.torch import load_file + +from invokeai.backend.bnb import quantize_model_llm_int8 + +# Docs: +# https://huggingface.co/docs/accelerate/usage_guides/quantization +# https://huggingface.co/docs/bitsandbytes/v0.43.3/en/integrations#accelerate + + +def get_parameter_device(parameter: torch.nn.Module): + return next(parameter.parameters()).device + + +# def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str], llm_int8_threshold: int = 6): +# """Apply bitsandbytes LLM.8bit() quantization to the model.""" +# model_device = get_parameter_device(model) +# if model_device.type != "meta": +# # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the +# # meta device, so we enforce it for now. +# raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") + +# bnb_quantization_config = BnbQuantizationConfig( +# load_in_8bit=True, +# llm_int8_threshold=llm_int8_threshold, +# ) + +# with accelerate.init_empty_weights(): +# model = replace_with_bnb_layers(model, bnb_quantization_config, modules_to_not_convert=modules_to_not_convert) + +# return model + + +def load_flux_transformer(path: Path) -> FluxTransformer2DModel: + model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) + + bnb_quantization_config = BnbQuantizationConfig( + load_in_8bit=True, + llm_int8_threshold=6, + ) + + model_8bit_path = path / "bnb_llm_int8" + if model_8bit_path.exists(): + # The quantized model already exists, load it and return it. + # Note that the model loading code is the same when loading from quantized vs original weights. The only + # difference is the weights_location. + # model = load_and_quantize_model( + # empty_model, + # weights_location=model_8bit_path, + # bnb_quantization_config=bnb_quantization_config, + # # device_map="auto", + # device_map={"": "cpu"}, + # ) + + # TODO: Handle the keys that were not quantized (get_keys_to_not_convert). + model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) + + # model = quantize_model_llm_int8(empty_model, set()) + + # Load sharded state dict. + files = list(path.glob("*.safetensors")) + state_dict = dict() + for file in files: + sd = load_file(file) + state_dict.update(sd) + + else: + # The quantized model does not exist yet, quantize and save it. 
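+        # `load_and_quantize_model` is accelerate's bitsandbytes integration: given the meta-device model,
+        # it loads the original weights from `weights_location`, swaps `nn.Linear` layers for
+        # `bnb.nn.Linear8bitLt` according to `bnb_quantization_config`, and dispatches per `device_map`.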
+ model = load_and_quantize_model( + empty_model, + weights_location=path, + bnb_quantization_config=bnb_quantization_config, + device_map="auto", + ) + + keys_to_not_convert = get_keys_to_not_convert(empty_model) # TODO + + model_8bit_path.mkdir(parents=True, exist_ok=True) + accl = accelerate.Accelerator() + accl.save_model(model, model_8bit_path) + + # --------------------- + + # model = quantize_model_llm_int8(empty_model, set()) + + # # Load sharded state dict. + # files = list(path.glob("*.safetensors")) + # state_dict = dict() + # for file in files: + # sd = load_file(file) + # state_dict.update(sd) + + # # Load the state dict into the model. The bitsandbytes layers know how to load from both quantized and + # # non-quantized state dicts. + # result = model.load_state_dict(state_dict, strict=True) + # model = model.to("cuda") + + # --------------------- + + assert isinstance(model, FluxTransformer2DModel) + return model + + +def main(): + start = time.time() + model = load_flux_transformer( + Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") + ) + print(f"Time to load: {time.time() - start}s") + print("hi") + + +if __name__ == "__main__": + main() diff --git a/invokeai/backend/load_flux_model_bnb_nf4.py b/invokeai/backend/load_flux_model_bnb_nf4.py new file mode 100644 index 00000000000..1629c5a01c6 --- /dev/null +++ b/invokeai/backend/load_flux_model_bnb_nf4.py @@ -0,0 +1,100 @@ +import time +from pathlib import Path + +import accelerate +import torch +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from safetensors.torch import load_file, save_file + +from invokeai.backend.bnb import quantize_model_nf4 + +# Docs: +# https://huggingface.co/docs/accelerate/usage_guides/quantization +# https://huggingface.co/docs/bitsandbytes/v0.43.3/en/integrations#accelerate + + +def get_parameter_device(parameter: torch.nn.Module): + return next(parameter.parameters()).device + + +def load_flux_transformer(path: Path) -> FluxTransformer2DModel: + model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) + + model_nf4_path = path / "bnb_nf4" + if model_nf4_path.exists(): + # The quantized model already exists, load it and return it. + # Note that the model loading code is the same when loading from quantized vs original weights. The only + # difference is the weights_location. + # model = load_and_quantize_model( + # empty_model, + # weights_location=model_8bit_path, + # bnb_quantization_config=bnb_quantization_config, + # # device_map="auto", + # device_map={"": "cpu"}, + # ) + + # TODO: Handle the keys that were not quantized (get_keys_to_not_convert). + with accelerate.init_empty_weights(): + model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + + model.to_empty(device="cpu") + sd = load_file(model_nf4_path / "model.safetensors") + model.load_state_dict(sd, strict=True) + + else: + # The quantized model does not exist yet, quantize and save it. 
+ # model = load_and_quantize_model( + # empty_model, + # weights_location=path, + # bnb_quantization_config=bnb_quantization_config, + # device_map="auto", + # ) + + # keys_to_not_convert = get_keys_to_not_convert(empty_model) # TODO + + # model_8bit_path.mkdir(parents=True, exist_ok=True) + # accl = accelerate.Accelerator() + # accl.save_model(model, model_8bit_path) + + # --------------------- + + # Load sharded state dict. + files = list(path.glob("*.safetensors")) + state_dict = dict() + for file in files: + sd = load_file(file) + state_dict.update(sd) + + empty_model.load_state_dict(state_dict, strict=True, assign=True) + model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + + # Load the state dict into the model. The bitsandbytes layers know how to load from both quantized and + # non-quantized state dicts. + # model.to_empty(device="cpu") + # model.to(dtype=torch.float16) + # result = model.load_state_dict(state_dict, strict=True) + model = model.to("cuda") + + model_nf4_path.mkdir(parents=True, exist_ok=True) + save_file(model.state_dict(), model_nf4_path / "model.safetensors") + + # --------------------- + + assert isinstance(model, FluxTransformer2DModel) + return model + + +def main(): + start = time.time() + model = load_flux_transformer( + Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") + ) + print(f"Time to load: {time.time() - start}s") + print("hi") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index c6dc025a001..768d2184342 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ classifiers = [ dependencies = [ # Core generation dependencies, pinned for reproducible builds. "accelerate==0.33.0", + "bitsandbytes==0.43.3", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", "controlnet-aux==0.0.7", From b1cf2f5858a49f299734627da102c1a61d35d7fa Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 14 Aug 2024 14:47:03 +0000 Subject: [PATCH 017/113] NF4 loading working... I think. --- invokeai/backend/bnb.py | 38 ++++++++++++++++++++- invokeai/backend/load_flux_model_bnb_nf4.py | 5 +-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py index 766de08f6da..d0cb6f7c995 100644 --- a/invokeai/backend/bnb.py +++ b/invokeai/backend/bnb.py @@ -51,6 +51,42 @@ # self.SCB = SCB +class InvokeLinear4Bit(bnb.nn.Linear4bit): + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + """This method is based on the logic in the bitsandbytes serialization unit tests for `Linear4bit`: + https://github.com/bitsandbytes-foundation/bitsandbytes/blob/6d714a5cce3db5bd7f577bc447becc7a92d5ccc7/tests/test_linear4bit.py#L52-L71 + + I'm not sure why this was not included in the original `Linear4bit` implementation. + """ + # During serialization, the quant_state is stored as subkeys of "weight.". Here we extract those keys. + quant_state_keys = [k for k in state_dict.keys() if k.startswith(prefix + "weight.")] + + if len(quant_state_keys) > 0: + # We are loading a quantized state dict. 
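+            # The "weight.*" subkeys hold bitsandbytes' serialized quant state for this layer (absmax,
+            # quant map, packed metadata, ...); they are passed unchanged to `Params4bit.from_prequantized()` below.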
+ quant_state_sd = {k: state_dict.pop(k) for k in quant_state_keys} + weight = state_dict.pop(prefix + "weight") + bias = state_dict.pop(prefix + "bias", None) + + if len(state_dict) != 0: + raise RuntimeError(f"Unexpected keys in state_dict: {state_dict.keys()}") + + self.weight = bnb.nn.Params4bit.from_prequantized( + data=weight, quantized_stats=quant_state_sd, device=weight.device + ) + if bias is None: + self.bias = None + else: + self.bias = torch.nn.Parameter(bias) + + else: + # We are loading a non-quantized state dict. + return super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + class Invoke2Linear8bitLt(torch.nn.Linear): """This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.""" @@ -509,7 +545,7 @@ def _convert_linear_layers_to_nf4( fullname = f"{prefix}.{name}" if prefix else name if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): has_bias = child.bias is not None - replacement = bnb.nn.Linear4bit( + replacement = InvokeLinear4Bit( child.in_features, child.out_features, bias=has_bias, diff --git a/invokeai/backend/load_flux_model_bnb_nf4.py b/invokeai/backend/load_flux_model_bnb_nf4.py index 1629c5a01c6..1a4e67c1c70 100644 --- a/invokeai/backend/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/load_flux_model_bnb_nf4.py @@ -4,7 +4,7 @@ import accelerate import torch from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from safetensors.torch import load_file, save_file +from safetensors.torch import load_file from invokeai.backend.bnb import quantize_model_nf4 @@ -43,6 +43,7 @@ def load_flux_transformer(path: Path) -> FluxTransformer2DModel: model.to_empty(device="cpu") sd = load_file(model_nf4_path / "model.safetensors") model.load_state_dict(sd, strict=True) + model = model.to("cuda") else: # The quantized model does not exist yet, quantize and save it. 
@@ -79,7 +80,7 @@ def load_flux_transformer(path: Path) -> FluxTransformer2DModel: model = model.to("cuda") model_nf4_path.mkdir(parents=True, exist_ok=True) - save_file(model.state_dict(), model_nf4_path / "model.safetensors") + # save_file(model.state_dict(), model_nf4_path / "model.safetensors") # --------------------- From 31c8d76c940666d3e3279fb353d215791bbf7f95 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 14 Aug 2024 23:30:53 +0000 Subject: [PATCH 018/113] NF4 inference working --- .../app/invocations/flux_text_to_image.py | 78 ++++++++++++------- invokeai/backend/bnb.py | 50 +++++++----- invokeai/backend/load_flux_model_bnb_nf4.py | 12 ++- 3 files changed, 87 insertions(+), 53 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 1b900484172..542fa6d6b5f 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,12 +1,13 @@ from pathlib import Path from typing import Literal +import accelerate import torch from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from optimum.quanto import qfloat8 from PIL import Image +from safetensors.torch import load_file from transformers.models.auto import AutoModelForTextEncoding from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -20,6 +21,7 @@ ) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.bnb import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo @@ -107,8 +109,9 @@ def _run_diffusion( transformer=transformer, ) - t5_embeddings = t5_embeddings.to(dtype=transformer.dtype) - clip_embeddings = clip_embeddings.to(dtype=transformer.dtype) + dtype = torch.bfloat16 + t5_embeddings = t5_embeddings.to(dtype=dtype) + clip_embeddings = clip_embeddings.to(dtype=dtype) latents = flux_pipeline_with_transformer( height=self.height, @@ -160,32 +163,49 @@ def _run_vae_decoding( def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: if self.use_8bit: - model_8bit_path = path / "quantized" - if model_8bit_path.exists(): - # The quantized model exists, load it. - # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like - # something that we should be able to make much faster. - q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) - - # Access the underlying wrapped model. - # We access the wrapped model, even though it is private, because it simplifies the type checking by - # always returning a FluxTransformer2DModel from this function. - model = q_model._wrapped - else: - # The quantized model does not exist yet, quantize and save it. - # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on - # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it - # here. 
- model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) - assert isinstance(model, FluxTransformer2DModel) - - q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) - - model_8bit_path.mkdir(parents=True, exist_ok=True) - q_model.save_pretrained(model_8bit_path) - - # (See earlier comment about accessing the wrapped model.) - model = q_model._wrapped + model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) + + model_nf4_path = path / "bnb_nf4" + if model_nf4_path.exists(): + with accelerate.init_empty_weights(): + model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + + # model.to_empty(device="cpu") + # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle + # this on GPUs without bfloat16 support. + sd = load_file(model_nf4_path / "model.safetensors") + model.load_state_dict(sd, strict=True, assign=True) + # model = model.to("cuda") + + # model_8bit_path = path / "quantized" + # if model_8bit_path.exists(): + # # The quantized model exists, load it. + # # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like + # # something that we should be able to make much faster. + # q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) + + # # Access the underlying wrapped model. + # # We access the wrapped model, even though it is private, because it simplifies the type checking by + # # always returning a FluxTransformer2DModel from this function. + # model = q_model._wrapped + # else: + # # The quantized model does not exist yet, quantize and save it. + # # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on + # # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it + # # here. + # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + # assert isinstance(model, FluxTransformer2DModel) + + # q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) + + # model_8bit_path.mkdir(parents=True, exist_ok=True) + # q_model.save_pretrained(model_8bit_path) + + # # (See earlier comment about accessing the wrapped model.) + # model = q_model._wrapped else: model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py index d0cb6f7c995..8c1f080e981 100644 --- a/invokeai/backend/bnb.py +++ b/invokeai/backend/bnb.py @@ -51,7 +51,7 @@ # self.SCB = SCB -class InvokeLinear4Bit(bnb.nn.Linear4bit): +class InvokeLinearNF4(bnb.nn.LinearNF4): def _load_from_state_dict( self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ): @@ -60,31 +60,36 @@ def _load_from_state_dict( I'm not sure why this was not included in the original `Linear4bit` implementation. """ - # During serialization, the quant_state is stored as subkeys of "weight.". Here we extract those keys. - quant_state_keys = [k for k in state_dict.keys() if k.startswith(prefix + "weight.")] - if len(quant_state_keys) > 0: - # We are loading a quantized state dict. 
- quant_state_sd = {k: state_dict.pop(k) for k in quant_state_keys} - weight = state_dict.pop(prefix + "weight") - bias = state_dict.pop(prefix + "bias", None) - - if len(state_dict) != 0: - raise RuntimeError(f"Unexpected keys in state_dict: {state_dict.keys()}") + weight = state_dict.pop(prefix + "weight") + bias = state_dict.pop(prefix + "bias", None) + # During serialization, the quant_state is stored as subkeys of "weight.". + # We expect the remaining keys to be quant_state keys. We validate that they at least have the correct prefix. + quant_state_sd = state_dict + assert all(k.startswith(prefix + "weight.") for k in quant_state_sd.keys()) + if len(quant_state_sd) > 0: + # We are loading a quantized state dict. self.weight = bnb.nn.Params4bit.from_prequantized( data=weight, quantized_stats=quant_state_sd, device=weight.device ) - if bias is None: - self.bias = None - else: - self.bias = torch.nn.Parameter(bias) + self.bias = bias if bias is None else torch.nn.Parameter(bias, requires_grad=False) else: # We are loading a non-quantized state dict. - return super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + + # We could simply call the `super()._load_from_state_dict` method here, but then we wouldn't be able to load + # into from a state_dict into a model on the "meta" device. By initializing a new `Params4bit` object, we + # work around this issue. + self.weight = bnb.nn.Params4bit( + data=weight, + requires_grad=self.weight.requires_grad, + compress_statistics=self.weight.compress_statistics, + quant_type=self.weight.quant_type, + quant_storage=self.weight.quant_storage, + module=self, ) + self.bias = bias if bias is None else torch.nn.Parameter(bias) class Invoke2Linear8bitLt(torch.nn.Linear): @@ -545,7 +550,7 @@ def _convert_linear_layers_to_nf4( fullname = f"{prefix}.{name}" if prefix else name if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): has_bias = child.bias is not None - replacement = InvokeLinear4Bit( + replacement = InvokeLinearNF4( child.in_features, child.out_features, bias=has_bias, @@ -553,9 +558,14 @@ def _convert_linear_layers_to_nf4( # TODO(ryand): Test compress_statistics=True. 
# compress_statistics=True, ) - replacement.weight.data = child.weight.data + # replacement.weight.data = child.weight.data + # if has_bias: + # replacement.bias.data = child.bias.data if has_bias: - replacement.bias.data = child.bias.data + replacement.bias = _replace_param(replacement.bias, child.bias.data) + replacement.weight = _replace_param( + replacement.weight, child.weight.data, quant_state=replacement.weight.quant_state + ) replacement.requires_grad_(False) module.__setattr__(name, replacement) else: diff --git a/invokeai/backend/load_flux_model_bnb_nf4.py b/invokeai/backend/load_flux_model_bnb_nf4.py index 1a4e67c1c70..5cff6f07d41 100644 --- a/invokeai/backend/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/load_flux_model_bnb_nf4.py @@ -4,7 +4,7 @@ import accelerate import torch from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from safetensors.torch import load_file +from safetensors.torch import load_file, save_file from invokeai.backend.bnb import quantize_model_nf4 @@ -62,6 +62,9 @@ def load_flux_transformer(path: Path) -> FluxTransformer2DModel: # --------------------- + with accelerate.init_empty_weights(): + model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + # Load sharded state dict. files = list(path.glob("*.safetensors")) state_dict = dict() @@ -69,8 +72,9 @@ def load_flux_transformer(path: Path) -> FluxTransformer2DModel: sd = load_file(file) state_dict.update(sd) - empty_model.load_state_dict(state_dict, strict=True, assign=True) - model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + # model.to_empty(device="cpu") + # model.to(dtype=torch.float16) + model.load_state_dict(state_dict, strict=True, assign=True) # Load the state dict into the model. The bitsandbytes layers know how to load from both quantized and # non-quantized state dicts. @@ -80,7 +84,7 @@ def load_flux_transformer(path: Path) -> FluxTransformer2DModel: model = model.to("cuda") model_nf4_path.mkdir(parents=True, exist_ok=True) - # save_file(model.state_dict(), model_nf4_path / "model.safetensors") + save_file(model.state_dict(), model_nf4_path / "model.safetensors") # --------------------- From 17f5952ed35b51a35e66a55f49716bcb6d5674ce Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 15 Aug 2024 16:30:47 +0000 Subject: [PATCH 019/113] Clean up NF4 implementation. 
--- .../app/invocations/flux_text_to_image.py | 2 +- invokeai/backend/bnb.py | 107 +----------- invokeai/backend/load_flux_model_bnb_nf4.py | 131 +++++++-------- invokeai/backend/quantization/bnb_nf4.py | 152 ++++++++++++++++++ 4 files changed, 214 insertions(+), 178 deletions(-) create mode 100644 invokeai/backend/quantization/bnb_nf4.py diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 542fa6d6b5f..930f4c40ce2 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -21,7 +21,7 @@ ) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.bnb import quantize_model_nf4 +from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py index 8c1f080e981..168bb1b686d 100644 --- a/invokeai/backend/bnb.py +++ b/invokeai/backend/bnb.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Set, Tuple, Type +from typing import Any, Optional, Set, Type import accelerate import bitsandbytes as bnb @@ -51,47 +51,6 @@ # self.SCB = SCB -class InvokeLinearNF4(bnb.nn.LinearNF4): - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - """This method is based on the logic in the bitsandbytes serialization unit tests for `Linear4bit`: - https://github.com/bitsandbytes-foundation/bitsandbytes/blob/6d714a5cce3db5bd7f577bc447becc7a92d5ccc7/tests/test_linear4bit.py#L52-L71 - - I'm not sure why this was not included in the original `Linear4bit` implementation. - """ - - weight = state_dict.pop(prefix + "weight") - bias = state_dict.pop(prefix + "bias", None) - # During serialization, the quant_state is stored as subkeys of "weight.". - # We expect the remaining keys to be quant_state keys. We validate that they at least have the correct prefix. - quant_state_sd = state_dict - assert all(k.startswith(prefix + "weight.") for k in quant_state_sd.keys()) - - if len(quant_state_sd) > 0: - # We are loading a quantized state dict. - self.weight = bnb.nn.Params4bit.from_prequantized( - data=weight, quantized_stats=quant_state_sd, device=weight.device - ) - self.bias = bias if bias is None else torch.nn.Parameter(bias, requires_grad=False) - - else: - # We are loading a non-quantized state dict. - - # We could simply call the `super()._load_from_state_dict` method here, but then we wouldn't be able to load - # into from a state_dict into a model on the "meta" device. By initializing a new `Params4bit` object, we - # work around this issue. 
- self.weight = bnb.nn.Params4bit( - data=weight, - requires_grad=self.weight.requires_grad, - compress_statistics=self.weight.compress_statistics, - quant_type=self.weight.quant_type, - quant_storage=self.weight.quant_storage, - module=self, - ) - self.bias = bias if bias is None else torch.nn.Parameter(bias) - - class Invoke2Linear8bitLt(torch.nn.Linear): """This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.""" @@ -474,27 +433,6 @@ def convert_model_to_bnb_llm_int8(model: torch.nn.Module, ignore_modules: set[st # incompatible_keys.missing_keys.remove(key) -def _replace_param( - param: torch.nn.Parameter, data: torch.Tensor, quant_state: Optional[Tuple] = None -) -> torch.nn.Parameter: - # doing `param.data = weight` raises a RuntimeError if param.data was on meta-device, so - # we need to re-create the parameters instead of overwriting the data - if param.device.type == "meta": - if isinstance(param, bnb.nn.Params4bit): - return bnb.nn.Params4bit( - data, - requires_grad=data.requires_grad, - quant_state=quant_state, - compress_statistics=param.compress_statistics, - quant_type=param.quant_type, - ) - return torch.nn.Parameter(data, requires_grad=data.requires_grad) - param.data = data - if isinstance(param, bnb.nn.Params4bit): - param.quant_state = quant_state - return param - - def _convert_linear_layers( module: torch.nn.Module, linear_cls: Type, ignore_modules: Set[str], prefix: str = "" ) -> None: @@ -543,35 +481,6 @@ def _convert_linear_layers_to_llm_8bit(module: torch.nn.Module, ignore_modules: _convert_linear_layers_to_llm_8bit(child, ignore_modules, prefix=fullname) -def _convert_linear_layers_to_nf4( - module: torch.nn.Module, ignore_modules: Set[str], compute_dtype: torch.dtype, prefix: str = "" -) -> None: - for name, child in module.named_children(): - fullname = f"{prefix}.{name}" if prefix else name - if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): - has_bias = child.bias is not None - replacement = InvokeLinearNF4( - child.in_features, - child.out_features, - bias=has_bias, - compute_dtype=torch.float16, - # TODO(ryand): Test compress_statistics=True. - # compress_statistics=True, - ) - # replacement.weight.data = child.weight.data - # if has_bias: - # replacement.bias.data = child.bias.data - if has_bias: - replacement.bias = _replace_param(replacement.bias, child.bias.data) - replacement.weight = _replace_param( - replacement.weight, child.weight.data, quant_state=replacement.weight.quant_state - ) - replacement.requires_grad_(False) - module.__setattr__(name, replacement) - else: - _convert_linear_layers_to_nf4(child, ignore_modules, compute_dtype=compute_dtype, prefix=fullname) - - # def _replace_linear_layers( # model: torch.nn.Module, # linear_layer_type: Literal["Linear8bitLt", "Linear4bit"], @@ -646,17 +555,3 @@ def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[ _convert_linear_layers_to_llm_8bit(module=model, ignore_modules=modules_to_not_convert) return model - - -def quantize_model_nf4(model: torch.nn.Module, modules_to_not_convert: set[str], compute_dtype: torch.dtype): - """Apply bitsandbytes nf4 quantization to the model.""" - # model_device = get_parameter_device(model) - # if model_device.type != "meta": - # # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the - # # meta device, so we enforce it for now. 
- # raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") - - # with accelerate.init_empty_weights(): - _convert_linear_layers_to_nf4(module=model, ignore_modules=modules_to_not_convert, compute_dtype=compute_dtype) - - return model diff --git a/invokeai/backend/load_flux_model_bnb_nf4.py b/invokeai/backend/load_flux_model_bnb_nf4.py index 5cff6f07d41..b55c56a0321 100644 --- a/invokeai/backend/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/load_flux_model_bnb_nf4.py @@ -1,4 +1,5 @@ import time +from contextlib import contextmanager from pathlib import Path import accelerate @@ -6,100 +7,88 @@ from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from safetensors.torch import load_file, save_file -from invokeai.backend.bnb import quantize_model_nf4 +from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 -# Docs: -# https://huggingface.co/docs/accelerate/usage_guides/quantization -# https://huggingface.co/docs/bitsandbytes/v0.43.3/en/integrations#accelerate +@contextmanager +def log_time(name: str): + """Helper context manager to log the time taken by a block of code.""" + start = time.time() + try: + yield None + finally: + end = time.time() + print(f"'{name}' took {end - start:.4f} secs") -def get_parameter_device(parameter: torch.nn.Module): - return next(parameter.parameters()).device +def main(): + # Load the FLUX transformer model onto the meta device. + model_path = Path( + "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/" + ) -def load_flux_transformer(path: Path) -> FluxTransformer2DModel: - model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) - with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) + with log_time("Intialize FLUX transformer on meta device"): + model_config = FluxTransformer2DModel.load_config(model_path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) - model_nf4_path = path / "bnb_nf4" + # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate + # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. + modules_to_not_convert: set[str] = set() + + model_nf4_path = model_path / "bnb_nf4" if model_nf4_path.exists(): # The quantized model already exists, load it and return it. - # Note that the model loading code is the same when loading from quantized vs original weights. The only - # difference is the weights_location. - # model = load_and_quantize_model( - # empty_model, - # weights_location=model_8bit_path, - # bnb_quantization_config=bnb_quantization_config, - # # device_map="auto", - # device_map={"": "cpu"}, - # ) - - # TODO: Handle the keys that were not quantized (get_keys_to_not_convert). - with accelerate.init_empty_weights(): - model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + print(f"A pre-quantized model already exists at '{model_nf4_path}'. 
Attempting to load it...") - model.to_empty(device="cpu") - sd = load_file(model_nf4_path / "model.safetensors") - model.load_state_dict(sd, strict=True) - model = model.to("cuda") + # Replace the linear layers with NF4 quantized linear layers (still on the meta device). + with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights(): + model = quantize_model_nf4( + empty_model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 + ) - else: - # The quantized model does not exist yet, quantize and save it. - # model = load_and_quantize_model( - # empty_model, - # weights_location=path, - # bnb_quantization_config=bnb_quantization_config, - # device_map="auto", - # ) + with log_time("Load state dict into model"): + sd = load_file(model_nf4_path / "model.safetensors") + model.load_state_dict(sd, strict=True, assign=True) - # keys_to_not_convert = get_keys_to_not_convert(empty_model) # TODO + with log_time("Move model to cuda"): + model = model.to("cuda") - # model_8bit_path.mkdir(parents=True, exist_ok=True) - # accl = accelerate.Accelerator() - # accl.save_model(model, model_8bit_path) + print(f"Successfully loaded pre-quantized model from '{model_nf4_path}'.") - # --------------------- + else: + # The quantized model does not exist, quantize the model and save it. + print(f"No pre-quantized model found at '{model_nf4_path}'. Quantizing the model...") - with accelerate.init_empty_weights(): - model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights(): + model = quantize_model_nf4( + empty_model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 + ) - # Load sharded state dict. - files = list(path.glob("*.safetensors")) - state_dict = dict() - for file in files: - sd = load_file(file) - state_dict.update(sd) + with log_time("Load state dict into model"): + # Load sharded state dict. + files = list(model_path.glob("*.safetensors")) + state_dict = dict() + for file in files: + sd = load_file(file) + state_dict.update(sd) - # model.to_empty(device="cpu") - # model.to(dtype=torch.float16) - model.load_state_dict(state_dict, strict=True, assign=True) + model.load_state_dict(state_dict, strict=True, assign=True) - # Load the state dict into the model. The bitsandbytes layers know how to load from both quantized and - # non-quantized state dicts. 
- # model.to_empty(device="cpu") - # model.to(dtype=torch.float16) - # result = model.load_state_dict(state_dict, strict=True) - model = model.to("cuda") + with log_time("Move model to cuda and quantize"): + model = model.to("cuda") - model_nf4_path.mkdir(parents=True, exist_ok=True) - save_file(model.state_dict(), model_nf4_path / "model.safetensors") + with log_time("Save quantized model"): + model_nf4_path.mkdir(parents=True, exist_ok=True) + output_path = model_nf4_path / "model.safetensors" + save_file(model.state_dict(), output_path) - # --------------------- + print(f"Successfully quantized and saved model to '{output_path}'.") assert isinstance(model, FluxTransformer2DModel) return model -def main(): - start = time.time() - model = load_flux_transformer( - Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") - ) - print(f"Time to load: {time.time() - start}s") - print("hi") - - if __name__ == "__main__": main() diff --git a/invokeai/backend/quantization/bnb_nf4.py b/invokeai/backend/quantization/bnb_nf4.py new file mode 100644 index 00000000000..02a2a732bf9 --- /dev/null +++ b/invokeai/backend/quantization/bnb_nf4.py @@ -0,0 +1,152 @@ +import bitsandbytes as bnb +import torch + +# This file contains utils for working with models that use bitsandbytes NF4 quantization. +# The utils in this file are partially inspired by: +# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py + + +class InvokeLinearNF4(bnb.nn.LinearNF4): + """A class that extends `bnb.nn.LinearNF4` to add the following functionality: + - Ability to load Linear NF4 layers from a pre-quantized state_dict. + - Ability to load Linear NF4 layers from a state_dict when the model is on the "meta" device. + """ + + def _load_from_state_dict( + self, + state_dict: dict[str, torch.Tensor], + prefix: str, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + """This method is based on the logic in the bitsandbytes serialization unit tests for `Linear4bit`: + https://github.com/bitsandbytes-foundation/bitsandbytes/blob/6d714a5cce3db5bd7f577bc447becc7a92d5ccc7/tests/test_linear4bit.py#L52-L71 + """ + weight = state_dict.pop(prefix + "weight") + bias = state_dict.pop(prefix + "bias", None) + # We expect the remaining keys to be quant_state keys. + quant_state_sd = state_dict + + # During serialization, the quant_state is stored as subkeys of "weight." (See + # `bnb.nn.LinearNF4._save_to_state_dict()`). We validate that they at least have the correct prefix. + # TODO(ryand): Technically, we should be using `strict`, `missing_keys`, `unexpected_keys`, and `error_msgs` + # rather than raising an exception to correctly implement this API. + assert all(k.startswith(prefix + "weight.") for k in quant_state_sd.keys()) + + if len(quant_state_sd) > 0: + # We are loading a pre-quantized state dict. + self.weight = bnb.nn.Params4bit.from_prequantized( + data=weight, quantized_stats=quant_state_sd, device=weight.device + ) + self.bias = bias if bias is None else torch.nn.Parameter(bias, requires_grad=False) + else: + # We are loading a non-quantized state dict. + + # We could simply call the `super()._load_from_state_dict()` method here, but then we wouldn't be able to + # load from a state_dict into a model on the "meta" device. 
Attempting to load into a model on the "meta" + # device requires setting `assign=True`, doing this with the default `super()._load_from_state_dict()` + # implementation causes `Params4Bit` to be replaced by a `torch.nn.Parameter`. By initializing a new + # `Params4bit` object, we work around this issue. It's a bit hacky, but it gets the job done. + self.weight = bnb.nn.Params4bit( + data=weight, + requires_grad=self.weight.requires_grad, + compress_statistics=self.weight.compress_statistics, + quant_type=self.weight.quant_type, + quant_storage=self.weight.quant_storage, + module=self, + ) + self.bias = bias if bias is None else torch.nn.Parameter(bias) + + +def _replace_param( + param: torch.nn.Parameter | bnb.nn.Params4bit, + data: torch.Tensor, +) -> torch.nn.Parameter: + """A helper function to replace the data of a model parameter with new data in a way that allows replacing params on + the "meta" device. + + Supports both `torch.nn.Parameter` and `bnb.nn.Params4bit` parameters. + """ + if param.device.type == "meta": + # Doing `param.data = data` raises a RuntimeError if param.data was on the "meta" device, so we need to + # re-create the param instead of overwriting the data. + if isinstance(param, bnb.nn.Params4bit): + return bnb.nn.Params4bit( + data, + requires_grad=data.requires_grad, + quant_state=param.quant_state, + compress_statistics=param.compress_statistics, + quant_type=param.quant_type, + ) + return torch.nn.Parameter(data, requires_grad=data.requires_grad) + + param.data = data + return param + + +def _convert_linear_layers_to_nf4( + module: torch.nn.Module, + ignore_modules: set[str], + compute_dtype: torch.dtype, + compress_statistics: bool = False, + prefix: str = "", +) -> None: + """Convert all linear layers in the model to NF4 quantized linear layers. + + Args: + module: All linear layers in this module will be converted. + ignore_modules: A set of module prefixes to ignore when converting linear layers. + compute_dtype: The dtype to use for computation in the quantized linear layers. + compress_statistics: Whether to enable nested quantization (aka double quantization) where the quantization + constants from the first quantization are quantized again. + prefix: The prefix of the current module in the model. Used to call this function recursively. + """ + for name, child in module.named_children(): + fullname = f"{prefix}.{name}" if prefix else name + if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): + has_bias = child.bias is not None + replacement = InvokeLinearNF4( + child.in_features, + child.out_features, + bias=has_bias, + compute_dtype=torch.float16, + compress_statistics=compress_statistics, + ) + if has_bias: + replacement.bias = _replace_param(replacement.bias, child.bias.data) + replacement.weight = _replace_param(replacement.weight, child.weight.data) + replacement.requires_grad_(False) + module.__setattr__(name, replacement) + else: + _convert_linear_layers_to_nf4(child, ignore_modules, compute_dtype=compute_dtype, prefix=fullname) + + +def quantize_model_nf4(model: torch.nn.Module, modules_to_not_convert: set[str], compute_dtype: torch.dtype): + """Apply bitsandbytes nf4 quantization to the model. + + You likely want to call this function inside a `accelerate.init_empty_weights()` context. + + Example usage: + ``` + # Initialize the model from a config on the meta device. + with accelerate.init_empty_weights(): + model = ModelClass.from_config(...) 
+ + # Add NF4 quantization linear layers to the model - still on the meta device. + with accelerate.init_empty_weights(): + model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.float16) + + # Load a state_dict into the model. (Could be either a prequantized or non-quantized state_dict.) + model.load_state_dict(state_dict, strict=True, assign=True) + + # Move the model to the "cuda" device. If the model was non-quantized, this is where the weight quantization takes + # place. + model.to("cuda") + ``` + """ + _convert_linear_layers_to_nf4(module=model, ignore_modules=modules_to_not_convert, compute_dtype=compute_dtype) + + return model From 52ff3c78f9e1ea97dac3e2d6ed5ea0677b259688 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 15 Aug 2024 19:34:34 +0000 Subject: [PATCH 020/113] LLM.int8() quantization is working, but still some rough edges to solve. --- .../app/invocations/flux_text_to_image.py | 67 +++++------- invokeai/backend/bnb.py | 40 ------- ...py => load_flux_model_bnb_llm_int8_old.py} | 0 invokeai/backend/quantization/bnb_llm_int8.py | 102 ++++++++++++++++++ invokeai/backend/quantization/bnb_nf4.py | 4 + .../load_flux_model_bnb_llm_int8.py | 89 +++++++++++++++ .../load_flux_model_bnb_nf4.py | 0 7 files changed, 223 insertions(+), 79 deletions(-) rename invokeai/backend/{load_flux_model_bnb_llm_int8.py => load_flux_model_bnb_llm_int8_old.py} (100%) create mode 100644 invokeai/backend/quantization/bnb_llm_int8.py create mode 100644 invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py rename invokeai/backend/{ => quantization}/load_flux_model_bnb_nf4.py (100%) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 930f4c40ce2..de34a6eb5e3 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -21,6 +21,7 @@ ) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel @@ -49,6 +50,9 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") + quantization_type: Literal["raw", "NF4", "llm_int8"] = InputField( + default="raw", description="The type of quantization to use for the transformer model." + ) use_8bit: bool = InputField( default=False, description="Whether to quantize the transformer model to 8-bit precision." 
) @@ -162,52 +166,37 @@ def _run_vae_decoding( return image def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: - if self.use_8bit: + if self.quantization_type == "raw": + model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + elif self.quantization_type == "NF4": model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) with accelerate.init_empty_weights(): empty_model = FluxTransformer2DModel.from_config(model_config) assert isinstance(empty_model, FluxTransformer2DModel) model_nf4_path = path / "bnb_nf4" - if model_nf4_path.exists(): - with accelerate.init_empty_weights(): - model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) - - # model.to_empty(device="cpu") - # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle - # this on GPUs without bfloat16 support. - sd = load_file(model_nf4_path / "model.safetensors") - model.load_state_dict(sd, strict=True, assign=True) - # model = model.to("cuda") - - # model_8bit_path = path / "quantized" - # if model_8bit_path.exists(): - # # The quantized model exists, load it. - # # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like - # # something that we should be able to make much faster. - # q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) - - # # Access the underlying wrapped model. - # # We access the wrapped model, even though it is private, because it simplifies the type checking by - # # always returning a FluxTransformer2DModel from this function. - # model = q_model._wrapped - # else: - # # The quantized model does not exist yet, quantize and save it. - # # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on - # # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it - # # here. - # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) - # assert isinstance(model, FluxTransformer2DModel) - - # q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) - - # model_8bit_path.mkdir(parents=True, exist_ok=True) - # q_model.save_pretrained(model_8bit_path) - - # # (See earlier comment about accessing the wrapped model.) - # model = q_model._wrapped + assert model_nf4_path.exists() + with accelerate.init_empty_weights(): + model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + + # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle + # this on GPUs without bfloat16 support. 
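
For the bfloat16 TODO above, one minimal option (a sketch assuming a CUDA device; the `compute_dtype` name is illustrative) is to query device support and pass the result to `quantize_model_nf4(...)` instead of hard-coding `torch.bfloat16`:

    compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=compute_dtype)
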
+ sd = load_file(model_nf4_path / "model.safetensors") + model.load_state_dict(sd, strict=True, assign=True) + elif self.quantization_type == "llm_int8": + model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) + model_int8_path = path / "bnb_llm_int8" + assert model_int8_path.exists() + with accelerate.init_empty_weights(): + model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) + + sd = load_file(model_int8_path / "model.safetensors") + model.load_state_dict(sd, strict=True, assign=True) else: - model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) + raise ValueError(f"Unsupported quantization type: {self.quantization_type}") assert isinstance(model, FluxTransformer2DModel) return model diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py index 168bb1b686d..1022a1d1dcc 100644 --- a/invokeai/backend/bnb.py +++ b/invokeai/backend/bnb.py @@ -1,6 +1,5 @@ from typing import Any, Optional, Set, Type -import accelerate import bitsandbytes as bnb import torch @@ -460,27 +459,6 @@ def _convert_linear_layers( _convert_linear_layers(child, linear_cls, ignore_modules, prefix=fullname) -def _convert_linear_layers_to_llm_8bit(module: torch.nn.Module, ignore_modules: Set[str], prefix: str = "") -> None: - for name, child in module.named_children(): - fullname = f"{prefix}.{name}" if prefix else name - if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): - has_bias = child.bias is not None - replacement = InvokeLinear8bitLt( - child.in_features, - child.out_features, - bias=has_bias, - has_fp16_weights=False, - # device=device, - ) - replacement.weight.data = child.weight.data - if has_bias: - replacement.bias.data = child.bias.data - replacement.requires_grad_(False) - module.__setattr__(name, replacement) - else: - _convert_linear_layers_to_llm_8bit(child, ignore_modules, prefix=fullname) - - # def _replace_linear_layers( # model: torch.nn.Module, # linear_layer_type: Literal["Linear8bitLt", "Linear4bit"], @@ -537,21 +515,3 @@ def _convert_linear_layers_to_llm_8bit(module: torch.nn.Module, ignore_modules: # # Remove the last key for recursion # current_key_name.pop(-1) # return model, has_been_replaced - - -def get_parameter_device(parameter: torch.nn.Module): - return next(parameter.parameters()).device - - -def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str]): - """Apply bitsandbytes LLM.8bit() quantization to the model.""" - model_device = get_parameter_device(model) - if model_device.type != "meta": - # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the - # meta device, so we enforce it for now. 
- raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") - - with accelerate.init_empty_weights(): - _convert_linear_layers_to_llm_8bit(module=model, ignore_modules=modules_to_not_convert) - - return model diff --git a/invokeai/backend/load_flux_model_bnb_llm_int8.py b/invokeai/backend/load_flux_model_bnb_llm_int8_old.py similarity index 100% rename from invokeai/backend/load_flux_model_bnb_llm_int8.py rename to invokeai/backend/load_flux_model_bnb_llm_int8_old.py diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py new file mode 100644 index 00000000000..900c55a085a --- /dev/null +++ b/invokeai/backend/quantization/bnb_llm_int8.py @@ -0,0 +1,102 @@ +import bitsandbytes as bnb +import torch + +# This file contains utils for working with models that use bitsandbytes LLM.int8() quantization. +# The utils in this file are partially inspired by: +# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py + + +# NOTE(ryand): All of the custom state_dict manipulation logic in this file is pretty hacky. This could be made much +# cleaner by re-implementing bnb.nn.Linear8bitLt with proper use of buffers and less magic. But, for now, we try to +# stick close to the bitsandbytes classes to make interoperability easier with other models that might use bitsandbytes. + + +class InvokeLinear8bitLt(bnb.nn.Linear8bitLt): + def _load_from_state_dict( + self, + state_dict: dict[str, torch.Tensor], + prefix: str, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + weight = state_dict.pop(prefix + "weight") + bias = state_dict.pop(prefix + "bias", None) + + # See `bnb.nn.Linear8bitLt._save_to_state_dict()` for the serialization logic of SCB and weight_format. + scb = state_dict.pop(prefix + "SCB", None) + # weight_format is unused, but we pop it so we can validate that there are no unexpected keys. + _weight_format = state_dict.pop(prefix + "weight_format", None) + + # TODO(ryand): Technically, we should be using `strict`, `missing_keys`, `unexpected_keys`, and `error_msgs` + # rather than raising an exception to correctly implement this API. + assert len(state_dict) == 0 + + if scb is not None: + # We are loading a pre-quantized state dict. + self.weight = bnb.nn.Int8Params( + data=weight, + requires_grad=self.weight.requires_grad, + has_fp16_weights=False, + # Note: After quantization, CB is the same as weight. + CB=weight, + SCB=scb, + ) + self.bias = bias if bias is None else torch.nn.Parameter(bias) + else: + # We are loading a non-quantized state dict. + + # We could simply call the `super()._load_from_state_dict()` method here, but then we wouldn't be able to + # load from a state_dict into a model on the "meta" device. Attempting to load into a model on the "meta" + # device requires setting `assign=True`, doing this with the default `super()._load_from_state_dict()` + # implementation causes `Params4Bit` to be replaced by a `torch.nn.Parameter`. By initializing a new + # `Params4bit` object, we work around this issue. It's a bit hacky, but it gets the job done. 
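
For intuition about the `CB` / `SCB` fields handled above, here is a rough, self-contained sketch of the row-wise absmax scheme that LLM.int8() applies to the non-outlier weights (illustrative only; `bnb.functional.double_quant` additionally separates outlier columns):

    import torch

    weight = torch.randn(64, 64, dtype=torch.float16)
    scb = weight.float().abs().amax(dim=1)                                   # per-output-row absmax, roughly what SCB stores
    cb = torch.round(weight.float() * 127.0 / scb[:, None]).to(torch.int8)   # int8 weight matrix, roughly CB
    approx = cb.float() * scb[:, None] / 127.0                               # dequantized values, close to `weight`
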
+ self.weight = bnb.nn.Int8Params( + data=weight, + requires_grad=self.weight.requires_grad, + has_fp16_weights=False, + CB=None, + SCB=None, + ) + self.bias = bias if bias is None else torch.nn.Parameter(bias) + + +def _convert_linear_layers_to_llm_8bit( + module: torch.nn.Module, ignore_modules: set[str], outlier_threshold: float, prefix: str = "" +) -> None: + """Convert all linear layers in the module to bnb.nn.Linear8bitLt layers.""" + for name, child in module.named_children(): + fullname = f"{prefix}.{name}" if prefix else name + if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): + has_bias = child.bias is not None + replacement = InvokeLinear8bitLt( + child.in_features, + child.out_features, + bias=has_bias, + has_fp16_weights=False, + threshold=outlier_threshold, + ) + replacement.weight.data = child.weight.data + if has_bias: + replacement.bias.data = child.bias.data + replacement.requires_grad_(False) + module.__setattr__(name, replacement) + else: + _convert_linear_layers_to_llm_8bit( + child, ignore_modules, outlier_threshold=outlier_threshold, prefix=fullname + ) + + +def get_parameter_device(parameter: torch.nn.Module): + return next(parameter.parameters()).device + + +def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str], outlier_threshold: float = 6.0): + """Apply bitsandbytes LLM.8bit() quantization to the model.""" + _convert_linear_layers_to_llm_8bit( + module=model, ignore_modules=modules_to_not_convert, outlier_threshold=outlier_threshold + ) + + return model diff --git a/invokeai/backend/quantization/bnb_nf4.py b/invokeai/backend/quantization/bnb_nf4.py index 02a2a732bf9..28a0861449b 100644 --- a/invokeai/backend/quantization/bnb_nf4.py +++ b/invokeai/backend/quantization/bnb_nf4.py @@ -5,6 +5,10 @@ # The utils in this file are partially inspired by: # https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py +# NOTE(ryand): All of the custom state_dict manipulation logic in this file is pretty hacky. This could be made much +# cleaner by re-implementing bnb.nn.LinearNF4 with proper use of buffers and less magic. But, for now, we try to stick +# close to the bitsandbytes classes to make interoperability easier with other models that might use bitsandbytes. + class InvokeLinearNF4(bnb.nn.LinearNF4): """A class that extends `bnb.nn.LinearNF4` to add the following functionality: diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py new file mode 100644 index 00000000000..fd54210cbe5 --- /dev/null +++ b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py @@ -0,0 +1,89 @@ +import time +from contextlib import contextmanager +from pathlib import Path + +import accelerate +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from safetensors.torch import load_file, save_file + +from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 + + +@contextmanager +def log_time(name: str): + """Helper context manager to log the time taken by a block of code.""" + start = time.time() + try: + yield None + finally: + end = time.time() + print(f"'{name}' took {end - start:.4f} secs") + + +def main(): + # Load the FLUX transformer model onto the meta device. 
+ model_path = Path( + "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/" + ) + + with log_time("Initialize FLUX transformer on meta device"): + model_config = FluxTransformer2DModel.load_config(model_path, local_files_only=True) + with accelerate.init_empty_weights(): + empty_model = FluxTransformer2DModel.from_config(model_config) + assert isinstance(empty_model, FluxTransformer2DModel) + + # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate + # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. + modules_to_not_convert: set[str] = set() + + model_int8_path = model_path / "bnb_llm_int8" + if model_int8_path.exists(): + # The quantized model already exists, load it and return it. + print(f"A pre-quantized model already exists at '{model_int8_path}'. Attempting to load it...") + + # Replace the linear layers with LLM.int8() quantized linear layers (still on the meta device). + with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): + model = quantize_model_llm_int8(empty_model, modules_to_not_convert=modules_to_not_convert) + + with log_time("Load state dict into model"): + sd = load_file(model_int8_path / "model.safetensors") + model.load_state_dict(sd, strict=True, assign=True) + + with log_time("Move model to cuda"): + model = model.to("cuda") + + print(f"Successfully loaded pre-quantized model from '{model_int8_path}'.") + + else: + # The quantized model does not exist, quantize the model and save it. + print(f"No pre-quantized model found at '{model_int8_path}'. Quantizing the model...") + + with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): + model = quantize_model_llm_int8(empty_model, modules_to_not_convert=modules_to_not_convert) + + with log_time("Load state dict into model"): + # Load sharded state dict. + files = list(model_path.glob("*.safetensors")) + state_dict = dict() + for file in files: + sd = load_file(file) + state_dict.update(sd) + + model.load_state_dict(state_dict, strict=True, assign=True) + + with log_time("Move model to cuda and quantize"): + model = model.to("cuda") + + with log_time("Save quantized model"): + model_int8_path.mkdir(parents=True, exist_ok=True) + output_path = model_int8_path / "model.safetensors" + save_file(model.state_dict(), output_path) + + print(f"Successfully quantized and saved model to '{output_path}'.") + + assert isinstance(model, FluxTransformer2DModel) + return model + + +if __name__ == "__main__": + main() diff --git a/invokeai/backend/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/load_flux_model_bnb_nf4.py similarity index 100% rename from invokeai/backend/load_flux_model_bnb_nf4.py rename to invokeai/backend/quantization/load_flux_model_bnb_nf4.py From 307a1300d8c581acdc04518d2d8b126c823546ca Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 15 Aug 2024 19:59:31 +0000 Subject: [PATCH 021/113] More improvements for LLM.int8() - not fully tested. 
--- invokeai/backend/quantization/bnb_llm_int8.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py index 900c55a085a..f196ebc43e9 100644 --- a/invokeai/backend/quantization/bnb_llm_int8.py +++ b/invokeai/backend/quantization/bnb_llm_int8.py @@ -11,6 +11,33 @@ # stick close to the bitsandbytes classes to make interoperability easier with other models that might use bitsandbytes. +class InvokeInt8Params(bnb.nn.Int8Params): + """We override cuda() to avoid re-quantizing the weights in the following cases: + - We loaded quantized weights from a state_dict on the cpu, and then moved the model to the gpu. + - We are moving the model back-and-forth between the cpu and gpu. + """ + + def cuda(self, device): + if self.has_fp16_weights: + return super().cuda(device) + elif self.CB is not None and self.SCB is not None: + self.data = self.data.cuda() + self.CB = self.CB.cuda() + self.SCB = self.SCB.cuda() + else: + # we store the 8-bit rows-major weight + # we convert this weight to the turning/ampere weight during the first inference pass + B = self.data.contiguous().half().cuda(device) + CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B) + del CBt + del SCBt + self.data = CB + self.CB = CB + self.SCB = SCB + + return self + + class InvokeLinear8bitLt(bnb.nn.Linear8bitLt): def _load_from_state_dict( self, @@ -36,7 +63,7 @@ def _load_from_state_dict( if scb is not None: # We are loading a pre-quantized state dict. - self.weight = bnb.nn.Int8Params( + self.weight = InvokeInt8Params( data=weight, requires_grad=self.weight.requires_grad, has_fp16_weights=False, @@ -53,7 +80,7 @@ def _load_from_state_dict( # device requires setting `assign=True`, doing this with the default `super()._load_from_state_dict()` # implementation causes `Params4Bit` to be replaced by a `torch.nn.Parameter`. By initializing a new # `Params4bit` object, we work around this issue. It's a bit hacky, but it gets the job done. 
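
The switch below from `bnb.nn.Int8Params` to `InvokeInt8Params` pairs with the `cuda()` override above: once `CB` / `SCB` already hold pre-quantized data, the stock `Int8Params.cuda()` would run `double_quant` again on the int8 rows and recompute `SCB` from them, losing the original scales, so the override moves the existing tensors to the GPU unchanged. A rough usage sketch (`prequantized_sd` is a hypothetical state dict with `weight`, `SCB`, and `bias` entries):

    layer = InvokeLinear8bitLt(4096, 4096, bias=True, has_fp16_weights=False)
    layer.load_state_dict(prequantized_sd)  # populates weight.CB / weight.SCB via the method above
    layer.cuda(0)                           # tensors are moved as-is, not re-quantized
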
- self.weight = bnb.nn.Int8Params( + self.weight = InvokeInt8Params( data=weight, requires_grad=self.weight.requires_grad, has_fp16_weights=False, @@ -89,10 +116,6 @@ def _convert_linear_layers_to_llm_8bit( ) -def get_parameter_device(parameter: torch.nn.Module): - return next(parameter.parameters()).device - - def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str], outlier_threshold: float = 6.0): """Apply bitsandbytes LLM.8bit() quantization to the model.""" _convert_linear_layers_to_llm_8bit( From 27f33bb5175f330bc443a47079f6a0e299c608ad Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 16 Aug 2024 20:22:49 +0000 Subject: [PATCH 022/113] WIP on moving from diffusers to FLUX --- .../app/invocations/flux_text_to_image.py | 222 +++++++++++------- .../quantization/load_flux_model_bnb_nf4.py | 43 ++-- pyproject.toml | 1 + 3 files changed, 155 insertions(+), 111 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index de34a6eb5e3..19829c47a4c 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -3,9 +3,12 @@ import accelerate import torch -from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from diffusers.pipelines.flux.pipeline_flux import FluxPipeline +from einops import rearrange, repeat +from flux.model import Flux +from flux.modules.autoencoder import AutoEncoder +from flux.sampling import denoise, get_noise, get_schedule, unpack +from flux.util import configs as flux_configs from PIL import Image from safetensors.torch import load_file from transformers.models.auto import AutoModelForTextEncoding @@ -21,11 +24,11 @@ ) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo +from invokeai.backend.util.devices import TorchDevice TFluxModelKeys = Literal["flux-schnell"] FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} @@ -70,7 +73,13 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): @torch.no_grad() def invoke(self, context: InvocationContext) -> ImageOutput: - model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) + # model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) + flux_transformer_path = context.models.download_and_cache_model( + "https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/flux1-schnell.safetensors" + ) + flux_ae_path = context.models.download_and_cache_model( + "https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/ae.safetensors" + ) # Load the conditioning data. 
cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) @@ -78,123 +87,155 @@ def invoke(self, context: InvocationContext) -> ImageOutput: flux_conditioning = cond_data.conditionings[0] assert isinstance(flux_conditioning, FLUXConditioningInfo) - latents = self._run_diffusion(context, model_path, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) - image = self._run_vae_decoding(context, model_path, latents) + latents = self._run_diffusion( + context, flux_transformer_path, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds + ) + image = self._run_vae_decoding(context, flux_ae_path, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) def _run_diffusion( self, context: InvocationContext, - flux_model_dir: Path, + flux_transformer_path: Path, clip_embeddings: torch.Tensor, t5_embeddings: torch.Tensor, ): - scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(flux_model_dir / "scheduler", local_files_only=True) + inference_dtype = TorchDevice.choose_torch_dtype() + + # Prepare input noise. + # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a + # CPU RNG? + x = get_noise( + num_samples=1, + height=self.height, + width=self.width, + device=TorchDevice.choose_torch_device(), + dtype=inference_dtype, + seed=self.seed, + ) + + img, img_ids = self._prepare_latent_img_patches(x) + + # HACK(ryand): Find a better way to determine if this is a schnell model or not. + is_schnell = "shnell" in str(flux_transformer_path) + timesteps = get_schedule( + num_steps=self.num_steps, + image_seq_len=img.shape[1], + shift=not is_schnell, + ) + + bs, t5_seq_len, _ = t5_embeddings.shape + txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()) # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems # if the cache is not empty. 
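
For reference, the value passed to `make_room` below matches the size quoted in the comment above:

    24 * 2**30  # = 25_769_803_776 bytes = 24 GiB, roughly the ~12B-parameter transformer held in bfloat16
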
context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) - transformer_path = flux_model_dir / "transformer" with context.models.load_local_model( - model_path=transformer_path, loader=self._load_flux_transformer + model_path=flux_transformer_path, loader=self._load_flux_transformer ) as transformer: - assert isinstance(transformer, FluxTransformer2DModel) - - flux_pipeline_with_transformer = FluxPipeline( - scheduler=scheduler, - vae=None, - text_encoder=None, - tokenizer=None, - text_encoder_2=None, - tokenizer_2=None, - transformer=transformer, + assert isinstance(transformer, Flux) + + x = denoise( + model=transformer, + img=img, + img_ids=img_ids, + txt=t5_embeddings, + txt_ids=txt_ids, + vec=clip_embeddings, + timesteps=timesteps, + guidance=self.guidance, ) - dtype = torch.bfloat16 - t5_embeddings = t5_embeddings.to(dtype=dtype) - clip_embeddings = clip_embeddings.to(dtype=dtype) - - latents = flux_pipeline_with_transformer( - height=self.height, - width=self.width, - num_inference_steps=self.num_steps, - guidance_scale=self.guidance, - generator=torch.Generator().manual_seed(self.seed), - prompt_embeds=t5_embeddings, - pooled_prompt_embeds=clip_embeddings, - output_type="latent", - return_dict=False, - )[0] - - assert isinstance(latents, torch.Tensor) - return latents + x = unpack(x.float(), self.height, self.width) + + return x + + def _prepare_latent_img_patches(self, latent_img: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Convert an input image in latent space to patches for diffusion. + + This implementation was extracted from: + https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/sampling.py#L32 + + Returns: + tuple[Tensor, Tensor]: (img, img_ids), as defined in the original flux repo. + """ + bs, c, h, w = latent_img.shape + + # Pixel unshuffle with a scale of 2, and flatten the height/width dimensions to get an array of patches. + img = rearrange(latent_img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) + if img.shape[0] == 1 and bs > 1: + img = repeat(img, "1 ... -> bs ...", bs=bs) + + # Generate patch position ids. + img_ids = torch.zeros(h // 2, w // 2, 3) + img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None] + img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :] + img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + + return img, img_ids def _run_vae_decoding( self, context: InvocationContext, - flux_model_dir: Path, + flux_ae_path: Path, latents: torch.Tensor, ) -> Image.Image: - vae_path = flux_model_dir / "vae" - with context.models.load_local_model(model_path=vae_path, loader=self._load_flux_vae) as vae: - assert isinstance(vae, AutoencoderKL) - - flux_pipeline_with_vae = FluxPipeline( - scheduler=None, - vae=vae, - text_encoder=None, - tokenizer=None, - text_encoder_2=None, - tokenizer_2=None, - transformer=None, - ) + with context.models.load_local_model(model_path=flux_ae_path, loader=self._load_flux_vae) as vae: + assert isinstance(vae, AutoEncoder) + # TODO(ryand): Test that this works with both float16 and bfloat16. 
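
The `vae.decode(...)` call below returns pixel values in roughly [-1, 1] (hence the clamp); the `127.5 * (img + 1.0)` conversion a few lines further down maps that range onto 0..255 before the tensor is handed to PIL:

    # -1.0 -> 0.0, 0.0 -> 127.5, 1.0 -> 255.0; .byte() then truncates to uint8
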
+ with torch.autocast(device_type=latents.device.type, dtype=TorchDevice.choose_torch_dtype()): + img = vae.decode(latents) - latents = flux_pipeline_with_vae._unpack_latents( - latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor - ) - latents = ( - latents / flux_pipeline_with_vae.vae.config.scaling_factor - ) + flux_pipeline_with_vae.vae.config.shift_factor - latents = latents.to(dtype=vae.dtype) - image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0] - image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0] + img.clamp(-1, 1) + img = rearrange(img[0], "c h w -> h w c") + img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy()) - assert isinstance(image, Image.Image) - return image + return img_pil def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: + inference_dtype = TorchDevice.choose_torch_dtype() if self.quantization_type == "raw": - model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) - elif self.quantization_type == "NF4": - model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. + params = flux_configs["flux-schnell"].params + + # Initialize the model on the "meta" device. with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) + model = Flux(params).to(inference_dtype) - model_nf4_path = path / "bnb_nf4" - assert model_nf4_path.exists() + state_dict = load_file(path) + # TODO(ryand): Cast the state_dict to the appropriate dtype? + model.load_state_dict(state_dict, strict=True, assign=True) + elif self.quantization_type == "NF4": + model_path = path.parent / "bnb_nf4.safetensors" + + # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. + params = flux_configs["flux-schnell"].params + # Initialize the model on the "meta" device. with accelerate.init_empty_weights(): - model = quantize_model_nf4(empty_model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + model = Flux(params) + model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle # this on GPUs without bfloat16 support. 
- sd = load_file(model_nf4_path / "model.safetensors") - model.load_state_dict(sd, strict=True, assign=True) - elif self.quantization_type == "llm_int8": - model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) - with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) - model_int8_path = path / "bnb_llm_int8" - assert model_int8_path.exists() - with accelerate.init_empty_weights(): - model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) + state_dict = load_file(model_path) + model.load_state_dict(state_dict, strict=True, assign=True) - sd = load_file(model_int8_path / "model.safetensors") - model.load_state_dict(sd, strict=True, assign=True) + elif self.quantization_type == "llm_int8": + raise NotImplementedError("LLM int8 quantization is not yet supported.") + # model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) + # with accelerate.init_empty_weights(): + # empty_model = FluxTransformer2DModel.from_config(model_config) + # assert isinstance(empty_model, FluxTransformer2DModel) + # model_int8_path = path / "bnb_llm_int8" + # assert model_int8_path.exists() + # with accelerate.init_empty_weights(): + # model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) + + # sd = load_file(model_int8_path / "model.safetensors") + # model.load_state_dict(sd, strict=True, assign=True) else: raise ValueError(f"Unsupported quantization type: {self.quantization_type}") @@ -202,7 +243,12 @@ def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: return model @staticmethod - def _load_flux_vae(path: Path) -> AutoencoderKL: - model = AutoencoderKL.from_pretrained(path, local_files_only=True) - assert isinstance(model, AutoencoderKL) - return model + def _load_flux_vae(path: Path) -> AutoEncoder: + # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. + ae_params = flux_configs["flux1-schnell"].ae_params + with accelerate.init_empty_weights(): + ae = AutoEncoder(ae_params) + + state_dict = load_file(path) + ae.load_state_dict(state_dict, strict=True, assign=True) + return ae diff --git a/invokeai/backend/quantization/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/load_flux_model_bnb_nf4.py index b55c56a0321..80f3e71901e 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_nf4.py @@ -4,7 +4,8 @@ import accelerate import torch -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from flux.model import Flux +from flux.util import configs as flux_configs from safetensors.torch import load_file, save_file from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 @@ -22,22 +23,24 @@ def log_time(name: str): def main(): - # Load the FLUX transformer model onto the meta device. model_path = Path( - "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/" + "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors" ) + # inference_dtype = torch.bfloat16 with log_time("Intialize FLUX transformer on meta device"): - model_config = FluxTransformer2DModel.load_config(model_path, local_files_only=True) + # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. 
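
One possible way to resolve the TODO above, assuming the checkpoint keeps the upstream file naming ("flux1-schnell.safetensors" vs. "flux1-dev.safetensors") and that "flux-dev" is the other key in `flux.util.configs` (both of these are assumptions):

    config_name = "flux-schnell" if "schnell" in model_path.name.lower() else "flux-dev"
    params = flux_configs[config_name].params
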
+ params = flux_configs["flux-schnell"].params + + # Initialize the model on the "meta" device. with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) + model = Flux(params) # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. modules_to_not_convert: set[str] = set() - model_nf4_path = model_path / "bnb_nf4" + model_nf4_path = model_path.parent / "bnb_nf4.safetensors" if model_nf4_path.exists(): # The quantized model already exists, load it and return it. print(f"A pre-quantized model already exists at '{model_nf4_path}'. Attempting to load it...") @@ -45,12 +48,12 @@ def main(): # Replace the linear layers with NF4 quantized linear layers (still on the meta device). with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights(): model = quantize_model_nf4( - empty_model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 + model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 ) with log_time("Load state dict into model"): - sd = load_file(model_nf4_path / "model.safetensors") - model.load_state_dict(sd, strict=True, assign=True) + state_dict = load_file(model_nf4_path) + model.load_state_dict(state_dict, strict=True, assign=True) with log_time("Move model to cuda"): model = model.to("cuda") @@ -63,30 +66,24 @@ def main(): with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights(): model = quantize_model_nf4( - empty_model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 + model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16 ) with log_time("Load state dict into model"): - # Load sharded state dict. - files = list(model_path.glob("*.safetensors")) - state_dict = dict() - for file in files: - sd = load_file(file) - state_dict.update(sd) - + state_dict = load_file(model_path) + # TODO(ryand): Cast the state_dict to the appropriate dtype? model.load_state_dict(state_dict, strict=True, assign=True) with log_time("Move model to cuda and quantize"): model = model.to("cuda") with log_time("Save quantized model"): - model_nf4_path.mkdir(parents=True, exist_ok=True) - output_path = model_nf4_path / "model.safetensors" - save_file(model.state_dict(), output_path) + model_nf4_path.parent.mkdir(parents=True, exist_ok=True) + save_file(model.state_dict(), model_nf4_path) - print(f"Successfully quantized and saved model to '{output_path}'.") + print(f"Successfully quantized and saved model to '{model_nf4_path}'.") - assert isinstance(model, FluxTransformer2DModel) + assert isinstance(model, Flux) return model diff --git a/pyproject.toml b/pyproject.toml index 768d2184342..6b22b45babf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "controlnet-aux==0.0.7", # TODO(ryand): Bump this once the next diffusers release is ready. 
"diffusers[torch] @ git+https://github.com/huggingface/diffusers.git@4c6152c2fb0ade468aadb417102605a07a8635d3", + "flux @ git+https://github.com/black-forest-labs/flux.git@c23ae247225daba30fbd56058d247cc1b1fc20a3", "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal() From e157ff3a23d2948cda1c555d77737319d26fede2 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 12 Aug 2024 14:04:23 -0400 Subject: [PATCH 023/113] Setup flux model loading in the UI --- invokeai/app/invocations/fields.py | 3 + .../app/invocations/flux_text_to_image.py | 15 +- invokeai/app/invocations/model.py | 49 ++++ invokeai/backend/model_manager/config.py | 2 + invokeai/backend/model_manager/probe.py | 5 + .../ModelManagerPanel/ModelBaseBadge.tsx | 1 + .../Invocation/fields/InputFieldRenderer.tsx | 7 + .../FluxMainModelFieldInputComponent.tsx | 55 ++++ .../web/src/features/nodes/types/common.ts | 3 +- .../web/src/features/nodes/types/constants.ts | 3 + .../web/src/features/nodes/types/field.ts | 32 +++ .../features/nodes/types/v1/fieldTypeMap.ts | 5 + .../src/features/nodes/types/v1/workflowV1.ts | 9 +- .../web/src/features/nodes/types/v2/common.ts | 2 +- .../web/src/features/nodes/types/v2/field.ts | 17 ++ .../util/schema/buildFieldInputInstance.ts | 1 + .../util/schema/buildFieldInputTemplate.ts | 16 ++ .../nodes/util/workflow/validateWorkflow.ts | 1 + .../features/parameters/types/constants.ts | 6 + .../src/services/api/hooks/modelsByType.ts | 2 + .../frontend/web/src/services/api/schema.ts | 243 +++++++++++++++++- .../frontend/web/src/services/api/types.ts | 4 + 22 files changed, 463 insertions(+), 18 deletions(-) create mode 100644 invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/FluxMainModelFieldInputComponent.tsx diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 9efcf2148f7..91dfcb51a7f 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -40,6 +40,7 @@ class UIType(str, Enum, metaclass=MetaEnum): # region Model Field Types MainModel = "MainModelField" + FluxMainModel = "FluxMainModelField" SDXLMainModel = "SDXLMainModelField" SDXLRefinerModel = "SDXLRefinerModelField" ONNXModel = "ONNXModelField" @@ -126,12 +127,14 @@ class FieldDescriptions: noise = "Noise tensor" clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count" unet = "UNet (scheduler, LoRAs)" + transformer = "Transformer" vae = "VAE" cond = "Conditioning tensor" controlnet_model = "ControlNet model to load" vae_model = "VAE model to load" lora_model = "LoRA model to load" main_model = "Main model (UNet, VAE, CLIP) to load" + flux_model = "Flux model (Transformer, VAE, CLIP) to load" sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load" sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load" onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load" diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 19829c47a4c..7a577215f86 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,14 +1,13 @@ from pathlib import Path from typing import Literal +from pydantic import Field import accelerate import torch from diffusers.models.transformers.transformer_flux 
import FluxTransformer2DModel -from einops import rearrange, repeat -from flux.model import Flux -from flux.modules.autoencoder import AutoEncoder -from flux.sampling import denoise, get_noise, get_schedule, unpack -from flux.util import configs as flux_configs +from diffusers.pipelines.flux.pipeline_flux import FluxPipeline +from invokeai.app.invocations.model import ModelIdentifierField +from optimum.quanto import qfloat8 from PIL import Image from safetensors.torch import load_file from transformers.models.auto import AutoModelForTextEncoding @@ -21,6 +20,7 @@ InputField, WithBoard, WithMetadata, + UIType, ) from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext @@ -52,6 +52,11 @@ class QuantizedModelForTextEncoding(FastQuantizedTransformersModel): class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" + flux_model: ModelIdentifierField = InputField( + description="The Flux model", + input=Input.Any, + ui_type=UIType.FluxMainModel + ) model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") quantization_type: Literal["raw", "NF4", "llm_int8"] = InputField( default="raw", description="The type of quantization to use for the transformer model." diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index c0d067c0a7a..dd121092699 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -60,6 +60,12 @@ class CLIPField(BaseModel): loras: List[LoRAField] = Field(description="LoRAs to apply on model loading") + +class TransformerField(BaseModel): + transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel") + scheduler: ModelIdentifierField = Field(description="Info to load scheduler submodel") + + class VAEField(BaseModel): vae: ModelIdentifierField = Field(description="Info to load vae submodel") seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless') @@ -122,6 +128,49 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: return ModelIdentifierOutput(model=self.model) +@invocation_output("flux_model_loader_output") +class FluxModelLoaderOutput(BaseInvocationOutput): + """Flux base model loader output""" + + transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer") + clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP 1") + clip2: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP 2") + vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") + + +@invocation("flux_model_loader", title="Flux Main Model", tags=["model", "flux"], category="model", version="1.0.3") +class FluxModelLoaderInvocation(BaseInvocation): + """Loads a flux base model, outputting its submodels.""" + + model: ModelIdentifierField = InputField( + description=FieldDescriptions.flux_model, + ui_type=UIType.FluxMainModel, + input=Input.Direct, + ) + + def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: + model_key = self.model.key + + # TODO: not found exceptions + if not context.models.exists(model_key): + raise Exception(f"Unknown model: {model_key}") + + transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + scheduler = self.model.model_copy(update={"submodel_type": SubModelType.Scheduler}) + 
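# [Editor's sketch, not part of the patch] The loader above derives one
# identifier per submodel by copying the selected model's identifier and
# overriding a single field. That is plain pydantic v2 behaviour
# (BaseModel.model_copy(update=...)), shown here with a simplified stand-in
# for ModelIdentifierField:
from enum import Enum
from typing import Optional

from pydantic import BaseModel


class SubModel(str, Enum):
    TRANSFORMER = "transformer"
    VAE = "vae"


class ModelIdentifier(BaseModel):
    key: str
    submodel_type: Optional[SubModel] = None


base = ModelIdentifier(key="flux-schnell")
transformer = base.model_copy(update={"submodel_type": SubModel.TRANSFORMER})
vae = base.model_copy(update={"submodel_type": SubModel.VAE})

assert base.submodel_type is None  # the original is left untouched
assert transformer.submodel_type is SubModel.TRANSFORMER
assert vae.submodel_type is SubModel.VAE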
tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + text_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + tokenizer2 = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer2}) + text_encoder2 = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder2}) + vae = self.model.model_copy(update={"submodel_type": SubModelType.VAE}) + + return FluxModelLoaderOutput( + transformer=TransformerField(transformer=transformer, scheduler=scheduler), + clip=CLIPField(tokenizer=tokenizer, text_encoder=text_encoder, loras=[], skipped_layers=0), + clip2=CLIPField(tokenizer=tokenizer2, text_encoder=text_encoder2, loras=[], skipped_layers=0), + vae=VAEField(vae=vae), + ) + + @invocation( "main_model_loader", title="Main Model", diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index 332ac6c8faf..29ef9536668 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -52,6 +52,7 @@ class BaseModelType(str, Enum): StableDiffusion2 = "sd-2" StableDiffusionXL = "sdxl" StableDiffusionXLRefiner = "sdxl-refiner" + Flux = "flux" # Kandinsky2_1 = "kandinsky-2.1" @@ -74,6 +75,7 @@ class SubModelType(str, Enum): """Submodel type.""" UNet = "unet" + Transformer = "transformer" TextEncoder = "text_encoder" TextEncoder2 = "text_encoder_2" Tokenizer = "tokenizer" diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index 1929b3f4fd8..82053149ad1 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -95,6 +95,7 @@ class ModelProbe(object): } CLASS2TYPE = { + "FluxPipeline": ModelType.Main, "StableDiffusionPipeline": ModelType.Main, "StableDiffusionInpaintPipeline": ModelType.Main, "StableDiffusionXLPipeline": ModelType.Main, @@ -626,6 +627,10 @@ def get_repo_variant(self) -> ModelRepoVariant: class PipelineFolderProbe(FolderProbeBase): def get_base_type(self) -> BaseModelType: + with open(f"{self.model_path}/model_index.json", "r") as file: + conf = json.load(file) + if "_class_name" in conf and conf.get("_class_name") == "FluxPipeline": + return BaseModelType.Flux with open(self.model_path / "unet" / "config.json", "r") as file: unet_conf = json.load(file) if unet_conf["cross_attention_dim"] == 768: diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx index bf07bad58cd..2cf4e25354f 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx @@ -13,6 +13,7 @@ const BASE_COLOR_MAP: Record = { 'sd-2': 'teal', sdxl: 'invokeBlue', 'sdxl-refiner': 'invokeBlue', + flux: 'invokeBlue', }; const ModelBaseBadge = ({ base }: Props) => { diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx index d863def9737..6ec51aba130 100644 --- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx @@ -14,6 +14,8 @@ import { 
isEnumFieldInputTemplate, isFloatFieldInputInstance, isFloatFieldInputTemplate, + isFluxMainModelFieldInputInstance, + isFluxMainModelFieldInputTemplate, isImageFieldInputInstance, isImageFieldInputTemplate, isIntegerFieldInputInstance, @@ -48,6 +50,7 @@ import BooleanFieldInputComponent from './inputs/BooleanFieldInputComponent'; import ColorFieldInputComponent from './inputs/ColorFieldInputComponent'; import ControlNetModelFieldInputComponent from './inputs/ControlNetModelFieldInputComponent'; import EnumFieldInputComponent from './inputs/EnumFieldInputComponent'; +import FluxMainModelFieldInputComponent from './inputs/FluxMainModelFieldInputComponent'; import ImageFieldInputComponent from './inputs/ImageFieldInputComponent'; import IPAdapterModelFieldInputComponent from './inputs/IPAdapterModelFieldInputComponent'; import LoRAModelFieldInputComponent from './inputs/LoRAModelFieldInputComponent'; @@ -69,6 +72,7 @@ type InputFieldProps = { const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => { const fieldInstance = useFieldInputInstance(nodeId, fieldName); const fieldTemplate = useFieldInputTemplate(nodeId, fieldName); + window.console.log("Hit 0") if (isStringFieldInputInstance(fieldInstance) && isStringFieldInputTemplate(fieldTemplate)) { return ; @@ -145,6 +149,9 @@ const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => { if (isColorFieldInputInstance(fieldInstance) && isColorFieldInputTemplate(fieldTemplate)) { return ; } + if (isFluxMainModelFieldInputInstance(fieldInstance) && isFluxMainModelFieldInputTemplate(fieldTemplate)) { + return ; + } if (isSDXLMainModelFieldInputInstance(fieldInstance) && isSDXLMainModelFieldInputTemplate(fieldTemplate)) { return ; diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/FluxMainModelFieldInputComponent.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/FluxMainModelFieldInputComponent.tsx new file mode 100644 index 00000000000..3a0ddb211ec --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/FluxMainModelFieldInputComponent.tsx @@ -0,0 +1,55 @@ +import { Combobox, Flex, FormControl } from '@invoke-ai/ui-library'; +import { useAppDispatch } from 'app/store/storeHooks'; +import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox'; +import { fieldMainModelValueChanged } from 'features/nodes/store/nodesSlice'; +import type { FluxMainModelFieldInputInstance, FluxMainModelFieldInputTemplate } from 'features/nodes/types/field'; +import { memo, useCallback } from 'react'; +import { useFluxModels } from 'services/api/hooks/modelsByType'; +import type { MainModelConfig } from 'services/api/types'; + +import type { FieldComponentProps } from './types'; + +type Props = FieldComponentProps; + +const FluxMainModelFieldInputComponent = (props: Props) => { + const { nodeId, field } = props; + const dispatch = useAppDispatch(); + const [modelConfigs, { isLoading }] = useFluxModels(); + const _onChange = useCallback( + (value: MainModelConfig | null) => { + if (!value) { + return; + } + dispatch( + fieldMainModelValueChanged({ + nodeId, + fieldName: field.name, + value, + }) + ); + }, + [dispatch, field.name, nodeId] + ); + const { options, value, onChange, placeholder, noOptionsMessage } = useGroupedModelCombobox({ + modelConfigs, + onChange: _onChange, + isLoading, + selectedModel: field.value, + }); + + return ( + + + + + + ); +}; + +export default 
memo(FluxMainModelFieldInputComponent); diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index c84b2dae623..894d257f286 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -61,7 +61,7 @@ export type SchedulerField = z.infer; // #endregion // #region Model-related schemas -const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner']); +const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner', 'flux']); const zModelType = z.enum([ 'main', 'vae', @@ -76,6 +76,7 @@ const zModelType = z.enum([ ]); const zSubModelType = z.enum([ 'unet', + 'transformer', 'text_encoder', 'text_encoder_2', 'tokenizer', diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index 05697c384c0..ca43f35b550 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -31,6 +31,7 @@ export const MODEL_TYPES = [ 'ControlNetModelField', 'LoRAModelField', 'MainModelField', + 'FluxMainModelField', 'SDXLMainModelField', 'SDXLRefinerModelField', 'VaeModelField', @@ -61,6 +62,7 @@ export const FIELD_COLORS: { [key: string]: string } = { LatentsField: 'pink.500', LoRAModelField: 'teal.500', MainModelField: 'teal.500', + FluxMainModelField: 'teal.500', SDXLMainModelField: 'teal.500', SDXLRefinerModelField: 'teal.500', SpandrelImageToImageModelField: 'teal.500', @@ -68,6 +70,7 @@ export const FIELD_COLORS: { [key: string]: string } = { T2IAdapterField: 'teal.500', T2IAdapterModelField: 'teal.500', UNetField: 'red.500', + TransformerField: 'red.500', VAEField: 'blue.500', VAEModelField: 'teal.500', }; diff --git a/invokeai/frontend/web/src/features/nodes/types/field.ts b/invokeai/frontend/web/src/features/nodes/types/field.ts index 925bd40b9db..607a1005acb 100644 --- a/invokeai/frontend/web/src/features/nodes/types/field.ts +++ b/invokeai/frontend/web/src/features/nodes/types/field.ts @@ -115,6 +115,10 @@ const zSDXLMainModelFieldType = zFieldTypeBase.extend({ name: z.literal('SDXLMainModelField'), originalType: zStatelessFieldType.optional(), }); +const zFluxMainModelFieldType = zFieldTypeBase.extend({ + name: z.literal('FluxMainModelField'), + originalType: zStatelessFieldType.optional(), +}); const zSDXLRefinerModelFieldType = zFieldTypeBase.extend({ name: z.literal('SDXLRefinerModelField'), originalType: zStatelessFieldType.optional(), @@ -158,6 +162,7 @@ const zStatefulFieldType = z.union([ zModelIdentifierFieldType, zMainModelFieldType, zSDXLMainModelFieldType, + zFluxMainModelFieldType, zSDXLRefinerModelFieldType, zVAEModelFieldType, zLoRAModelFieldType, @@ -447,6 +452,29 @@ export const isSDXLMainModelFieldInputTemplate = (val: unknown): val is SDXLMain zSDXLMainModelFieldInputTemplate.safeParse(val).success; // #endregion +// #region FluxMainModelField + +const zFluxMainModelFieldValue = zMainModelFieldValue; // TODO: Narrow to SDXL models only. 
+const zFluxMainModelFieldInputInstance = zFieldInputInstanceBase.extend({ + value: zFluxMainModelFieldValue, +}); +const zFluxMainModelFieldInputTemplate = zFieldInputTemplateBase.extend({ + type: zFluxMainModelFieldType, + originalType: zFieldType.optional(), + default: zFluxMainModelFieldValue, +}); +const zFluxMainModelFieldOutputTemplate = zFieldOutputTemplateBase.extend({ + type: zFluxMainModelFieldType, +}); +export type FluxMainModelFieldInputInstance = z.infer; +export type FluxMainModelFieldInputTemplate = z.infer; +export const isFluxMainModelFieldInputInstance = (val: unknown): val is FluxMainModelFieldInputInstance => + zFluxMainModelFieldInputInstance.safeParse(val).success; +export const isFluxMainModelFieldInputTemplate = (val: unknown): val is FluxMainModelFieldInputTemplate => + zFluxMainModelFieldInputTemplate.safeParse(val).success; + +// #endregion + // #region SDXLRefinerModelField /** @alias */ // tells knip to ignore this duplicate export @@ -693,6 +721,7 @@ export const zStatefulFieldValue = z.union([ zModelIdentifierFieldValue, zMainModelFieldValue, zSDXLMainModelFieldValue, + zFluxMainModelFieldValue, zSDXLRefinerModelFieldValue, zVAEModelFieldValue, zLoRAModelFieldValue, @@ -720,6 +749,7 @@ const zStatefulFieldInputInstance = z.union([ zBoardFieldInputInstance, zModelIdentifierFieldInputInstance, zMainModelFieldInputInstance, + zFluxMainModelFieldInputInstance, zSDXLMainModelFieldInputInstance, zSDXLRefinerModelFieldInputInstance, zVAEModelFieldInputInstance, @@ -749,6 +779,7 @@ const zStatefulFieldInputTemplate = z.union([ zBoardFieldInputTemplate, zModelIdentifierFieldInputTemplate, zMainModelFieldInputTemplate, + zFluxMainModelFieldInputTemplate, zSDXLMainModelFieldInputTemplate, zSDXLRefinerModelFieldInputTemplate, zVAEModelFieldInputTemplate, @@ -779,6 +810,7 @@ const zStatefulFieldOutputTemplate = z.union([ zBoardFieldOutputTemplate, zModelIdentifierFieldOutputTemplate, zMainModelFieldOutputTemplate, + zFluxMainModelFieldOutputTemplate, zSDXLMainModelFieldOutputTemplate, zSDXLRefinerModelFieldOutputTemplate, zVAEModelFieldOutputTemplate, diff --git a/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts b/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts index f1d4e613004..719063cf683 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts @@ -114,6 +114,11 @@ const FIELD_TYPE_V1_TO_STATEFUL_FIELD_TYPE_V2: { isCollection: false, isCollectionOrScalar: false, }, + FluxMainModelField: { + name: 'FluxMainModelField', + isCollection: false, + isCollectionOrScalar: false, + }, SDXLMainModelField: { name: 'SDXLMainModelField', isCollection: false, diff --git a/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts b/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts index c7a50b20e41..b4ec9cd94e5 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts @@ -27,7 +27,7 @@ const zScheduler = z.enum([ 'kdpm_2_a', 'lcm', ]); -const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner']); +const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner', 'flux']); const zMainModel = z.object({ model_name: z.string().min(1), base_model: zBaseModel, @@ -89,6 +89,7 @@ const zFieldTypeV1 = z.enum([ 'ONNXModelField', 'Scheduler', 'SDXLMainModelField', + 'FluxMainModelField', 'SDXLRefinerModelField', 'string', 
'StringCollection', @@ -417,6 +418,11 @@ const zSDXLMainModelInputFieldValue = zInputFieldValueBase.extend({ value: zMainOrOnnxModel.optional(), }); +const zFluxMainModelInputFieldValue = zInputFieldValueBase.extend({ + type: z.literal('FluxMainModelField'), + value: zMainModel.optional(), +}); + const zSDXLRefinerModelInputFieldValue = zInputFieldValueBase.extend({ type: z.literal('SDXLRefinerModelField'), value: zMainOrOnnxModel.optional(), // TODO: should narrow this down to a refiner model @@ -572,6 +578,7 @@ const zInputFieldValue = z.discriminatedUnion('type', [ zMainModelInputFieldValue, zSchedulerInputFieldValue, zSDXLMainModelInputFieldValue, + zFluxMainModelInputFieldValue, zSDXLRefinerModelInputFieldValue, zStringCollectionInputFieldValue, zStringPolymorphicInputFieldValue, diff --git a/invokeai/frontend/web/src/features/nodes/types/v2/common.ts b/invokeai/frontend/web/src/features/nodes/types/v2/common.ts index 8613076132d..64d4db04515 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v2/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v2/common.ts @@ -44,7 +44,7 @@ export const zSchedulerField = z.enum([ // #endregion // #region Model-related schemas -const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner']); +const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner', 'flux']); const zModelName = z.string().min(3); export const zModelIdentifier = z.object({ model_name: zModelName, diff --git a/invokeai/frontend/web/src/features/nodes/types/v2/field.ts b/invokeai/frontend/web/src/features/nodes/types/v2/field.ts index 4b680d1de33..a02a9985089 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v2/field.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v2/field.ts @@ -203,6 +203,20 @@ const zSDXLMainModelFieldOutputInstance = zFieldOutputInstanceBase.extend({ }); // #endregion +// #region FluxMainModelField +const zFluxMainModelFieldType = zFieldTypeBase.extend({ + name: z.literal('FluxMainModelField'), +}); +const zFluxMainModelFieldValue = zMainModelFieldValue; // TODO: Narrow to SDXL models only. 
+const zFluxMainModelFieldInputInstance = zFieldInputInstanceBase.extend({ + type: zFluxMainModelFieldType, + value: zFluxMainModelFieldValue, +}); +const zFluxMainModelFieldOutputInstance = zFieldOutputInstanceBase.extend({ + type: zFluxMainModelFieldType, +}); +// #endregion + // #region SDXLRefinerModelField const zSDXLRefinerModelFieldType = zFieldTypeBase.extend({ name: z.literal('SDXLRefinerModelField'), @@ -338,6 +352,7 @@ const zStatefulFieldType = z.union([ zBoardFieldType, zMainModelFieldType, zSDXLMainModelFieldType, + zFluxMainModelFieldType, zSDXLRefinerModelFieldType, zVAEModelFieldType, zLoRAModelFieldType, @@ -377,6 +392,7 @@ const zStatefulFieldInputInstance = z.union([ zBoardFieldInputInstance, zMainModelFieldInputInstance, zSDXLMainModelFieldInputInstance, + zFluxMainModelFieldInputInstance, zSDXLRefinerModelFieldInputInstance, zVAEModelFieldInputInstance, zLoRAModelFieldInputInstance, @@ -401,6 +417,7 @@ const zStatefulFieldOutputInstance = z.union([ zBoardFieldOutputInstance, zMainModelFieldOutputInstance, zSDXLMainModelFieldOutputInstance, + zFluxMainModelFieldOutputInstance, zSDXLRefinerModelFieldOutputInstance, zVAEModelFieldOutputInstance, zLoRAModelFieldOutputInstance, diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts index a5a2d89f03c..e8784a11638 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts @@ -15,6 +15,7 @@ const FIELD_VALUE_FALLBACK_MAP: Record = MainModelField: undefined, SchedulerField: 'euler', SDXLMainModelField: undefined, + FluxMainModelField: undefined, SDXLRefinerModelField: undefined, StringField: '', T2IAdapterModelField: undefined, diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts index 8478415cd14..f4f3ef85afa 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts @@ -8,6 +8,7 @@ import type { FieldInputTemplate, FieldType, FloatFieldInputTemplate, + FluxMainModelFieldInputTemplate, ImageFieldInputTemplate, IntegerFieldInputTemplate, IPAdapterModelFieldInputTemplate, @@ -180,6 +181,20 @@ const buildSDXLMainModelFieldInputTemplate: FieldInputTemplateBuilder = ({ + schemaObject, + baseField, + fieldType, +}) => { + const template: FluxMainModelFieldInputTemplate = { + ...baseField, + type: fieldType, + default: schemaObject.default ?? 
undefined, + }; + + return template; +}; + const buildRefinerModelFieldInputTemplate: FieldInputTemplateBuilder = ({ schemaObject, baseField, @@ -386,6 +401,7 @@ export const TEMPLATE_BUILDER_MAP: Record { + return config.type === 'main' && config.base === 'flux'; +}; + export const isNonSDXLMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => { return config.type === 'main' && (config.base === 'sd-1' || config.base === 'sd-2'); }; From 6779e032f1b2a7579ae8cd66300f2f6a85fca88c Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 12 Aug 2024 14:06:17 -0400 Subject: [PATCH 024/113] Remove changes to v1 workflow --- .../web/src/features/nodes/types/v1/fieldTypeMap.ts | 5 ----- .../web/src/features/nodes/types/v1/workflowV1.ts | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts b/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts index 719063cf683..f1d4e613004 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v1/fieldTypeMap.ts @@ -114,11 +114,6 @@ const FIELD_TYPE_V1_TO_STATEFUL_FIELD_TYPE_V2: { isCollection: false, isCollectionOrScalar: false, }, - FluxMainModelField: { - name: 'FluxMainModelField', - isCollection: false, - isCollectionOrScalar: false, - }, SDXLMainModelField: { name: 'SDXLMainModelField', isCollection: false, diff --git a/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts b/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts index b4ec9cd94e5..c7a50b20e41 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v1/workflowV1.ts @@ -27,7 +27,7 @@ const zScheduler = z.enum([ 'kdpm_2_a', 'lcm', ]); -const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner', 'flux']); +const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner']); const zMainModel = z.object({ model_name: z.string().min(1), base_model: zBaseModel, @@ -89,7 +89,6 @@ const zFieldTypeV1 = z.enum([ 'ONNXModelField', 'Scheduler', 'SDXLMainModelField', - 'FluxMainModelField', 'SDXLRefinerModelField', 'string', 'StringCollection', @@ -418,11 +417,6 @@ const zSDXLMainModelInputFieldValue = zInputFieldValueBase.extend({ value: zMainOrOnnxModel.optional(), }); -const zFluxMainModelInputFieldValue = zInputFieldValueBase.extend({ - type: z.literal('FluxMainModelField'), - value: zMainModel.optional(), -}); - const zSDXLRefinerModelInputFieldValue = zInputFieldValueBase.extend({ type: z.literal('SDXLRefinerModelField'), value: zMainOrOnnxModel.optional(), // TODO: should narrow this down to a refiner model @@ -578,7 +572,6 @@ const zInputFieldValue = z.discriminatedUnion('type', [ zMainModelInputFieldValue, zSchedulerInputFieldValue, zSDXLMainModelInputFieldValue, - zFluxMainModelInputFieldValue, zSDXLRefinerModelInputFieldValue, zStringCollectionInputFieldValue, zStringPolymorphicInputFieldValue, From 4bb343864a8f6f0f4fb5f6efa9335ed5a0b9b766 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 12 Aug 2024 18:01:42 -0400 Subject: [PATCH 025/113] Manage quantization of models within the loader --- invokeai/app/invocations/fields.py | 1 + invokeai/app/invocations/flux_text_encoder.py | 112 +++++-------- .../app/invocations/flux_text_to_image.py | 153 ++++-------------- invokeai/app/invocations/model.py | 10 +- .../load/model_loaders/generic_diffusers.py | 7 +- 
.../backend/model_manager/load/model_util.py | 9 +- .../fast_quantized_diffusion_model.py | 14 +- .../fast_quantized_transformers_model.py | 11 +- .../frontend/web/src/services/api/schema.ts | 56 +++---- 9 files changed, 133 insertions(+), 240 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 91dfcb51a7f..ba2c75aa132 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -126,6 +126,7 @@ class FieldDescriptions: negative_cond = "Negative conditioning tensor" noise = "Noise tensor" clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count" + t5Encoder = "T5 tokenizer and text encoder" unet = "UNet (scheduler, LoRAs)" transformer = "Transformer" vae = "VAE" diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 582ae6fabcc..ce173a49a15 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -6,8 +6,10 @@ from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation -from invokeai.app.invocations.fields import InputField -from invokeai.app.invocations.flux_text_to_image import FLUX_MODELS, QuantizedModelForTextEncoding, TFluxModelKeys +from invokeai.app.invocations.model import CLIPField, T5EncoderField +from invokeai.app.invocations.fields import InputField, FieldDescriptions, Input +from invokeai.app.invocations.flux_text_to_image import FLUX_MODELS, QuantizedModelForTextEncoding +from invokeai.app.invocations.model import CLIPField, T5EncoderField from invokeai.app.invocations.primitives import ConditioningOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo @@ -22,9 +24,15 @@ version="1.0.0", ) class FluxTextEncoderInvocation(BaseInvocation): - model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") - use_8bit: bool = InputField( - default=False, description="Whether to quantize the transformer model to 8-bit precision." + clip: CLIPField = InputField( + title="CLIP", + description=FieldDescriptions.clip, + input=Input.Connection, + ) + t5Encoder: T5EncoderField = InputField( + title="T5EncoderField", + description=FieldDescriptions.t5Encoder, + input=Input.Connection, ) positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") @@ -32,47 +40,43 @@ class FluxTextEncoderInvocation(BaseInvocation): # compatible with other ConditioningOutputs. @torch.no_grad() def invoke(self, context: InvocationContext) -> ConditioningOutput: - model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) - t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path) + t5_embeddings, clip_embeddings = self._encode_prompt(context) conditioning_data = ConditioningFieldData( conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)] ) conditioning_name = context.conditioning.save(conditioning_data) return ConditioningOutput.build(conditioning_name) + + def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: + # TODO: Determine the T5 max sequence length based on the model. 
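# [Editor's sketch, not part of the patch] The TODO above asks for a
# per-variant sequence length. The values appearing elsewhere in this patch
# series are 256 tokens for FLUX.1-schnell and 512 for FLUX.1-dev; a helper of
# roughly this shape could resolve it once variant detection exists (which is
# itself still an open TODO):
def t5_max_seq_len(variant: str) -> int:
    """Return the assumed T5 max sequence length for a FLUX variant."""
    lengths = {"flux-schnell": 256, "flux-dev": 512}
    try:
        return lengths[variant]
    except KeyError:
        raise ValueError(f"Unknown FLUX variant: {variant}") from None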
+ # if self.model == "flux-schnell": + max_seq_len = 256 + # # elif self.model == "flux-dev": + # # max_seq_len = 512 + # else: + # raise ValueError(f"Unknown model: {self.model}") + + # Load CLIP. + clip_tokenizer_info = context.models.load(self.clip.tokenizer) + clip_text_encoder_info = context.models.load(self.clip.text_encoder) + + # Load T5. + t5_tokenizer_info = context.models.load(self.t5Encoder.tokenizer) + t5_text_encoder_info = context.models.load(self.t5Encoder.text_encoder) - def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]: - # Determine the T5 max sequence length based on the model. - if self.model == "flux-schnell": - max_seq_len = 256 - # elif self.model == "flux-dev": - # max_seq_len = 512 - else: - raise ValueError(f"Unknown model: {self.model}") - - # Load the CLIP tokenizer. - clip_tokenizer_path = flux_model_dir / "tokenizer" - clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True) - assert isinstance(clip_tokenizer, CLIPTokenizer) - - # Load the T5 tokenizer. - t5_tokenizer_path = flux_model_dir / "tokenizer_2" - t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True) - assert isinstance(t5_tokenizer, T5TokenizerFast) - - clip_text_encoder_path = flux_model_dir / "text_encoder" - t5_text_encoder_path = flux_model_dir / "text_encoder_2" with ( - context.models.load_local_model( - model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder - ) as clip_text_encoder, - context.models.load_local_model( - model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2 - ) as t5_text_encoder, + clip_text_encoder_info as clip_text_encoder, + t5_text_encoder_info as t5_text_encoder, + clip_tokenizer_info as clip_tokenizer, + t5_tokenizer_info as t5_tokenizer, ): assert isinstance(clip_text_encoder, CLIPTextModel) assert isinstance(t5_text_encoder, T5EncoderModel) + assert isinstance(clip_tokenizer, CLIPTokenizer) + assert isinstance(t5_tokenizer, T5TokenizerFast) + pipeline = FluxPipeline( scheduler=None, vae=None, @@ -85,7 +89,7 @@ def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tu # prompt_embeds: T5 embeddings # pooled_prompt_embeds: CLIP embeddings - prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt( + prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt( prompt=self.positive_prompt, prompt_2=self.positive_prompt, device=TorchDevice.choose_torch_device(), @@ -95,41 +99,3 @@ def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tu assert isinstance(prompt_embeds, torch.Tensor) assert isinstance(pooled_prompt_embeds, torch.Tensor) return prompt_embeds, pooled_prompt_embeds - - @staticmethod - def _load_flux_text_encoder(path: Path) -> CLIPTextModel: - model = CLIPTextModel.from_pretrained(path, local_files_only=True) - assert isinstance(model, CLIPTextModel) - return model - - def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel: - if self.use_8bit: - model_8bit_path = path / "quantized" - if model_8bit_path.exists(): - # The quantized model exists, load it. - # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like - # something that we should be able to make much faster. - q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path) - - # Access the underlying wrapped model. 
- # We access the wrapped model, even though it is private, because it simplifies the type checking by - # always returning a T5EncoderModel from this function. - model = q_model._wrapped - else: - # The quantized model does not exist yet, quantize and save it. - # TODO(ryand): dtype? - model = T5EncoderModel.from_pretrained(path, local_files_only=True) - assert isinstance(model, T5EncoderModel) - - q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8) - - model_8bit_path.mkdir(parents=True, exist_ok=True) - q_model.save_pretrained(model_8bit_path) - - # (See earlier comment about accessing the wrapped model.) - model = q_model._wrapped - else: - model = T5EncoderModel.from_pretrained(path, local_files_only=True) - - assert isinstance(model, T5EncoderModel) - return model diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 7a577215f86..334e8fd1ea9 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -6,7 +6,7 @@ import torch from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from invokeai.app.invocations.model import ModelIdentifierField +from invokeai.app.invocations.model import TransformerField, VAEField from optimum.quanto import qfloat8 from PIL import Image from safetensors.torch import load_file @@ -52,17 +52,14 @@ class QuantizedModelForTextEncoding(FastQuantizedTransformersModel): class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" - flux_model: ModelIdentifierField = InputField( - description="The Flux model", - input=Input.Any, - ui_type=UIType.FluxMainModel + transformer: TransformerField = InputField( + description=FieldDescriptions.unet, + input=Input.Connection, + title="Transformer", ) - model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.") - quantization_type: Literal["raw", "NF4", "llm_int8"] = InputField( - default="raw", description="The type of quantization to use for the transformer model." - ) - use_8bit: bool = InputField( - default=False, description="Whether to quantize the transformer model to 8-bit precision." + vae: VAEField = InputField( + description=FieldDescriptions.vae, + input=Input.Connection, ) positive_text_conditioning: ConditioningField = InputField( description=FieldDescriptions.positive_cond, input=Input.Connection @@ -78,13 +75,6 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): @torch.no_grad() def invoke(self, context: InvocationContext) -> ImageOutput: - # model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model]) - flux_transformer_path = context.models.download_and_cache_model( - "https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/flux1-schnell.safetensors" - ) - flux_ae_path = context.models.download_and_cache_model( - "https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/ae.safetensors" - ) # Load the conditioning data. 
cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) @@ -92,56 +82,31 @@ def invoke(self, context: InvocationContext) -> ImageOutput: flux_conditioning = cond_data.conditionings[0] assert isinstance(flux_conditioning, FLUXConditioningInfo) - latents = self._run_diffusion( - context, flux_transformer_path, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds - ) - image = self._run_vae_decoding(context, flux_ae_path, latents) + latents = self._run_diffusion(context, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) + image = self._run_vae_decoding(context, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) def _run_diffusion( self, context: InvocationContext, - flux_transformer_path: Path, clip_embeddings: torch.Tensor, t5_embeddings: torch.Tensor, ): - inference_dtype = TorchDevice.choose_torch_dtype() - - # Prepare input noise. - # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a - # CPU RNG? - x = get_noise( - num_samples=1, - height=self.height, - width=self.width, - device=TorchDevice.choose_torch_device(), - dtype=inference_dtype, - seed=self.seed, - ) - - img, img_ids = self._prepare_latent_img_patches(x) - - # HACK(ryand): Find a better way to determine if this is a schnell model or not. - is_schnell = "shnell" in str(flux_transformer_path) - timesteps = get_schedule( - num_steps=self.num_steps, - image_seq_len=img.shape[1], - shift=not is_schnell, - ) - - bs, t5_seq_len, _ = t5_embeddings.shape - txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()) + scheduler_info = context.models.load(self.transformer.scheduler) + transformer_info = context.models.load(self.transformer.transformer) # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems # if the cache is not empty. - context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) + # context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) - with context.models.load_local_model( - model_path=flux_transformer_path, loader=self._load_flux_transformer - ) as transformer: - assert isinstance(transformer, Flux) + with ( + transformer_info as transformer, + scheduler_info as scheduler + ): + assert isinstance(transformer, FluxTransformer2DModel) + assert isinstance(scheduler, FlowMatchEulerDiscreteScheduler) x = denoise( model=transformer, @@ -185,75 +150,25 @@ def _prepare_latent_img_patches(self, latent_img: torch.Tensor) -> tuple[torch.T def _run_vae_decoding( self, context: InvocationContext, - flux_ae_path: Path, latents: torch.Tensor, ) -> Image.Image: - with context.models.load_local_model(model_path=flux_ae_path, loader=self._load_flux_vae) as vae: - assert isinstance(vae, AutoEncoder) - # TODO(ryand): Test that this works with both float16 and bfloat16. 
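# [Editor's sketch, not part of the patch] The decode path being removed here
# relied on torch.autocast to run the VAE in reduced precision, and the TODO
# above flags the float16-vs-bfloat16 question on older GPUs. A minimal,
# self-contained illustration of that autocast pattern, with a toy conv
# standing in for the VAE decoder:
import torch

decoder = torch.nn.Conv2d(4, 3, kernel_size=3, padding=1)
latents = torch.randn(1, 4, 64, 64)

device_type = "cuda" if torch.cuda.is_available() else "cpu"
decoder = decoder.to(device_type)

# Prefer bfloat16 where it is available; fall back to float16 on GPUs without
# bfloat16 support. That is exactly the trade-off the TODO above is about.
dtype = torch.bfloat16 if device_type == "cpu" or torch.cuda.is_bf16_supported() else torch.float16

with torch.autocast(device_type=device_type, dtype=dtype):
    image = decoder(latents.to(device_type))

print(image.dtype)  # reduced precision inside the autocast region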
- with torch.autocast(device_type=latents.device.type, dtype=TorchDevice.choose_torch_dtype()): - img = vae.decode(latents) + vae_info = context.models.load(self.vae.vae) + with vae_info as vae: + assert isinstance(vae, AutoencoderKL) img.clamp(-1, 1) img = rearrange(img[0], "c h w -> h w c") img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy()) - return img_pil - - def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel: - inference_dtype = TorchDevice.choose_torch_dtype() - if self.quantization_type == "raw": - # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. - params = flux_configs["flux-schnell"].params - - # Initialize the model on the "meta" device. - with accelerate.init_empty_weights(): - model = Flux(params).to(inference_dtype) - - state_dict = load_file(path) - # TODO(ryand): Cast the state_dict to the appropriate dtype? - model.load_state_dict(state_dict, strict=True, assign=True) - elif self.quantization_type == "NF4": - model_path = path.parent / "bnb_nf4.safetensors" - - # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. - params = flux_configs["flux-schnell"].params - # Initialize the model on the "meta" device. - with accelerate.init_empty_weights(): - model = Flux(params) - model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) - - # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle - # this on GPUs without bfloat16 support. - state_dict = load_file(model_path) - model.load_state_dict(state_dict, strict=True, assign=True) - - elif self.quantization_type == "llm_int8": - raise NotImplementedError("LLM int8 quantization is not yet supported.") - # model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) - # with accelerate.init_empty_weights(): - # empty_model = FluxTransformer2DModel.from_config(model_config) - # assert isinstance(empty_model, FluxTransformer2DModel) - # model_int8_path = path / "bnb_llm_int8" - # assert model_int8_path.exists() - # with accelerate.init_empty_weights(): - # model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) - - # sd = load_file(model_int8_path / "model.safetensors") - # model.load_state_dict(sd, strict=True, assign=True) - else: - raise ValueError(f"Unsupported quantization type: {self.quantization_type}") - - assert isinstance(model, FluxTransformer2DModel) - return model - - @staticmethod - def _load_flux_vae(path: Path) -> AutoEncoder: - # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. 
- ae_params = flux_configs["flux1-schnell"].ae_params - with accelerate.init_empty_weights(): - ae = AutoEncoder(ae_params) - - state_dict = load_file(path) - ae.load_state_dict(state_dict, strict=True, assign=True) - return ae + latents = flux_pipeline_with_vae._unpack_latents( + latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor + ) + latents = ( + latents / flux_pipeline_with_vae.vae.config.scaling_factor + ) + flux_pipeline_with_vae.vae.config.shift_factor + latents = latents.to(dtype=vae.dtype) + image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0] + image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0] + + assert isinstance(image, Image.Image) + return image diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index dd121092699..4672f6a83de 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -65,6 +65,10 @@ class TransformerField(BaseModel): transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel") scheduler: ModelIdentifierField = Field(description="Info to load scheduler submodel") +class T5EncoderField(BaseModel): + tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel") + text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel") + class VAEField(BaseModel): vae: ModelIdentifierField = Field(description="Info to load vae submodel") @@ -133,8 +137,8 @@ class FluxModelLoaderOutput(BaseInvocationOutput): """Flux base model loader output""" transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer") - clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP 1") - clip2: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP 2") + clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP") + t5Encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder") vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") @@ -166,7 +170,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer, scheduler=scheduler), clip=CLIPField(tokenizer=tokenizer, text_encoder=text_encoder, loras=[], skipped_layers=0), - clip2=CLIPField(tokenizer=tokenizer2, text_encoder=text_encoder2, loras=[], skipped_layers=0), + t5Encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=text_encoder2), vae=VAEField(vae=vae), ) diff --git a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py index dfe38aa79c2..f1691ec4d4b 100644 --- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py @@ -78,7 +78,12 @@ def get_hf_load_class(self, model_path: Path, submodel_type: Optional[SubModelTy # TO DO: Add exception handling def _hf_definition_to_type(self, module: str, class_name: str) -> ModelMixin: # fix with correct type - if module in ["diffusers", "transformers"]: + if module in [ + "diffusers", + "transformers", + "invokeai.backend.quantization.fast_quantized_transformers_model", + "invokeai.backend.quantization.fast_quantized_diffusion_model", + ]: res_type = sys.modules[module] else: res_type = sys.modules["diffusers"].pipelines diff --git 
a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index bc612043e34..b3b78104d9e 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -9,7 +9,7 @@ import torch from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SchedulerMixin -from transformers import CLIPTokenizer +from transformers import CLIPTokenizer, T5TokenizerFast from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline @@ -50,6 +50,13 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: ), ): return model.calc_size() + elif isinstance( + model, + ( + T5TokenizerFast, + ), + ): + return len(model) else: # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the # supported model types. diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py b/invokeai/backend/quantization/fast_quantized_diffusion_model.py index 0759984bf93..395efc99c47 100644 --- a/invokeai/backend/quantization/fast_quantized_diffusion_model.py +++ b/invokeai/backend/quantization/fast_quantized_diffusion_model.py @@ -12,15 +12,17 @@ ) from optimum.quanto.models import QuantizedDiffusersModel from optimum.quanto.models.shared_dict import ShardedStateDict +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from invokeai.backend.requantize import requantize class FastQuantizedDiffusersModel(QuantizedDiffusersModel): @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], base_class = FluxTransformer2DModel, **kwargs): """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" - if cls.base_class is None: + base_class = base_class or cls.base_class + if base_class is None: raise ValueError("The `base_class` attribute needs to be configured.") if not is_accelerate_available(): @@ -43,16 +45,16 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): with open(model_config_path, "r", encoding="utf-8") as f: original_model_cls_name = json.load(f)["_class_name"] - configured_cls_name = cls.base_class.__name__ + configured_cls_name = base_class.__name__ if configured_cls_name != original_model_cls_name: raise ValueError( f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})." 
) # Create an empty model - config = cls.base_class.load_config(model_name_or_path) + config = base_class.load_config(model_name_or_path) with init_empty_weights(): - model = cls.base_class.from_config(config) + model = base_class.from_config(config) # Look for the index of a sharded checkpoint checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) @@ -72,6 +74,6 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): # Requantize and load quantized weights from state_dict requantize(model, state_dict=state_dict, quantization_map=qmap) model.eval() - return cls(model) + return cls(model)._wrapped else: raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py index ce5cc7a3a9b..99f889b4af6 100644 --- a/invokeai/backend/quantization/fast_quantized_transformers_model.py +++ b/invokeai/backend/quantization/fast_quantized_transformers_model.py @@ -1,5 +1,6 @@ import json import os +import torch from typing import Union from optimum.quanto.models import QuantizedTransformersModel @@ -7,15 +8,17 @@ from transformers import AutoConfig from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available +from transformers.models.auto import AutoModelForTextEncoding from invokeai.backend.requantize import requantize class FastQuantizedTransformersModel(QuantizedTransformersModel): @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], auto_class = AutoModelForTextEncoding, **kwargs): """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" - if cls.auto_class is None: + auto_class = auto_class or cls.auto_class + if auto_class is None: raise ValueError( "Quantized models cannot be reloaded using {cls}: use a specialized quantized class such as QuantizedModelForCausalLM instead." 
) @@ -33,7 +36,7 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): # Create an empty model config = AutoConfig.from_pretrained(model_name_or_path) with init_empty_weights(): - model = cls.auto_class.from_config(config) + model = auto_class.from_config(config) # Look for the index of a sharded checkpoint checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) if os.path.exists(checkpoint_file): @@ -56,6 +59,6 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): model.tie_weights() # Set model in evaluation mode as it is done in transformers model.eval() - return cls(model) + return cls(model)._wrapped else: raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index ef0b869b8ee..b8cdc2e88d6 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -5697,15 +5697,15 @@ export type components = { */ transformer: components["schemas"]["TransformerField"]; /** - * CLIP 1 + * CLIP * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count */ clip: components["schemas"]["CLIPField"]; /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * T5 Encoder + * @description T5 tokenizer and text encoder */ - clip2: components["schemas"]["CLIPField"]; + t5Encoder: components["schemas"]["T5EncoderField"]; /** * VAE * @description VAE @@ -5739,19 +5739,17 @@ export type components = { */ use_cache?: boolean; /** - * Model - * @description The FLUX model to use for text-to-image generation. + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count * @default null - * @constant - * @enum {string} */ - model?: "flux-schnell"; + clip?: components["schemas"]["CLIPField"]; /** - * Use 8Bit - * @description Whether to quantize the transformer model to 8-bit precision. - * @default false + * T5EncoderField + * @description T5 tokenizer and text encoder + * @default null */ - use_8bit?: boolean; + t5Encoder?: components["schemas"]["T5EncoderField"]; /** * Positive Prompt * @description Positive prompt for text-to-image generation. @@ -5799,31 +5797,16 @@ export type components = { */ use_cache?: boolean; /** - * @description The Flux model + * Transformer + * @description UNet (scheduler, LoRAs) * @default null */ - flux_model?: components["schemas"]["ModelIdentifierField"]; + transformer?: components["schemas"]["TransformerField"]; /** - * Model - * @description The FLUX model to use for text-to-image generation. + * @description VAE * @default null - * @constant - * @enum {string} - */ - model?: "flux-schnell"; - /** - * Quantization Type - * @description The type of quantization to use for the transformer model. - * @default raw - * @enum {string} */ - quantization_type?: "raw" | "NF4" | "llm_int8"; - /** - * Use 8Bit - * @description Whether to quantize the transformer model to 8-bit precision. 
- * @default false - */ - use_8bit?: boolean; + vae?: components["schemas"]["VAEField"]; /** * @description Positive conditioning tensor * @default null @@ -14268,6 +14251,13 @@ export type components = { */ type: "t2i_adapter_output"; }; + /** T5EncoderField */ + T5EncoderField: { + /** @description Info to load tokenizer submodel */ + tokenizer: components["schemas"]["ModelIdentifierField"]; + /** @description Info to load text_encoder submodel */ + text_encoder: components["schemas"]["ModelIdentifierField"]; + }; /** TBLR */ TBLR: { /** Top */ From 6b0f5f43bf6985b667ef798533770b69a650a9a1 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 14 Aug 2024 11:53:07 -0400 Subject: [PATCH 026/113] Run Ruff --- invokeai/app/invocations/flux_text_to_image.py | 5 +---- invokeai/app/invocations/model.py | 2 +- invokeai/backend/model_manager/load/model_util.py | 4 +--- invokeai/backend/model_manager/util/select_hf_files.py | 2 +- .../backend/quantization/fast_quantized_diffusion_model.py | 2 +- .../quantization/fast_quantized_transformers_model.py | 4 +++- 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 334e8fd1ea9..ed744f441fd 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -101,10 +101,7 @@ def _run_diffusion( # if the cache is not empty. # context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) - with ( - transformer_info as transformer, - scheduler_info as scheduler - ): + with transformer_info as transformer, scheduler_info as scheduler: assert isinstance(transformer, FluxTransformer2DModel) assert isinstance(scheduler, FlowMatchEulerDiscreteScheduler) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 4672f6a83de..c3902c1cb14 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -60,11 +60,11 @@ class CLIPField(BaseModel): loras: List[LoRAField] = Field(description="LoRAs to apply on model loading") - class TransformerField(BaseModel): transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel") scheduler: ModelIdentifierField = Field(description="Info to load scheduler submodel") + class T5EncoderField(BaseModel): tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel") text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel") diff --git a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index b3b78104d9e..9794b8098e5 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -52,9 +52,7 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: return model.calc_size() elif isinstance( model, - ( - T5TokenizerFast, - ), + (T5TokenizerFast,), ): return len(model) else: diff --git a/invokeai/backend/model_manager/util/select_hf_files.py b/invokeai/backend/model_manager/util/select_hf_files.py index 2e86d9a62e7..60abc3384ca 100644 --- a/invokeai/backend/model_manager/util/select_hf_files.py +++ b/invokeai/backend/model_manager/util/select_hf_files.py @@ -54,7 +54,7 @@ def filter_files( "lora_weights.safetensors", "weights.pb", "onnx_data", - "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`. + "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`. 
) ): paths.append(file) diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py b/invokeai/backend/quantization/fast_quantized_diffusion_model.py index 395efc99c47..b1531094d13 100644 --- a/invokeai/backend/quantization/fast_quantized_diffusion_model.py +++ b/invokeai/backend/quantization/fast_quantized_diffusion_model.py @@ -19,7 +19,7 @@ class FastQuantizedDiffusersModel(QuantizedDiffusersModel): @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], base_class = FluxTransformer2DModel, **kwargs): + def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], base_class=FluxTransformer2DModel, **kwargs): """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" base_class = base_class or cls.base_class if base_class is None: diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py index 99f889b4af6..5f16bae611b 100644 --- a/invokeai/backend/quantization/fast_quantized_transformers_model.py +++ b/invokeai/backend/quantization/fast_quantized_transformers_model.py @@ -15,7 +15,9 @@ class FastQuantizedTransformersModel(QuantizedTransformersModel): @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], auto_class = AutoModelForTextEncoding, **kwargs): + def from_pretrained( + cls, model_name_or_path: Union[str, os.PathLike], auto_class=AutoModelForTextEncoding, **kwargs + ): """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" auto_class = auto_class or cls.auto_class if auto_class is None: From 3814cd760b43be2208cf8f04de74654b558f354d Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Thu, 15 Aug 2024 10:27:42 -0400 Subject: [PATCH 027/113] Run Ruff --- invokeai/app/invocations/flux_text_encoder.py | 10 ++-------- invokeai/app/invocations/flux_text_to_image.py | 7 +------ .../quantization/fast_quantized_diffusion_model.py | 2 +- .../quantization/fast_quantized_transformers_model.py | 3 +-- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index ce173a49a15..955d864b166 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -1,14 +1,9 @@ -from pathlib import Path - import torch from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from optimum.quanto import qfloat8 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation -from invokeai.app.invocations.model import CLIPField, T5EncoderField -from invokeai.app.invocations.fields import InputField, FieldDescriptions, Input -from invokeai.app.invocations.flux_text_to_image import FLUX_MODELS, QuantizedModelForTextEncoding +from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField from invokeai.app.invocations.model import CLIPField, T5EncoderField from invokeai.app.invocations.primitives import ConditioningOutput from invokeai.app.services.shared.invocation_context import InvocationContext @@ -40,7 +35,6 @@ class FluxTextEncoderInvocation(BaseInvocation): # compatible with other ConditioningOutputs. 
@torch.no_grad() def invoke(self, context: InvocationContext) -> ConditioningOutput: - t5_embeddings, clip_embeddings = self._encode_prompt(context) conditioning_data = ConditioningFieldData( conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)] @@ -48,7 +42,7 @@ def invoke(self, context: InvocationContext) -> ConditioningOutput: conditioning_name = context.conditioning.save(conditioning_data) return ConditioningOutput.build(conditioning_name) - + def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: # TODO: Determine the T5 max sequence length based on the model. # if self.model == "flux-schnell": diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index ed744f441fd..0f6762c2ae2 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,13 +1,9 @@ -from pathlib import Path from typing import Literal -from pydantic import Field import accelerate import torch from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from invokeai.app.invocations.model import TransformerField, VAEField -from optimum.quanto import qfloat8 from PIL import Image from safetensors.torch import load_file from transformers.models.auto import AutoModelForTextEncoding @@ -20,8 +16,8 @@ InputField, WithBoard, WithMetadata, - UIType, ) +from invokeai.app.invocations.model import TransformerField, VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 @@ -75,7 +71,6 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): @torch.no_grad() def invoke(self, context: InvocationContext) -> ImageOutput: - # Load the conditioning data. 
cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) assert len(cond_data.conditionings) == 1 diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py b/invokeai/backend/quantization/fast_quantized_diffusion_model.py index b1531094d13..65b64a69a17 100644 --- a/invokeai/backend/quantization/fast_quantized_diffusion_model.py +++ b/invokeai/backend/quantization/fast_quantized_diffusion_model.py @@ -3,6 +3,7 @@ from typing import Union from diffusers.models.model_loading_utils import load_state_dict +from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from diffusers.utils import ( CONFIG_NAME, SAFE_WEIGHTS_INDEX_NAME, @@ -12,7 +13,6 @@ ) from optimum.quanto.models import QuantizedDiffusersModel from optimum.quanto.models.shared_dict import ShardedStateDict -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel from invokeai.backend.requantize import requantize diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py index 5f16bae611b..72636a43fb1 100644 --- a/invokeai/backend/quantization/fast_quantized_transformers_model.py +++ b/invokeai/backend/quantization/fast_quantized_transformers_model.py @@ -1,14 +1,13 @@ import json import os -import torch from typing import Union from optimum.quanto.models import QuantizedTransformersModel from optimum.quanto.models.shared_dict import ShardedStateDict from transformers import AutoConfig from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict -from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available from transformers.models.auto import AutoModelForTextEncoding +from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available from invokeai.backend.requantize import requantize From a6ad70e5fb3c460cb434d501a9109a9cd80117d2 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Thu, 15 Aug 2024 10:49:14 -0400 Subject: [PATCH 028/113] Some UI cleanup, regenerate schema --- invokeai/app/invocations/flux_text_encoder.py | 2 +- invokeai/frontend/web/src/features/nodes/types/constants.ts | 2 ++ invokeai/frontend/web/src/services/api/schema.ts | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 955d864b166..8e33a3f0cd2 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -25,7 +25,7 @@ class FluxTextEncoderInvocation(BaseInvocation): input=Input.Connection, ) t5Encoder: T5EncoderField = InputField( - title="T5EncoderField", + title="T5Encoder", description=FieldDescriptions.t5Encoder, input=Input.Connection, ) diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index ca43f35b550..19927220f20 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -39,6 +39,7 @@ export const MODEL_TYPES = [ 'VAEField', 'CLIPField', 'T2IAdapterModelField', + 'T5EncoderField', 'SpandrelImageToImageModelField', ]; @@ -70,6 +71,7 @@ export const FIELD_COLORS: { [key: string]: string } = { T2IAdapterField: 'teal.500', T2IAdapterModelField: 'teal.500', UNetField: 'red.500', + T5EncoderField: 'green.500', TransformerField: 'red.500', VAEField: 
'blue.500', VAEModelField: 'teal.500', diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index b8cdc2e88d6..157aacd3f83 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -5745,7 +5745,7 @@ export type components = { */ clip?: components["schemas"]["CLIPField"]; /** - * T5EncoderField + * T5Encoder * @description T5 tokenizer and text encoder * @default null */ From f3096a8e513cbd6e8fa10555670380445f28f178 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Fri, 16 Aug 2024 17:04:48 -0400 Subject: [PATCH 029/113] Add backend functions and classes for Flux implementation, Update the way flux encoders/tokenizers are loaded for prompt encoding, Update way flux vae is loaded --- invokeai/app/invocations/flux_text_encoder.py | 31 +- .../app/invocations/flux_text_to_image.py | 4 +- invokeai/app/invocations/model.py | 81 ++++- .../model_records/model_records_base.py | 1 + .../model_records/model_records_sql.py | 2 +- .../app/services/shared/invocation_context.py | 15 + invokeai/backend/flux/math.py | 30 ++ invokeai/backend/flux/model.py | 111 +++++++ invokeai/backend/flux/modules/autoencoder.py | 312 ++++++++++++++++++ invokeai/backend/flux/modules/conditioner.py | 30 ++ invokeai/backend/flux/modules/layers.py | 253 ++++++++++++++ invokeai/backend/model_manager/config.py | 30 ++ .../model_manager/load/model_loaders/flux.py | 159 +++++++++ .../load/model_loaders/stable_diffusion.py | 10 +- .../backend/model_manager/load/model_util.py | 4 +- invokeai/backend/model_manager/probe.py | 44 ++- invokeai/configs/flux/flux1-dev.yaml | 33 ++ invokeai/configs/flux/flux1-schnell.yaml | 34 ++ .../frontend/web/src/services/api/schema.ts | 161 ++++++++- 19 files changed, 1277 insertions(+), 68 deletions(-) create mode 100644 invokeai/backend/flux/math.py create mode 100644 invokeai/backend/flux/model.py create mode 100644 invokeai/backend/flux/modules/autoencoder.py create mode 100644 invokeai/backend/flux/modules/conditioner.py create mode 100644 invokeai/backend/flux/modules/layers.py create mode 100644 invokeai/backend/model_manager/load/model_loaders/flux.py create mode 100644 invokeai/configs/flux/flux1-dev.yaml create mode 100644 invokeai/configs/flux/flux1-schnell.yaml diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 8e33a3f0cd2..5c0d0ef2ac7 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -1,6 +1,9 @@ import torch + + +from einops import repeat from diffusers.pipelines.flux.pipeline_flux import FluxPipeline -from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField @@ -9,6 +12,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.flux.modules.conditioner import HFEncoder @invocation( @@ -69,26 +73,15 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc assert isinstance(clip_text_encoder, CLIPTextModel) assert 
isinstance(t5_text_encoder, T5EncoderModel) assert isinstance(clip_tokenizer, CLIPTokenizer) - assert isinstance(t5_tokenizer, T5TokenizerFast) + assert isinstance(t5_tokenizer, T5Tokenizer) + + clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) + t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, max_seq_len) - pipeline = FluxPipeline( - scheduler=None, - vae=None, - text_encoder=clip_text_encoder, - tokenizer=clip_tokenizer, - text_encoder_2=t5_text_encoder, - tokenizer_2=t5_tokenizer, - transformer=None, - ) + prompt = [self.positive_prompt] + prompt_embeds = t5_encoder(prompt) - # prompt_embeds: T5 embeddings - # pooled_prompt_embeds: CLIP embeddings - prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt( - prompt=self.positive_prompt, - prompt_2=self.positive_prompt, - device=TorchDevice.choose_torch_device(), - max_sequence_length=max_seq_len, - ) + pooled_prompt_embeds = clip_encoder(prompt) assert isinstance(prompt_embeds, torch.Tensor) assert isinstance(pooled_prompt_embeds, torch.Tensor) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 0f6762c2ae2..1327f81709d 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -88,7 +88,6 @@ def _run_diffusion( clip_embeddings: torch.Tensor, t5_embeddings: torch.Tensor, ): - scheduler_info = context.models.load(self.transformer.scheduler) transformer_info = context.models.load(self.transformer.transformer) # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from @@ -96,9 +95,8 @@ def _run_diffusion( # if the cache is not empty. # context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) - with transformer_info as transformer, scheduler_info as scheduler: + with transformer_info as transformer: assert isinstance(transformer, FluxTransformer2DModel) - assert isinstance(scheduler, FlowMatchEulerDiscreteScheduler) x = denoise( model=transformer, diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index c3902c1cb14..3908bef4da9 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,5 +1,6 @@ import copy -from typing import List, Optional +from time import sleep +from typing import List, Optional, Literal, Dict from pydantic import BaseModel, Field @@ -13,7 +14,8 @@ from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig -from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelType, SubModelType +from invokeai.app.services.model_records import ModelRecordChanges +from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelType, SubModelType, ModelFormat class ModelIdentifierField(BaseModel): @@ -62,7 +64,6 @@ class CLIPField(BaseModel): class TransformerField(BaseModel): transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel") - scheduler: ModelIdentifierField = Field(description="Info to load scheduler submodel") class T5EncoderField(BaseModel): @@ -131,6 +132,30 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: return ModelIdentifierOutput(model=self.model) +T5_ENCODER_OPTIONS = Literal["base", "16b_quantized", "8b_quantized"] +T5_ENCODER_MAP: Dict[str, 
Dict[str, str]] = { + "base": { + "text_encoder_repo": "black-forest-labs/FLUX.1-schnell::text_encoder_2", + "tokenizer_repo": "black-forest-labs/FLUX.1-schnell::tokenizer_2", + "text_encoder_name": "FLUX.1-schnell_text_encoder_2", + "tokenizer_name": "FLUX.1-schnell_tokenizer_2", + "format": ModelFormat.T5Encoder, + }, + "8b_quantized": { + "text_encoder_repo": "hf_repo1", + "tokenizer_repo": "hf_repo1", + "text_encoder_name": "hf_repo1", + "tokenizer_name": "hf_repo1", + "format": ModelFormat.T5Encoder8b, + }, + "4b_quantized": { + "text_encoder_repo": "hf_repo2", + "tokenizer_repo": "hf_repo2", + "text_encoder_name": "hf_repo2", + "tokenizer_name": "hf_repo2", + "format": ModelFormat.T5Encoder8b, + }, +} @invocation_output("flux_model_loader_output") class FluxModelLoaderOutput(BaseInvocationOutput): @@ -151,29 +176,55 @@ class FluxModelLoaderInvocation(BaseInvocation): ui_type=UIType.FluxMainModel, input=Input.Direct, ) + + t5_encoder: T5_ENCODER_OPTIONS = InputField(description="The T5 Encoder model to use.") def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: model_key = self.model.key - # TODO: not found exceptions if not context.models.exists(model_key): raise Exception(f"Unknown model: {model_key}") - - transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) - scheduler = self.model.model_copy(update={"submodel_type": SubModelType.Scheduler}) - tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) - text_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) - tokenizer2 = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer2}) - text_encoder2 = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder2}) - vae = self.model.model_copy(update={"submodel_type": SubModelType.VAE}) + transformer = self._get_model(context, SubModelType.Transformer) + tokenizer = self._get_model(context, SubModelType.Tokenizer) + tokenizer2 = self._get_model(context, SubModelType.Tokenizer2) + clip_encoder = self._get_model(context, SubModelType.TextEncoder) + t5_encoder = self._get_model(context, SubModelType.TextEncoder2) + vae = self._install_model(context, SubModelType.VAE, "FLUX.1-schnell_ae", "black-forest-labs/FLUX.1-schnell::ae.safetensors", ModelFormat.Checkpoint, ModelType.VAE, BaseModelType.Flux) return FluxModelLoaderOutput( - transformer=TransformerField(transformer=transformer, scheduler=scheduler), - clip=CLIPField(tokenizer=tokenizer, text_encoder=text_encoder, loras=[], skipped_layers=0), - t5Encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=text_encoder2), + transformer=TransformerField(transformer=transformer), + clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0), + t5Encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), vae=VAEField(vae=vae), ) + def _get_model(self, context: InvocationContext, submodel:SubModelType) -> ModelIdentifierField: + match(submodel): + case SubModelType.Transformer: + return self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + case submodel if submodel in [SubModelType.Tokenizer, SubModelType.TextEncoder]: + return self._install_model(context, submodel, "clip-vit-large-patch14", "openai/clip-vit-large-patch14", ModelFormat.Diffusers, ModelType.CLIPEmbed, BaseModelType.Any) + case SubModelType.TextEncoder2: + return self._install_model(context, submodel, T5_ENCODER_MAP[self.t5_encoder]["text_encoder_name"], 
T5_ENCODER_MAP[self.t5_encoder]["text_encoder_repo"], ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), ModelType.T5Encoder, BaseModelType.Any) + case SubModelType.Tokenizer2: + return self._install_model(context, submodel, T5_ENCODER_MAP[self.t5_encoder]["tokenizer_name"], T5_ENCODER_MAP[self.t5_encoder]["tokenizer_repo"], ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), ModelType.T5Encoder, BaseModelType.Any) + case _: + raise Exception(f"{submodel.value} is not a supported submodule for a flux model") + + def _install_model(self, context: InvocationContext, submodel:SubModelType, name: str, repo_id: str, format: ModelFormat, type: ModelType, base: BaseModelType): + if (models := context.models.search_by_attrs(name=name, base=base, type=type)): + if len(models) != 1: + raise Exception(f"Multiple models detected for selected model with name {name}") + return ModelIdentifierField.from_config(models[0]).model_copy(update={"submodel_type": submodel}) + else: + model_path = context.models.download_and_cache_model(repo_id) + config = ModelRecordChanges(name = name, base = base, type=type, format=format) + model_install_job = context.models.import_local_model(model_path=model_path, config=config) + while not model_install_job.in_terminal_state: + sleep(0.01) + if not model_install_job.config_out: + raise Exception(f"Failed to install {name}") + return ModelIdentifierField.from_config(model_install_job.config_out).model_copy(update={"submodel_type": submodel}) @invocation( "main_model_loader", diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py index 46d11d4ddf2..9cc1486a019 100644 --- a/invokeai/app/services/model_records/model_records_base.py +++ b/invokeai/app/services/model_records/model_records_base.py @@ -77,6 +77,7 @@ class ModelRecordChanges(BaseModelExcludeNull): type: Optional[ModelType] = Field(description="Type of model", default=None) key: Optional[str] = Field(description="Database ID for this model", default=None) hash: Optional[str] = Field(description="hash of model file", default=None) + format: Optional[str] = Field(description="format of model file", default=None) trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None) default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field( description="Default settings for this model", default=None diff --git a/invokeai/app/services/model_records/model_records_sql.py b/invokeai/app/services/model_records/model_records_sql.py index 1d0780efe1f..d1ec0152429 100644 --- a/invokeai/app/services/model_records/model_records_sql.py +++ b/invokeai/app/services/model_records/model_records_sql.py @@ -301,7 +301,7 @@ def search_by_attr( for row in result: try: model_config = ModelConfigFactory.make_config(json.loads(row[0]), timestamp=row[1]) - except pydantic.ValidationError: + except pydantic.ValidationError as e: # We catch this error so that the app can still run if there are invalid model configs in the database. # One reason that an invalid model config might be in the database is if someone had to rollback from a # newer version of the app that added a new model type. 
diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index 01662335e46..9a5ac3fb5a9 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -13,6 +13,7 @@ from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin from invokeai.app.services.images.images_common import ImageDTO from invokeai.app.services.invocation_services import InvocationServices +from invokeai.app.services.model_records import ModelRecordChanges from invokeai.app.services.model_records.model_records_base import UnknownModelException from invokeai.app.util.step_callback import stable_diffusion_step_callback from invokeai.backend.model_manager.config import ( @@ -463,6 +464,20 @@ def download_and_cache_model( """ return self._services.model_manager.install.download_and_cache_model(source=source) + def import_local_model( + self, + model_path: Path, + config: Optional[ModelRecordChanges] = None, + access_token: Optional[str] = None, + inplace: Optional[bool] = False, + ): + """ + TODO: Fill out description of this method + """ + if not model_path.exists(): + raise Exception("Models provided to import_local_model must already exist on disk") + return self._services.model_manager.install.heuristic_import(str(model_path), config=config, access_token=access_token, inplace=inplace) + def load_local_model( self, model_path: Path, diff --git a/invokeai/backend/flux/math.py b/invokeai/backend/flux/math.py new file mode 100644 index 00000000000..71b91fa0f5a --- /dev/null +++ b/invokeai/backend/flux/math.py @@ -0,0 +1,30 @@ +import torch +from einops import rearrange +from torch import Tensor + + +def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor: + q, k = apply_rope(q, k, pe) + + x = torch.nn.functional.scaled_dot_product_attention(q, k, v) + x = rearrange(x, "B H L D -> B L (H D)") + + return x + + +def rope(pos: Tensor, dim: int, theta: int) -> Tensor: + assert dim % 2 == 0 + scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim + omega = 1.0 / (theta**scale) + out = torch.einsum("...n,d->...nd", pos, omega) + out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) + out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2) + return out.float() + + +def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]: + xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) + xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) + xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] + xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] + return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) \ No newline at end of file diff --git a/invokeai/backend/flux/model.py b/invokeai/backend/flux/model.py new file mode 100644 index 00000000000..2cb0aa102e7 --- /dev/null +++ b/invokeai/backend/flux/model.py @@ -0,0 +1,111 @@ +from dataclasses import dataclass + +import torch +from torch import Tensor, nn + +from invokeai.backend.flux.modules.layers import (DoubleStreamBlock, EmbedND, LastLayer, + MLPEmbedder, SingleStreamBlock, + timestep_embedding) + +@dataclass +class FluxParams: + in_channels: int + vec_in_dim: int + context_in_dim: int + hidden_size: int + mlp_ratio: float + num_heads: int + depth: int + depth_single_blocks: int + axes_dim: list[int] + theta: int + qkv_bias: bool + guidance_embed: bool + + 
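+# In the transformer below, "img" carries packed image latent tokens and "txt" carries T5 text
+# embeddings. Both are processed as token sequences: double-stream blocks keep separate image/text
+# weights, then single-stream blocks operate on the concatenated sequence.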
+class Flux(nn.Module): + """ + Transformer model for flow matching on sequences. + """ + + def __init__(self, params: FluxParams): + super().__init__() + + self.params = params + self.in_channels = params.in_channels + self.out_channels = self.in_channels + if params.hidden_size % params.num_heads != 0: + raise ValueError( + f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}" + ) + pe_dim = params.hidden_size // params.num_heads + if sum(params.axes_dim) != pe_dim: + raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") + self.hidden_size = params.hidden_size + self.num_heads = params.num_heads + self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim) + self.img_in = nn.Linear(self.in_channels, self.hidden_size, bias=True) + self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) + self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size) + self.guidance_in = ( + MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if params.guidance_embed else nn.Identity() + ) + self.txt_in = nn.Linear(params.context_in_dim, self.hidden_size) + + self.double_blocks = nn.ModuleList( + [ + DoubleStreamBlock( + self.hidden_size, + self.num_heads, + mlp_ratio=params.mlp_ratio, + qkv_bias=params.qkv_bias, + ) + for _ in range(params.depth) + ] + ) + + self.single_blocks = nn.ModuleList( + [ + SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio) + for _ in range(params.depth_single_blocks) + ] + ) + + self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels) + + def forward( + self, + img: Tensor, + img_ids: Tensor, + txt: Tensor, + txt_ids: Tensor, + timesteps: Tensor, + y: Tensor, + guidance: Tensor | None = None, + ) -> Tensor: + if img.ndim != 3 or txt.ndim != 3: + raise ValueError("Input img and txt tensors must have 3 dimensions.") + + # running on sequences img + img = self.img_in(img) + vec = self.time_in(timestep_embedding(timesteps, 256)) + if self.params.guidance_embed: + if guidance is None: + raise ValueError("Didn't get guidance strength for guidance distilled model.") + vec = vec + self.guidance_in(timestep_embedding(guidance, 256)) + vec = vec + self.vector_in(y) + txt = self.txt_in(txt) + + ids = torch.cat((txt_ids, img_ids), dim=1) + pe = self.pe_embedder(ids) + + for block in self.double_blocks: + img, txt = block(img=img, txt=txt, vec=vec, pe=pe) + + img = torch.cat((txt, img), 1) + for block in self.single_blocks: + img = block(img, vec=vec, pe=pe) + img = img[:, txt.shape[1] :, ...] 
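+        # The text tokens are dropped here; only the image tokens are projected back to patch channels.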
+ + img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) + return img \ No newline at end of file diff --git a/invokeai/backend/flux/modules/autoencoder.py b/invokeai/backend/flux/modules/autoencoder.py new file mode 100644 index 00000000000..f6e072ecaaa --- /dev/null +++ b/invokeai/backend/flux/modules/autoencoder.py @@ -0,0 +1,312 @@ +from dataclasses import dataclass + +import torch +from einops import rearrange +from torch import Tensor, nn + + +@dataclass +class AutoEncoderParams: + resolution: int + in_channels: int + ch: int + out_ch: int + ch_mult: list[int] + num_res_blocks: int + z_channels: int + scale_factor: float + shift_factor: float + + +def swish(x: Tensor) -> Tensor: + return x * torch.sigmoid(x) + + +class AttnBlock(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + self.in_channels = in_channels + + self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + + self.q = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.k = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.v = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.proj_out = nn.Conv2d(in_channels, in_channels, kernel_size=1) + + def attention(self, h_: Tensor) -> Tensor: + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + b, c, h, w = q.shape + q = rearrange(q, "b c h w -> b 1 (h w) c").contiguous() + k = rearrange(k, "b c h w -> b 1 (h w) c").contiguous() + v = rearrange(v, "b c h w -> b 1 (h w) c").contiguous() + h_ = nn.functional.scaled_dot_product_attention(q, k, v) + + return rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b) + + def forward(self, x: Tensor) -> Tensor: + return x + self.proj_out(self.attention(x)) + + +class ResnetBlock(nn.Module): + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + + self.norm1 = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True) + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.norm2 = nn.GroupNorm(num_groups=32, num_channels=out_channels, eps=1e-6, affine=True) + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + if self.in_channels != self.out_channels: + self.nin_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x): + h = x + h = self.norm1(h) + h = swish(h) + h = self.conv1(h) + + h = self.norm2(h) + h = swish(h) + h = self.conv2(h) + + if self.in_channels != self.out_channels: + x = self.nin_shortcut(x) + + return x + h + + +class Downsample(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + # no asymmetric padding in torch conv, must do it ourselves + self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0) + + def forward(self, x: Tensor): + pad = (0, 1, 0, 1) + x = nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x: Tensor): + x = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + return x + + +class Encoder(nn.Module): + def __init__( + self, + resolution: int, + in_channels: int, + ch: 
int, + ch_mult: list[int], + num_res_blocks: int, + z_channels: int, + ): + super().__init__() + self.ch = ch + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + # downsampling + self.conv_in = nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = nn.ModuleList() + block_in = self.ch + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for _ in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out)) + block_in = block_out + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in) + self.mid.attn_1 = AttnBlock(block_in) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in) + + # end + self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True) + self.conv_out = nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1) + + def forward(self, x: Tensor) -> Tensor: + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1]) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + # end + h = self.norm_out(h) + h = swish(h) + h = self.conv_out(h) + return h + + +class Decoder(nn.Module): + def __init__( + self, + ch: int, + out_ch: int, + ch_mult: list[int], + num_res_blocks: int, + in_channels: int, + resolution: int, + z_channels: int, + ): + super().__init__() + self.ch = ch + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.ffactor = 2 ** (self.num_resolutions - 1) + + # compute in_ch_mult, block_in and curr_res at lowest res + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + + # z to block_in + self.conv_in = nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in) + self.mid.attn_1 = AttnBlock(block_in) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for _ in range(self.num_res_blocks + 1): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out)) + block_in = block_out + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = 
nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True) + self.conv_out = nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) + + def forward(self, z: Tensor) -> Tensor: + # z to block_in + h = self.conv_in(z) + + # middle + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + h = self.norm_out(h) + h = swish(h) + h = self.conv_out(h) + return h + + +class DiagonalGaussian(nn.Module): + def __init__(self, sample: bool = True, chunk_dim: int = 1): + super().__init__() + self.sample = sample + self.chunk_dim = chunk_dim + + def forward(self, z: Tensor) -> Tensor: + mean, logvar = torch.chunk(z, 2, dim=self.chunk_dim) + if self.sample: + std = torch.exp(0.5 * logvar) + return mean + std * torch.randn_like(mean) + else: + return mean + + +class AutoEncoder(nn.Module): + def __init__(self, params: AutoEncoderParams): + super().__init__() + self.encoder = Encoder( + resolution=params.resolution, + in_channels=params.in_channels, + ch=params.ch, + ch_mult=params.ch_mult, + num_res_blocks=params.num_res_blocks, + z_channels=params.z_channels, + ) + self.decoder = Decoder( + resolution=params.resolution, + in_channels=params.in_channels, + ch=params.ch, + out_ch=params.out_ch, + ch_mult=params.ch_mult, + num_res_blocks=params.num_res_blocks, + z_channels=params.z_channels, + ) + self.reg = DiagonalGaussian() + + self.scale_factor = params.scale_factor + self.shift_factor = params.shift_factor + + def encode(self, x: Tensor) -> Tensor: + z = self.reg(self.encoder(x)) + z = self.scale_factor * (z - self.shift_factor) + return z + + def decode(self, z: Tensor) -> Tensor: + z = z / self.scale_factor + self.shift_factor + return self.decoder(z) + + def forward(self, x: Tensor) -> Tensor: + return self.decode(self.encode(x)) \ No newline at end of file diff --git a/invokeai/backend/flux/modules/conditioner.py b/invokeai/backend/flux/modules/conditioner.py new file mode 100644 index 00000000000..2a9e17c20e3 --- /dev/null +++ b/invokeai/backend/flux/modules/conditioner.py @@ -0,0 +1,30 @@ +from torch import Tensor, nn +from transformers import (PreTrainedModel, PreTrainedTokenizer) + +class HFEncoder(nn.Module): + def __init__(self, encoder: PreTrainedModel, tokenizer: PreTrainedTokenizer, is_clip: bool, max_length: int): + super().__init__() + self.max_length = max_length + self.is_clip = is_clip + self.output_key = "pooler_output" if self.is_clip else "last_hidden_state" + self.tokenizer = tokenizer + self.hf_module = encoder + self.hf_module = self.hf_module.eval().requires_grad_(False) + + def forward(self, text: list[str]) -> Tensor: + batch_encoding = self.tokenizer( + text, + truncation=True, + max_length=self.max_length, + return_length=False, + return_overflowing_tokens=False, + padding="max_length", + return_tensors="pt", + ) + + outputs = self.hf_module( + input_ids=batch_encoding["input_ids"].to(self.hf_module.device), + attention_mask=None, + output_hidden_states=False, + ) + return outputs[self.output_key] \ No newline at end of file diff --git a/invokeai/backend/flux/modules/layers.py b/invokeai/backend/flux/modules/layers.py new file mode 100644 index 00000000000..cb4eee0c2d7 --- /dev/null +++ b/invokeai/backend/flux/modules/layers.py @@ 
-0,0 +1,253 @@ +import math +from dataclasses import dataclass + +import torch +from einops import rearrange +from torch import Tensor, nn + +from ..math import attention, rope + + +class EmbedND(nn.Module): + def __init__(self, dim: int, theta: int, axes_dim: list[int]): + super().__init__() + self.dim = dim + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: Tensor) -> Tensor: + n_axes = ids.shape[-1] + emb = torch.cat( + [rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(n_axes)], + dim=-3, + ) + + return emb.unsqueeze(1) + + +def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0): + """ + Create sinusoidal timestep embeddings. + :param t: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an (N, D) Tensor of positional embeddings. + """ + t = time_factor * t + half = dim // 2 + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to( + t.device + ) + + args = t[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + if torch.is_floating_point(t): + embedding = embedding.to(t) + return embedding + + +class MLPEmbedder(nn.Module): + def __init__(self, in_dim: int, hidden_dim: int): + super().__init__() + self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True) + self.silu = nn.SiLU() + self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True) + + def forward(self, x: Tensor) -> Tensor: + return self.out_layer(self.silu(self.in_layer(x))) + + +class RMSNorm(torch.nn.Module): + def __init__(self, dim: int): + super().__init__() + self.scale = nn.Parameter(torch.ones(dim)) + + def forward(self, x: Tensor): + x_dtype = x.dtype + x = x.float() + rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6) + return (x * rrms).to(dtype=x_dtype) * self.scale + + +class QKNorm(torch.nn.Module): + def __init__(self, dim: int): + super().__init__() + self.query_norm = RMSNorm(dim) + self.key_norm = RMSNorm(dim) + + def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple[Tensor, Tensor]: + q = self.query_norm(q) + k = self.key_norm(k) + return q.to(v), k.to(v) + + +class SelfAttention(nn.Module): + def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.norm = QKNorm(head_dim) + self.proj = nn.Linear(dim, dim) + + def forward(self, x: Tensor, pe: Tensor) -> Tensor: + qkv = self.qkv(x) + q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads) + q, k = self.norm(q, k, v) + x = attention(q, k, v, pe=pe) + x = self.proj(x) + return x + + +@dataclass +class ModulationOut: + shift: Tensor + scale: Tensor + gate: Tensor + + +class Modulation(nn.Module): + def __init__(self, dim: int, double: bool): + super().__init__() + self.is_double = double + self.multiplier = 6 if double else 3 + self.lin = nn.Linear(dim, self.multiplier * dim, bias=True) + + def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]: + out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1) + + return ( + ModulationOut(*out[:3]), + ModulationOut(*out[3:]) if self.is_double else None, + ) + + +class 
DoubleStreamBlock(nn.Module):
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False):
+        super().__init__()
+
+        mlp_hidden_dim = int(hidden_size * mlp_ratio)
+        self.num_heads = num_heads
+        self.hidden_size = hidden_size
+        self.img_mod = Modulation(hidden_size, double=True)
+        self.img_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+        self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.img_mlp = nn.Sequential(
+            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+            nn.GELU(approximate="tanh"),
+            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+        )
+
+        self.txt_mod = Modulation(hidden_size, double=True)
+        self.txt_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+        self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.txt_mlp = nn.Sequential(
+            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+            nn.GELU(approximate="tanh"),
+            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+        )
+
+    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor) -> tuple[Tensor, Tensor]:
+        img_mod1, img_mod2 = self.img_mod(vec)
+        txt_mod1, txt_mod2 = self.txt_mod(vec)
+
+        # prepare image for attention
+        img_modulated = self.img_norm1(img)
+        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
+        img_qkv = self.img_attn.qkv(img_modulated)
+        img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
+
+        # prepare txt for attention
+        txt_modulated = self.txt_norm1(txt)
+        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
+        txt_qkv = self.txt_attn.qkv(txt_modulated)
+        txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
+
+        # run actual attention
+        q = torch.cat((txt_q, img_q), dim=2)
+        k = torch.cat((txt_k, img_k), dim=2)
+        v = torch.cat((txt_v, img_v), dim=2)
+
+        attn = attention(q, k, v, pe=pe)
+        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]
+
+        # calculate the img blocks
+        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
+        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
+
+        # calculate the txt blocks
+        txt = txt + txt_mod1.gate * self.txt_attn.proj(txt_attn)
+        txt = txt + txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
+        return img, txt
+
+
+class SingleStreamBlock(nn.Module):
+    """
+    A DiT block with parallel linear layers as described in
+    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
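+
+    A single fused linear layer (linear1) produces both the qkv projection and the MLP input; the
+    attention output and the MLP activation are then concatenated and mapped back to hidden_size by
+    linear2.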
+ """ + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + qk_scale: float | None = None, + ): + super().__init__() + self.hidden_dim = hidden_size + self.num_heads = num_heads + head_dim = hidden_size // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.mlp_hidden_dim = int(hidden_size * mlp_ratio) + # qkv and mlp_in + self.linear1 = nn.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim) + # proj and mlp_out + self.linear2 = nn.Linear(hidden_size + self.mlp_hidden_dim, hidden_size) + + self.norm = QKNorm(head_dim) + + self.hidden_size = hidden_size + self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + + self.mlp_act = nn.GELU(approximate="tanh") + self.modulation = Modulation(hidden_size, double=False) + + def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor: + mod, _ = self.modulation(vec) + x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift + qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) + + q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads) + q, k = self.norm(q, k, v) + + # compute attention + attn = attention(q, k, v, pe=pe) + # compute activation in mlp stream, cat again and run second linear layer + output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2)) + return x + mod.gate * output + + +class LastLayer(nn.Module): + def __init__(self, hidden_size: int, patch_size: int, out_channels: int): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True) + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True)) + + def forward(self, x: Tensor, vec: Tensor) -> Tensor: + shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1) + x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] + x = self.linear(x) + return x \ No newline at end of file diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index 29ef9536668..dfa6cef29b3 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -67,7 +67,9 @@ class ModelType(str, Enum): TextualInversion = "embedding" IPAdapter = "ip_adapter" CLIPVision = "clip_vision" + CLIPEmbed = "clip_embed" T2IAdapter = "t2i_adapter" + T5Encoder = "t5_encoder" SpandrelImageToImage = "spandrel_image_to_image" @@ -106,6 +108,9 @@ class ModelFormat(str, Enum): EmbeddingFile = "embedding_file" EmbeddingFolder = "embedding_folder" InvokeAI = "invokeai" + T5Encoder = "t5_encoder" + T5Encoder8b = "t5_encoder_8b" + T5Encoder4b = "t5_encoder_4b" class SchedulerPredictionType(str, Enum): @@ -207,6 +212,18 @@ class LoRAConfigBase(ModelConfigBase): trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None) +class T5EncoderConfigBase(ModelConfigBase): + type: Literal[ModelType.T5Encoder] = ModelType.T5Encoder + + +class T5EncoderConfig(T5EncoderConfigBase): + format: Literal[ModelFormat.T5Encoder] = ModelFormat.T5Encoder + + @staticmethod + def get_tag() -> Tag: + return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder.value}") + + class LoRALyCORISConfig(LoRAConfigBase): """Model config for LoRA/Lycoris models.""" @@ -352,6 +369,17 @@ def get_tag() -> Tag: return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.Checkpoint.value}") +class 
CLIPEmbedDiffusersConfig(DiffusersConfigBase): + """Model config for Clip Embeddings.""" + + type: Literal[ModelType.CLIPEmbed] = ModelType.CLIPEmbed + format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers + + @staticmethod + def get_tag() -> Tag: + return Tag(f"{ModelType.CLIPEmbed.value}.{ModelFormat.Diffusers.value}") + + class CLIPVisionDiffusersConfig(DiffusersConfigBase): """Model config for CLIPVision.""" @@ -416,6 +444,7 @@ def get_model_discriminator_value(v: Any) -> str: Annotated[ControlNetCheckpointConfig, ControlNetCheckpointConfig.get_tag()], Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()], Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()], + Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()], Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()], Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()], Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()], @@ -423,6 +452,7 @@ def get_model_discriminator_value(v: Any) -> str: Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()], Annotated[SpandrelImageToImageConfig, SpandrelImageToImageConfig.get_tag()], Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()], + Annotated[CLIPEmbedDiffusersConfig, CLIPEmbedDiffusersConfig.get_tag()], ], Discriminator(get_model_discriminator_value), ] diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py new file mode 100644 index 00000000000..7a028a55e10 --- /dev/null +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -0,0 +1,159 @@ +# Copyright (c) 2024, Brandon W. Rising and the InvokeAI Development Team +"""Class for Flux model loading in InvokeAI.""" + +from pathlib import Path +import yaml + +from dataclasses import fields +from safetensors.torch import load_file +from typing import Optional, Any +from transformers import T5EncoderModel, T5Tokenizer + +from invokeai.backend.model_manager import ( + AnyModel, + AnyModelConfig, + BaseModelType, + ModelFormat, + ModelType, + SubModelType, +) +from invokeai.backend.model_manager.config import ( + CheckpointConfigBase, + MainCheckpointConfig, + CLIPEmbedDiffusersConfig, + T5EncoderConfig, + VAECheckpointConfig, +) +from invokeai.app.services.config.config_default import get_config +from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry +from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader +from invokeai.backend.util.silence_warnings import SilenceWarnings +from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.flux.model import Flux, FluxParams +from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams, AutoEncoder +from transformers import (CLIPTextModel, CLIPTokenizer, T5EncoderModel, + T5Tokenizer) + +app_config = get_config() + + +@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.VAE, format=ModelFormat.Checkpoint) +class FluxVAELoader(GenericDiffusersLoader): + """Class to load VAE models.""" + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if isinstance(config, VAECheckpointConfig): + model_path = Path(config.path) + load_class = AutoEncoder + legacy_config_path = app_config.legacy_conf_path / config.config_path + config_path = legacy_config_path.as_posix() + with open(config_path, "r") as stream: + try: + flux_conf = 
yaml.safe_load(stream)
+                except yaml.YAMLError:
+                    raise
+
+            dataclass_fields = {f.name for f in fields(AutoEncoderParams)}
+            filtered_data = {k: v for k, v in flux_conf['params']['ae_params'].items() if k in dataclass_fields}
+            params = AutoEncoderParams(**filtered_data)
+
+            with SilenceWarnings():
+                model = load_class(params).to(self._torch_dtype)
+                # load_sft doesn't support torch.device
+                sd = load_file(model_path, device=str(TorchDevice.choose_torch_device()))
+                model.load_state_dict(sd, strict=False, assign=True)
+
+            return model
+        else:
+            return super()._load_model(config, submodel_type)
+
+
+@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPEmbed, format=ModelFormat.Diffusers)
+class ClipCheckpointModel(GenericDiffusersLoader):
+    """Class to load CLIP tokenizer and text encoder models."""
+
+    def _load_model(
+        self,
+        config: AnyModelConfig,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> AnyModel:
+        if not isinstance(config, CLIPEmbedDiffusersConfig):
+            raise Exception("Only CLIPEmbedDiffusersConfig models are currently supported.")
+
+        match submodel_type:
+            case SubModelType.Tokenizer:
+                return CLIPTokenizer.from_pretrained(config.path, max_length=77)
+            case SubModelType.TextEncoder:
+                return CLIPTextModel.from_pretrained(config.path)
+
+        raise Exception(f"Unsupported submodel requested from CLIP loader: {submodel_type}")
+
+
+@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder)
+class T5EncoderCheckpointModel(GenericDiffusersLoader):
+    """Class to load T5 tokenizer and text encoder models."""
+
+    def _load_model(
+        self,
+        config: AnyModelConfig,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> AnyModel:
+        if not isinstance(config, T5EncoderConfig):
+            raise Exception("Only T5EncoderConfig models are currently supported.")
+
+        match submodel_type:
+            case SubModelType.Tokenizer2:
+                return T5Tokenizer.from_pretrained(Path(config.path), max_length=512)
+            case SubModelType.TextEncoder2:
+                return T5EncoderModel.from_pretrained(Path(config.path))
+
+        raise Exception(f"Unsupported submodel requested from T5 encoder loader: {submodel_type}")
+
+
+@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint)
+class FluxCheckpointModel(GenericDiffusersLoader):
+    """Class to load FLUX main (transformer) models."""
+
+    def _load_model(
+        self,
+        config: AnyModelConfig,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> AnyModel:
+        if not isinstance(config, CheckpointConfigBase):
+            raise Exception("Only Checkpoint Flux models are currently supported.")
+        legacy_config_path = app_config.legacy_conf_path / config.config_path
+        config_path = legacy_config_path.as_posix()
+        with open(config_path, "r") as stream:
+            try:
+                flux_conf = yaml.safe_load(stream)
+            except yaml.YAMLError:
+                raise
+
+        match submodel_type:
+            case SubModelType.Transformer:
+                return self._load_from_singlefile(config, flux_conf)
+
+        raise Exception(f"Unsupported submodel requested from FLUX loader: {submodel_type}")
+
+    def _load_from_singlefile(
+        self,
+        config: AnyModelConfig,
+        flux_conf: Any,
+    ) -> AnyModel:
+        assert isinstance(config, MainCheckpointConfig)
+        load_class = Flux
+        model_path = Path(config.path)
+        dataclass_fields = {f.name for f in fields(FluxParams)}
+        filtered_data = {k: v for k, v in flux_conf['params'].items() if k in dataclass_fields}
+        params = FluxParams(**filtered_data)
+
+        with SilenceWarnings():
+            model = load_class(params).to(self._torch_dtype)
+            # load_sft doesn't support torch.device
+            sd = load_file(model_path, device=str(TorchDevice.choose_torch_device()))
+            model.load_state_dict(sd,
strict=False, assign=True) + return model diff --git a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py index 33ce4abc4d4..e034e110115 100644 --- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py @@ -36,8 +36,14 @@ } -@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.Main, format=ModelFormat.Diffusers) -@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.Main, format=ModelFormat.Checkpoint) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Diffusers) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Diffusers) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Diffusers) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Diffusers) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Checkpoint) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Checkpoint) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Checkpoint) +@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Checkpoint) class StableDiffusionDiffusersModel(GenericDiffusersLoader): """Class to load main models.""" diff --git a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index 9794b8098e5..6987e5222db 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -9,7 +9,7 @@ import torch from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SchedulerMixin -from transformers import CLIPTokenizer, T5TokenizerFast +from transformers import CLIPTokenizer, T5TokenizerFast, T5Tokenizer from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline @@ -52,7 +52,7 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: return model.calc_size() elif isinstance( model, - (T5TokenizerFast,), + (T5TokenizerFast,T5Tokenizer,), ): return len(model) else: diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index 82053149ad1..a3a648806fc 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -56,7 +56,7 @@ }, BaseModelType.StableDiffusionXLRefiner: { ModelVariantType.Normal: "sd_xl_refiner.yaml", - }, + } } @@ -132,7 +132,7 @@ def probe( fields = {} model_path = model_path.resolve() - + format_type = ModelFormat.Diffusers if model_path.is_dir() else ModelFormat.Checkpoint model_info = None model_type = ModelType(fields["type"]) if "type" in fields and fields["type"] else None @@ -162,7 +162,7 @@ def probe( fields["description"] = ( fields.get("description") or f"{fields['base'].value} {model_type.value} model {fields['name']}" ) - fields["format"] = fields.get("format") or probe.get_format() + fields["format"] = 
ModelFormat(fields.get("format")) or probe.get_format() fields["hash"] = fields.get("hash") or ModelHash(algorithm=hash_algo).hash(model_path) fields["default_settings"] = fields.get("default_settings") @@ -223,7 +223,7 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C ckpt = ckpt.get("state_dict", ckpt) for key in [str(k) for k in ckpt.keys()]: - if key.startswith(("cond_stage_model.", "first_stage_model.", "model.diffusion_model.")): + if key.startswith(("cond_stage_model.", "first_stage_model.", "model.diffusion_model.", "double_blocks.")): return ModelType.Main elif key.startswith(("encoder.conv_in", "decoder.conv_in")): return ModelType.VAE @@ -322,10 +322,13 @@ def _get_checkpoint_config_path( return possible_conf.absolute() if model_type is ModelType.Main: - config_file = LEGACY_CONFIGS[base_type][variant_type] - if isinstance(config_file, dict): # need another tier for sd-2.x models - config_file = config_file[prediction_type] - config_file = f"stable-diffusion/{config_file}" + if base_type == BaseModelType.Flux: + config_file="flux/flux1-schnell.yaml" + else: + config_file = LEGACY_CONFIGS[base_type][variant_type] + if isinstance(config_file, dict): # need another tier for sd-2.x models + config_file = config_file[prediction_type] + config_file = f"stable-diffusion/{config_file}" elif model_type is ModelType.ControlNet: config_file = ( "controlnet/cldm_v15.yaml" @@ -334,7 +337,9 @@ def _get_checkpoint_config_path( ) elif model_type is ModelType.VAE: config_file = ( - "stable-diffusion/v1-inference.yaml" + "flux/flux1-schnell.yaml" + if base_type is BaseModelType.Flux + else "stable-diffusion/v1-inference.yaml" if base_type is BaseModelType.StableDiffusion1 else "stable-diffusion/sd_xl_base.yaml" if base_type is BaseModelType.StableDiffusionXL @@ -421,7 +426,8 @@ def get_format(self) -> ModelFormat: def get_variant_type(self) -> ModelVariantType: model_type = ModelProbe.get_model_type_from_checkpoint(self.model_path, self.checkpoint) - if model_type != ModelType.Main: + base_type = self.get_base_type() + if model_type != ModelType.Main or base_type == BaseModelType.Flux: return ModelVariantType.Normal state_dict = self.checkpoint.get("state_dict") or self.checkpoint in_channels = state_dict["model.diffusion_model.input_blocks.0.0.weight"].shape[1] @@ -441,6 +447,8 @@ class PipelineCheckpointProbe(CheckpointProbeBase): def get_base_type(self) -> BaseModelType: checkpoint = self.checkpoint state_dict = self.checkpoint.get("state_dict") or checkpoint + if "double_blocks.0.img_attn.norm.key_norm.scale" in state_dict: + return BaseModelType.Flux key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight" if key_name in state_dict and state_dict[key_name].shape[-1] == 768: return BaseModelType.StableDiffusion1 @@ -483,6 +491,7 @@ def get_base_type(self) -> BaseModelType: (r"xl", BaseModelType.StableDiffusionXL), (r"sd2", BaseModelType.StableDiffusion2), (r"vae", BaseModelType.StableDiffusion1), + (r"FLUX.1-schnell_ae", BaseModelType.Flux), ]: if re.search(regexp, self.model_path.name, re.IGNORECASE): return basetype @@ -627,10 +636,6 @@ def get_repo_variant(self) -> ModelRepoVariant: class PipelineFolderProbe(FolderProbeBase): def get_base_type(self) -> BaseModelType: - with open(f"{self.model_path}/model_index.json", "r") as file: - conf = json.load(file) - if "_class_name" in conf and conf.get("_class_name") == "FluxPipeline": - return BaseModelType.Flux with open(self.model_path / "unet" / "config.json", "r") as file: 
unet_conf = json.load(file) if unet_conf["cross_attention_dim"] == 768: @@ -718,6 +723,10 @@ def get_base_type(self) -> BaseModelType: return TextualInversionCheckpointProbe(path).get_base_type() +class T5EncoderFolderProbe(FolderProbeBase): + def get_format(self) -> ModelFormat: + return ModelFormat.T5Encoder + class ONNXFolderProbe(PipelineFolderProbe): def get_base_type(self) -> BaseModelType: # Due to the way the installer is set up, the configuration file for safetensors @@ -810,6 +819,11 @@ def get_base_type(self) -> BaseModelType: return BaseModelType.Any +class CLIPEmbedFolderProbe(FolderProbeBase): + def get_base_type(self) -> BaseModelType: + return BaseModelType.Any + + class SpandrelImageToImageFolderProbe(FolderProbeBase): def get_base_type(self) -> BaseModelType: raise NotImplementedError() @@ -840,8 +854,10 @@ def get_base_type(self) -> BaseModelType: ModelProbe.register_probe("diffusers", ModelType.VAE, VaeFolderProbe) ModelProbe.register_probe("diffusers", ModelType.LoRA, LoRAFolderProbe) ModelProbe.register_probe("diffusers", ModelType.TextualInversion, TextualInversionFolderProbe) +ModelProbe.register_probe("diffusers", ModelType.T5Encoder, T5EncoderFolderProbe) ModelProbe.register_probe("diffusers", ModelType.ControlNet, ControlNetFolderProbe) ModelProbe.register_probe("diffusers", ModelType.IPAdapter, IPAdapterFolderProbe) +ModelProbe.register_probe("diffusers", ModelType.CLIPEmbed, CLIPEmbedFolderProbe) ModelProbe.register_probe("diffusers", ModelType.CLIPVision, CLIPVisionFolderProbe) ModelProbe.register_probe("diffusers", ModelType.T2IAdapter, T2IAdapterFolderProbe) ModelProbe.register_probe("diffusers", ModelType.SpandrelImageToImage, SpandrelImageToImageFolderProbe) diff --git a/invokeai/configs/flux/flux1-dev.yaml b/invokeai/configs/flux/flux1-dev.yaml new file mode 100644 index 00000000000..3f76f11cd4e --- /dev/null +++ b/invokeai/configs/flux/flux1-dev.yaml @@ -0,0 +1,33 @@ +repo_id: "black-forest-labs/FLUX.1-dev" +repo_ae: "ae.safetensors" +max_length: 512 +params: + in_channels: 64 + vec_in_dim: 768 + context_in_dim: 4096 + hidden_size: 3072 + mlp_ratio: 4.0 + num_heads: 24 + depth: 19 + depth_single_blocks: 38 + axes_dim: + - 16 + - 56 + - 56 + theta: 10_000 + qkv_bias: True + guidance_embed: True + ae_params: + resolution: 256 + in_channels: 3 + ch: 128 + out_ch: 3 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + z_channels: 16 + scale_factor: 0.3611 + shift_factor: 0.1159 diff --git a/invokeai/configs/flux/flux1-schnell.yaml b/invokeai/configs/flux/flux1-schnell.yaml new file mode 100644 index 00000000000..bea1824e35a --- /dev/null +++ b/invokeai/configs/flux/flux1-schnell.yaml @@ -0,0 +1,34 @@ +repo_id: "black-forest-labs/FLUX.1-schnell" +repo_ae: "ae.safetensors" +t5_encoder: "google/t5-v1_1-xxl" +max_length: 512 +params: + in_channels: 64 + vec_in_dim: 768 + context_in_dim: 4096 + hidden_size: 3072 + mlp_ratio: 4.0 + num_heads: 24 + depth: 19 + depth_single_blocks: 38 + axes_dim: + - 16 + - 56 + - 56 + theta: 10_000 + qkv_bias: True + guidance_embed: False + ae_params: + resolution: 256 + in_channels: 3 + ch: 128 + out_ch: 3 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + z_channels: 16 + scale_factor: 0.3611 + shift_factor: 0.1159 diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 157aacd3f83..8045e3e0c71 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -2439,6 +2439,72 @@ export type components = { 
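An illustrative sketch of how the FluxVAELoader introduced earlier in this series consumes the ae_params block from flux1-schnell.yaml above; the on-disk config path here is an assumption (the app resolves it through app_config.legacy_conf_path):

from dataclasses import fields

import yaml

from invokeai.backend.flux.modules.autoencoder import AutoEncoder, AutoEncoderParams

# Load the legacy-style config shown above and keep only the keys that
# AutoEncoderParams actually declares, exactly as the loader does.
with open("invokeai/configs/flux/flux1-schnell.yaml", "r") as stream:
    flux_conf = yaml.safe_load(stream)
ae_fields = {f.name for f in fields(AutoEncoderParams)}
ae_params = AutoEncoderParams(**{k: v for k, v in flux_conf["params"]["ae_params"].items() if k in ae_fields})
ae = AutoEncoder(ae_params)  # weights are then loaded from ae.safetensors with safetensors.torch.load_file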
*/ bulk_download_item_name: string; }; + /** + * CLIPEmbedDiffusersConfig + * @description Model config for Clip Embeddings. + */ + CLIPEmbedDiffusersConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + /** + * Type + * @default clip_embed + * @constant + * @enum {string} + */ + type: "clip_embed"; + }; /** CLIPField */ CLIPField: { /** @description Info to load tokenizer submodel */ @@ -5678,6 +5744,13 @@ export type components = { use_cache?: boolean; /** @description Flux model (Transformer, VAE, CLIP) to load */ model: components["schemas"]["ModelIdentifierField"]; + /** + * T5 Encoder + * @description The T5 Encoder model to use. + * @default null + * @enum {string} + */ + t5_encoder?: "base" | "16b_quantized" | "8b_quantized"; /** * type * @default flux_model_loader @@ -10597,7 +10670,7 @@ export type components = { * @description Storage format of model. * @enum {string} */ - ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai"; + ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b"; /** ModelIdentifierField */ ModelIdentifierField: { /** @@ -10897,7 +10970,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. 
*/ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null; + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -10983,7 +11056,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11004,7 +11077,7 @@ 
export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11085,6 +11158,11 @@ export type components = { * @description hash of model file */ hash?: string | null; + /** + * Format + * @description format of model file + */ + format?: string | null; /** * Trigger Phrases * @description Set of trigger phrases for this model @@ -11127,7 +11205,7 @@ export type components = { * @description Model type. * @enum {string} */ - ModelType: "onnx" | "main" | "vae" | "lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "t2i_adapter" | "spandrel_image_to_image"; + ModelType: "onnx" | "main" | "vae" | "lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "clip_embed" | "t2i_adapter" | "t5_encoder" | "spandrel_image_to_image"; /** * ModelVariantType * @description Variant type. 
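The enum additions above mirror the back-end config module; a quick Python-side sanity sketch of the new values and the discriminator tags they imply (the T5EncoderConfig tag is inferred from the shared "<type>.<format>" convention rather than stated in this diff):

from invokeai.backend.model_manager.config import ModelFormat, ModelType

assert ModelType.T5Encoder.value == "t5_encoder"
assert ModelType.CLIPEmbed.value == "clip_embed"
assert ModelFormat.T5Encoder.value == "t5_encoder"
# CLIPEmbedDiffusersConfig registers under the tag "clip_embed.diffusers";
# T5EncoderConfig presumably registers under "t5_encoder.t5_encoder".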
@@ -11140,7 +11218,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[]; + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; }; /** * Multiply Integers @@ -14251,6 +14329,67 @@ export type components = { */ type: "t2i_adapter_output"; }; + /** T5EncoderConfig */ + T5EncoderConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default t5_encoder + * @constant + * @enum {string} + */ + type: "t5_encoder"; + /** + * Format + * @default t5_encoder + * @constant + * @enum {string} + */ + format: "t5_encoder"; + }; /** T5EncoderField */ T5EncoderField: { /** @description Info to load tokenizer submodel */ @@ -14693,8 +14832,6 @@ export type components = { TransformerField: { /** @description Info to load Transformer submodel */ transformer: components["schemas"]["ModelIdentifierField"]; - /** @description Info to load scheduler submodel */ - scheduler: components["schemas"]["ModelIdentifierField"]; }; /** * UIComponent @@ -15482,7 +15619,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Validation Error */ @@ -15514,7 +15651,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | 
components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -15611,7 +15748,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -16111,7 +16248,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | 
components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ From 5fc6c28239ee8516ec9d64846feb3422f0c2d99f Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 19 Aug 2024 10:14:58 -0400 Subject: [PATCH 030/113] Run ruff, setup initial text to image node --- invokeai/app/invocations/flux_text_encoder.py | 7 +- .../app/invocations/flux_text_to_image.py | 75 +++++----- invokeai/app/invocations/model.py | 95 ++++++++----- .../model_records/model_records_sql.py | 2 +- .../app/services/shared/invocation_context.py | 14 +- invokeai/backend/flux/math.py | 2 +- invokeai/backend/flux/model.py | 18 ++- invokeai/backend/flux/modules/autoencoder.py | 2 +- invokeai/backend/flux/modules/conditioner.py | 5 +- invokeai/backend/flux/modules/layers.py | 6 +- invokeai/backend/flux/sampling.py | 134 ++++++++++++++++++ .../model_manager/load/model_loaders/flux.py | 31 ++-- .../load/model_loaders/stable_diffusion.py | 8 +- .../backend/model_manager/load/model_util.py | 7 +- invokeai/backend/model_manager/probe.py | 7 +- 15 files changed, 290 insertions(+), 123 deletions(-) create mode 100644 invokeai/backend/flux/sampling.py diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 5c0d0ef2ac7..9cecd89bcaf 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -1,8 +1,4 @@ import torch - - -from einops import repeat -from diffusers.pipelines.flux.pipeline_flux import FluxPipeline from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -10,9 +6,8 @@ from invokeai.app.invocations.model import CLIPField, T5EncoderField from invokeai.app.invocations.primitives import ConditioningOutput from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo -from invokeai.backend.util.devices import TorchDevice from invokeai.backend.flux.modules.conditioner import HFEncoder +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo @invocation( diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 1327f81709d..fdb8e9c1dd1 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,12 +1,6 @@ -from typing import Literal - -import accelerate import torch -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from diffusers.pipelines.flux.pipeline_flux import FluxPipeline +from einops import rearrange, repeat from PIL import Image -from safetensors.torch 
import load_file -from transformers.models.auto import AutoModelForTextEncoding from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation from invokeai.app.invocations.fields import ( @@ -20,23 +14,12 @@ from invokeai.app.invocations.model import TransformerField, VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 -from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel -from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.modules.autoencoder import AutoEncoder +from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice -TFluxModelKeys = Literal["flux-schnell"] -FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"} - - -class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel): - base_class = FluxTransformer2DModel - - -class QuantizedModelForTextEncoding(FastQuantizedTransformersModel): - auto_class = AutoModelForTextEncoding - @invocation( "flux_text_to_image", @@ -78,7 +61,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput: assert isinstance(flux_conditioning, FLUXConditioningInfo) latents = self._run_diffusion(context, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) - image = self._run_vae_decoding(context, latents) + image = self._run_vae_decoding(context, flux_ae_path, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) @@ -89,14 +72,40 @@ def _run_diffusion( t5_embeddings: torch.Tensor, ): transformer_info = context.models.load(self.transformer.transformer) + inference_dtype = TorchDevice.choose_torch_dtype() + + # Prepare input noise. + # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a + # CPU RNG? + x = get_noise( + num_samples=1, + height=self.height, + width=self.width, + device=TorchDevice.choose_torch_device(), + dtype=inference_dtype, + seed=self.seed, + ) + + img, img_ids = self._prepare_latent_img_patches(x) + + # HACK(ryand): Find a better way to determine if this is a schnell model or not. + is_schnell = "shnell" in transformer_info.config.path if transformer_info.config else "" + timesteps = get_schedule( + num_steps=self.num_steps, + image_seq_len=img.shape[1], + shift=not is_schnell, + ) + + bs, t5_seq_len, _ = t5_embeddings.shape + txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()) # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems # if the cache is not empty. 
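A small sketch of how the sampling helpers used above fit together for a schnell-style run; the resolution, seed, and num_steps values are illustrative only:

import torch

from invokeai.backend.flux.sampling import get_noise, get_schedule

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Noise is created in the latent space: (num_samples, 16, 2*ceil(h/16), 2*ceil(w/16)).
x = get_noise(num_samples=1, height=1024, width=1024, device=device, dtype=torch.bfloat16, seed=42)
# After 2x2 patchification the image sequence length is ceil(h/16) * ceil(w/16) = 64 * 64.
timesteps = get_schedule(num_steps=4, image_seq_len=64 * 64, shift=False)  # schnell: no shift
assert len(timesteps) == 5 and timesteps[0] == 1.0 and timesteps[-1] == 0.0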
- # context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) + context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30) with transformer_info as transformer: - assert isinstance(transformer, FluxTransformer2DModel) + assert isinstance(transformer, Flux) x = denoise( model=transformer, @@ -144,21 +153,13 @@ def _run_vae_decoding( ) -> Image.Image: vae_info = context.models.load(self.vae.vae) with vae_info as vae: - assert isinstance(vae, AutoencoderKL) + assert isinstance(vae, AutoEncoder) + # TODO(ryand): Test that this works with both float16 and bfloat16. + with torch.autocast(device_type=latents.device.type, dtype=TorchDevice.choose_torch_dtype()): + img = vae.decode(latents) img.clamp(-1, 1) img = rearrange(img[0], "c h w -> h w c") img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy()) - latents = flux_pipeline_with_vae._unpack_latents( - latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor - ) - latents = ( - latents / flux_pipeline_with_vae.vae.config.scaling_factor - ) + flux_pipeline_with_vae.vae.config.shift_factor - latents = latents.to(dtype=vae.dtype) - image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0] - image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0] - - assert isinstance(image, Image.Image) - return image + return img_pil diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 3908bef4da9..f408dc3e0e4 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,6 +1,6 @@ import copy from time import sleep -from typing import List, Optional, Literal, Dict +from typing import Dict, List, Literal, Optional from pydantic import BaseModel, Field @@ -12,10 +12,10 @@ invocation_output, ) from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType +from invokeai.app.services.model_records import ModelRecordChanges from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig -from invokeai.app.services.model_records import ModelRecordChanges -from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelType, SubModelType, ModelFormat +from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType class ModelIdentifierField(BaseModel): @@ -132,31 +132,22 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: return ModelIdentifierOutput(model=self.model) -T5_ENCODER_OPTIONS = Literal["base", "16b_quantized", "8b_quantized"] + +T5_ENCODER_OPTIONS = Literal["base", "8b_quantized"] T5_ENCODER_MAP: Dict[str, Dict[str, str]] = { "base": { - "text_encoder_repo": "black-forest-labs/FLUX.1-schnell::text_encoder_2", - "tokenizer_repo": "black-forest-labs/FLUX.1-schnell::tokenizer_2", - "text_encoder_name": "FLUX.1-schnell_text_encoder_2", - "tokenizer_name": "FLUX.1-schnell_tokenizer_2", + "repo": "invokeai/flux_dev::t5_xxl_encoder/base", + "name": "t5_base_encoder", "format": ModelFormat.T5Encoder, }, "8b_quantized": { - "text_encoder_repo": "hf_repo1", - "tokenizer_repo": "hf_repo1", - "text_encoder_name": "hf_repo1", - "tokenizer_name": "hf_repo1", - "format": ModelFormat.T5Encoder8b, - }, - "4b_quantized": { - "text_encoder_repo": "hf_repo2", - "tokenizer_repo": "hf_repo2", - "text_encoder_name": "hf_repo2", - "tokenizer_name": "hf_repo2", - "format": ModelFormat.T5Encoder8b, + "repo": 
"invokeai/flux_dev::t5_xxl_encoder/8b_quantized", + "name": "t5_8b_quantized_encoder", + "format": ModelFormat.T5Encoder, }, } + @invocation_output("flux_model_loader_output") class FluxModelLoaderOutput(BaseInvocationOutput): """Flux base model loader output""" @@ -176,7 +167,7 @@ class FluxModelLoaderInvocation(BaseInvocation): ui_type=UIType.FluxMainModel, input=Input.Direct, ) - + t5_encoder: T5_ENCODER_OPTIONS = InputField(description="The T5 Encoder model to use.") def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: @@ -189,7 +180,15 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: tokenizer2 = self._get_model(context, SubModelType.Tokenizer2) clip_encoder = self._get_model(context, SubModelType.TextEncoder) t5_encoder = self._get_model(context, SubModelType.TextEncoder2) - vae = self._install_model(context, SubModelType.VAE, "FLUX.1-schnell_ae", "black-forest-labs/FLUX.1-schnell::ae.safetensors", ModelFormat.Checkpoint, ModelType.VAE, BaseModelType.Flux) + vae = self._install_model( + context, + SubModelType.VAE, + "FLUX.1-schnell_ae", + "black-forest-labs/FLUX.1-schnell::ae.safetensors", + ModelFormat.Checkpoint, + ModelType.VAE, + BaseModelType.Flux, + ) return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer), @@ -198,33 +197,59 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: vae=VAEField(vae=vae), ) - def _get_model(self, context: InvocationContext, submodel:SubModelType) -> ModelIdentifierField: - match(submodel): + def _get_model(self, context: InvocationContext, submodel: SubModelType) -> ModelIdentifierField: + match submodel: case SubModelType.Transformer: return self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) case submodel if submodel in [SubModelType.Tokenizer, SubModelType.TextEncoder]: - return self._install_model(context, submodel, "clip-vit-large-patch14", "openai/clip-vit-large-patch14", ModelFormat.Diffusers, ModelType.CLIPEmbed, BaseModelType.Any) - case SubModelType.TextEncoder2: - return self._install_model(context, submodel, T5_ENCODER_MAP[self.t5_encoder]["text_encoder_name"], T5_ENCODER_MAP[self.t5_encoder]["text_encoder_repo"], ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), ModelType.T5Encoder, BaseModelType.Any) - case SubModelType.Tokenizer2: - return self._install_model(context, submodel, T5_ENCODER_MAP[self.t5_encoder]["tokenizer_name"], T5_ENCODER_MAP[self.t5_encoder]["tokenizer_repo"], ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), ModelType.T5Encoder, BaseModelType.Any) + return self._install_model( + context, + submodel, + "clip-vit-large-patch14", + "openai/clip-vit-large-patch14", + ModelFormat.Diffusers, + ModelType.CLIPEmbed, + BaseModelType.Any, + ) + case submodel if submodel in [SubModelType.Tokenizer2, SubModelType.TextEncoder2]: + return self._install_model( + context, + submodel, + T5_ENCODER_MAP[self.t5_encoder]["name"], + T5_ENCODER_MAP[self.t5_encoder]["repo"], + ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), + ModelType.T5Encoder, + BaseModelType.Any, + ) case _: - raise Exception(f"{submodel.value} is not a supported submodule for a flux model") - - def _install_model(self, context: InvocationContext, submodel:SubModelType, name: str, repo_id: str, format: ModelFormat, type: ModelType, base: BaseModelType): - if (models := context.models.search_by_attrs(name=name, base=base, type=type)): + raise Exception(f"{submodel.value} is not a supported submodule for a flux model") + + def 
_install_model( + self, + context: InvocationContext, + submodel: SubModelType, + name: str, + repo_id: str, + format: ModelFormat, + type: ModelType, + base: BaseModelType, + ): + if models := context.models.search_by_attrs(name=name, base=base, type=type): if len(models) != 1: raise Exception(f"Multiple models detected for selected model with name {name}") return ModelIdentifierField.from_config(models[0]).model_copy(update={"submodel_type": submodel}) else: model_path = context.models.download_and_cache_model(repo_id) - config = ModelRecordChanges(name = name, base = base, type=type, format=format) + config = ModelRecordChanges(name=name, base=base, type=type, format=format) model_install_job = context.models.import_local_model(model_path=model_path, config=config) while not model_install_job.in_terminal_state: sleep(0.01) if not model_install_job.config_out: raise Exception(f"Failed to install {name}") - return ModelIdentifierField.from_config(model_install_job.config_out).model_copy(update={"submodel_type": submodel}) + return ModelIdentifierField.from_config(model_install_job.config_out).model_copy( + update={"submodel_type": submodel} + ) + @invocation( "main_model_loader", diff --git a/invokeai/app/services/model_records/model_records_sql.py b/invokeai/app/services/model_records/model_records_sql.py index d1ec0152429..1d0780efe1f 100644 --- a/invokeai/app/services/model_records/model_records_sql.py +++ b/invokeai/app/services/model_records/model_records_sql.py @@ -301,7 +301,7 @@ def search_by_attr( for row in result: try: model_config = ModelConfigFactory.make_config(json.loads(row[0]), timestamp=row[1]) - except pydantic.ValidationError as e: + except pydantic.ValidationError: # We catch this error so that the app can still run if there are invalid model configs in the database. # One reason that an invalid model config might be in the database is if someone had to rollback from a # newer version of the app that added a new model type. 
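A condensed sketch of the resolve-or-install flow that FluxModelLoaderInvocation._install_model implements above, assuming context is the InvocationContext handed to invoke(); the name and repo values are the base T5 encoder entries from T5_ENCODER_MAP as defined in this patch:

from time import sleep

from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.model_records import ModelRecordChanges
from invokeai.backend.model_manager.config import BaseModelType, ModelFormat, ModelType, SubModelType


def resolve_t5_encoder(context) -> ModelIdentifierField:
    name, repo = "t5_base_encoder", "invokeai/flux_dev::t5_xxl_encoder/base"
    if models := context.models.search_by_attrs(name=name, base=BaseModelType.Any, type=ModelType.T5Encoder):
        config = models[0]
    else:
        path = context.models.download_and_cache_model(repo)
        changes = ModelRecordChanges(name=name, base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder)
        job = context.models.import_local_model(model_path=path, config=changes)
        while not job.in_terminal_state:  # poll the install job, as the node does
            sleep(0.01)
        if not job.config_out:
            raise Exception(f"Failed to install {name}")
        config = job.config_out
    return ModelIdentifierField.from_config(config).model_copy(update={"submodel_type": SubModelType.TextEncoder2})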
diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index 9a5ac3fb5a9..9ba1bf68f34 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -465,18 +465,20 @@ def download_and_cache_model( return self._services.model_manager.install.download_and_cache_model(source=source) def import_local_model( - self, - model_path: Path, - config: Optional[ModelRecordChanges] = None, - access_token: Optional[str] = None, - inplace: Optional[bool] = False, + self, + model_path: Path, + config: Optional[ModelRecordChanges] = None, + access_token: Optional[str] = None, + inplace: Optional[bool] = False, ): """ TODO: Fill out description of this method """ if not model_path.exists(): raise Exception("Models provided to import_local_model must already exist on disk") - return self._services.model_manager.install.heuristic_import(str(model_path), config=config, access_token=access_token, inplace=inplace) + return self._services.model_manager.install.heuristic_import( + str(model_path), config=config, access_token=access_token, inplace=inplace + ) def load_local_model( self, diff --git a/invokeai/backend/flux/math.py b/invokeai/backend/flux/math.py index 71b91fa0f5a..0156bb6a205 100644 --- a/invokeai/backend/flux/math.py +++ b/invokeai/backend/flux/math.py @@ -27,4 +27,4 @@ def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tenso xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] - return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) \ No newline at end of file + return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) diff --git a/invokeai/backend/flux/model.py b/invokeai/backend/flux/model.py index 2cb0aa102e7..f7ef25bf4fa 100644 --- a/invokeai/backend/flux/model.py +++ b/invokeai/backend/flux/model.py @@ -3,9 +3,15 @@ import torch from torch import Tensor, nn -from invokeai.backend.flux.modules.layers import (DoubleStreamBlock, EmbedND, LastLayer, - MLPEmbedder, SingleStreamBlock, - timestep_embedding) +from invokeai.backend.flux.modules.layers import ( + DoubleStreamBlock, + EmbedND, + LastLayer, + MLPEmbedder, + SingleStreamBlock, + timestep_embedding, +) + @dataclass class FluxParams: @@ -35,9 +41,7 @@ def __init__(self, params: FluxParams): self.in_channels = params.in_channels self.out_channels = self.in_channels if params.hidden_size % params.num_heads != 0: - raise ValueError( - f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}" - ) + raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}") pe_dim = params.hidden_size // params.num_heads if sum(params.axes_dim) != pe_dim: raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") @@ -108,4 +112,4 @@ def forward( img = img[:, txt.shape[1] :, ...] 
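For concreteness, the constraints checked in Flux.__init__ above, worked through with the flux1-schnell values from earlier in this series; FluxParams is assumed to expose exactly the keys that appear under params: in that YAML, as implied by the loader's dataclass-field filtering:

from invokeai.backend.flux.model import Flux, FluxParams

params = FluxParams(
    in_channels=64, vec_in_dim=768, context_in_dim=4096, hidden_size=3072,
    mlp_ratio=4.0, num_heads=24, depth=19, depth_single_blocks=38,
    axes_dim=[16, 56, 56], theta=10_000, qkv_bias=True, guidance_embed=False,
)
# hidden_size must divide evenly by num_heads (3072 / 24 = 128), and sum(axes_dim)
# must equal that per-head dim (16 + 56 + 56 = 128), so construction succeeds.
model = Flux(params)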
img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) - return img \ No newline at end of file + return img diff --git a/invokeai/backend/flux/modules/autoencoder.py b/invokeai/backend/flux/modules/autoencoder.py index f6e072ecaaa..75159f711f6 100644 --- a/invokeai/backend/flux/modules/autoencoder.py +++ b/invokeai/backend/flux/modules/autoencoder.py @@ -309,4 +309,4 @@ def decode(self, z: Tensor) -> Tensor: return self.decoder(z) def forward(self, x: Tensor) -> Tensor: - return self.decode(self.encode(x)) \ No newline at end of file + return self.decode(self.encode(x)) diff --git a/invokeai/backend/flux/modules/conditioner.py b/invokeai/backend/flux/modules/conditioner.py index 2a9e17c20e3..974ad64ab3a 100644 --- a/invokeai/backend/flux/modules/conditioner.py +++ b/invokeai/backend/flux/modules/conditioner.py @@ -1,5 +1,6 @@ from torch import Tensor, nn -from transformers import (PreTrainedModel, PreTrainedTokenizer) +from transformers import PreTrainedModel, PreTrainedTokenizer + class HFEncoder(nn.Module): def __init__(self, encoder: PreTrainedModel, tokenizer: PreTrainedTokenizer, is_clip: bool, max_length: int): @@ -27,4 +28,4 @@ def forward(self, text: list[str]) -> Tensor: attention_mask=None, output_hidden_states=False, ) - return outputs[self.output_key] \ No newline at end of file + return outputs[self.output_key] diff --git a/invokeai/backend/flux/modules/layers.py b/invokeai/backend/flux/modules/layers.py index cb4eee0c2d7..4f9d515dafc 100644 --- a/invokeai/backend/flux/modules/layers.py +++ b/invokeai/backend/flux/modules/layers.py @@ -36,9 +36,7 @@ def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 10 """ t = time_factor * t half = dim // 2 - freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to( - t.device - ) + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(t.device) args = t[:, None].float() * freqs[None] embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) @@ -250,4 +248,4 @@ def forward(self, x: Tensor, vec: Tensor) -> Tensor: shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1) x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] x = self.linear(x) - return x \ No newline at end of file + return x diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py new file mode 100644 index 00000000000..89d9d417e0d --- /dev/null +++ b/invokeai/backend/flux/sampling.py @@ -0,0 +1,134 @@ +import math +from typing import Callable + +import torch +from einops import rearrange, repeat +from torch import Tensor + +from .model import Flux +from .modules.conditioner import HFEncoder + + +def get_noise( + num_samples: int, + height: int, + width: int, + device: torch.device, + dtype: torch.dtype, + seed: int, +): + return torch.randn( + num_samples, + 16, + # allow for packing + 2 * math.ceil(height / 16), + 2 * math.ceil(width / 16), + device=device, + dtype=dtype, + generator=torch.Generator(device=device).manual_seed(seed), + ) + + +def prepare(t5: HFEncoder, clip: HFEncoder, img: Tensor, prompt: str | list[str]) -> dict[str, Tensor]: + bs, c, h, w = img.shape + if bs == 1 and not isinstance(prompt, str): + bs = len(prompt) + + img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) + if img.shape[0] == 1 and bs > 1: + img = repeat(img, "1 ... 
-> bs ...", bs=bs) + + img_ids = torch.zeros(h // 2, w // 2, 3) + img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None] + img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :] + img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + + if isinstance(prompt, str): + prompt = [prompt] + txt = t5(prompt) + if txt.shape[0] == 1 and bs > 1: + txt = repeat(txt, "1 ... -> bs ...", bs=bs) + txt_ids = torch.zeros(bs, txt.shape[1], 3) + + vec = clip(prompt) + if vec.shape[0] == 1 and bs > 1: + vec = repeat(vec, "1 ... -> bs ...", bs=bs) + + return { + "img": img, + "img_ids": img_ids.to(img.device), + "txt": txt.to(img.device), + "txt_ids": txt_ids.to(img.device), + "vec": vec.to(img.device), + } + + +def time_shift(mu: float, sigma: float, t: Tensor): + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + +def get_lin_function(x1: float = 256, y1: float = 0.5, x2: float = 4096, y2: float = 1.15) -> Callable[[float], float]: + m = (y2 - y1) / (x2 - x1) + b = y1 - m * x1 + return lambda x: m * x + b + + +def get_schedule( + num_steps: int, + image_seq_len: int, + base_shift: float = 0.5, + max_shift: float = 1.15, + shift: bool = True, +) -> list[float]: + # extra step for zero + timesteps = torch.linspace(1, 0, num_steps + 1) + + # shifting the schedule to favor high timesteps for higher signal images + if shift: + # eastimate mu based on linear estimation between two points + mu = get_lin_function(y1=base_shift, y2=max_shift)(image_seq_len) + timesteps = time_shift(mu, 1.0, timesteps) + + return timesteps.tolist() + + +def denoise( + model: Flux, + # model input + img: Tensor, + img_ids: Tensor, + txt: Tensor, + txt_ids: Tensor, + vec: Tensor, + # sampling parameters + timesteps: list[float], + guidance: float = 4.0, +): + # this is ignored for schnell + guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) + for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:], strict=False): + t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) + pred = model( + img=img, + img_ids=img_ids, + txt=txt, + txt_ids=txt_ids, + y=vec, + timesteps=t_vec, + guidance=guidance_vec, + ) + + img = img + (t_prev - t_curr) * pred + + return img + + +def unpack(x: Tensor, height: int, width: int) -> Tensor: + return rearrange( + x, + "b (h w) (c ph pw) -> b c (h ph) (w pw)", + h=math.ceil(height / 16), + w=math.ceil(width / 16), + ph=2, + pw=2, + ) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 7a028a55e10..78ecfccfa3b 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -1,14 +1,17 @@ # Copyright (c) 2024, Brandon W. 
Rising and the InvokeAI Development Team """Class for Flux model loading in InvokeAI.""" +from dataclasses import fields from pathlib import Path -import yaml +from typing import Any, Optional -from dataclasses import fields +import yaml from safetensors.torch import load_file -from typing import Optional, Any -from transformers import T5EncoderModel, T5Tokenizer +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer +from invokeai.app.services.config.config_default import get_config +from invokeai.backend.flux.model import Flux, FluxParams +from invokeai.backend.flux.modules.autoencoder import AutoEncoder, AutoEncoderParams from invokeai.backend.model_manager import ( AnyModel, AnyModelConfig, @@ -19,20 +22,15 @@ ) from invokeai.backend.model_manager.config import ( CheckpointConfigBase, - MainCheckpointConfig, CLIPEmbedDiffusersConfig, + MainCheckpointConfig, T5EncoderConfig, VAECheckpointConfig, ) -from invokeai.app.services.config.config_default import get_config from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader -from invokeai.backend.util.silence_warnings import SilenceWarnings from invokeai.backend.util.devices import TorchDevice -from invokeai.backend.flux.model import Flux, FluxParams -from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams, AutoEncoder -from transformers import (CLIPTextModel, CLIPTokenizer, T5EncoderModel, - T5Tokenizer) +from invokeai.backend.util.silence_warnings import SilenceWarnings app_config = get_config() @@ -56,9 +54,9 @@ def _load_model( flux_conf = yaml.safe_load(stream) except: raise - + dataclass_fields = {f.name for f in fields(AutoEncoderParams)} - filtered_data = {k: v for k, v in flux_conf['params']['ae_params'].items() if k in dataclass_fields} + filtered_data = {k: v for k, v in flux_conf["params"]["ae_params"].items() if k in dataclass_fields} params = AutoEncoderParams(**filtered_data) with SilenceWarnings(): @@ -92,6 +90,7 @@ def _load_model( raise Exception("Only Checkpoint Flux models are currently supported.") + @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) class T5EncoderCheckpointModel(GenericDiffusersLoader): """Class to load main models.""" @@ -106,9 +105,9 @@ def _load_model( match submodel_type: case SubModelType.Tokenizer2: - return T5Tokenizer.from_pretrained(Path(config.path), max_length=512) + return T5Tokenizer.from_pretrained(Path(config.path) / "encoder", max_length=512) case SubModelType.TextEncoder2: - return T5EncoderModel.from_pretrained(Path(config.path)) + return T5EncoderModel.from_pretrained(Path(config.path) / "tokenizer") raise Exception("Only Checkpoint Flux models are currently supported.") @@ -148,7 +147,7 @@ def _load_from_singlefile( params = None model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} - filtered_data = {k: v for k, v in flux_conf['params'].items() if k in dataclass_fields} + filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} params = FluxParams(**filtered_data) with SilenceWarnings(): diff --git a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py index e034e110115..572859dbaee 100644 --- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +++ 
b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py @@ -39,11 +39,15 @@ @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Diffusers) @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Diffusers) @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Diffusers) -@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Diffusers) +@ModelLoaderRegistry.register( + base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Diffusers +) @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Checkpoint) @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Checkpoint) @ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Checkpoint) -@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Checkpoint) +@ModelLoaderRegistry.register( + base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Checkpoint +) class StableDiffusionDiffusersModel(GenericDiffusersLoader): """Class to load main models.""" diff --git a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index 6987e5222db..6f93fcbd759 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -9,7 +9,7 @@ import torch from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SchedulerMixin -from transformers import CLIPTokenizer, T5TokenizerFast, T5Tokenizer +from transformers import CLIPTokenizer, T5Tokenizer, T5TokenizerFast from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline @@ -52,7 +52,10 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: return model.calc_size() elif isinstance( model, - (T5TokenizerFast,T5Tokenizer,), + ( + T5TokenizerFast, + T5Tokenizer, + ), ): return len(model) else: diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index a3a648806fc..fcb4e9b2f03 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -56,7 +56,7 @@ }, BaseModelType.StableDiffusionXLRefiner: { ModelVariantType.Normal: "sd_xl_refiner.yaml", - } + }, } @@ -132,7 +132,7 @@ def probe( fields = {} model_path = model_path.resolve() - + format_type = ModelFormat.Diffusers if model_path.is_dir() else ModelFormat.Checkpoint model_info = None model_type = ModelType(fields["type"]) if "type" in fields and fields["type"] else None @@ -323,7 +323,7 @@ def _get_checkpoint_config_path( if model_type is ModelType.Main: if base_type == BaseModelType.Flux: - config_file="flux/flux1-schnell.yaml" + config_file = "flux/flux1-schnell.yaml" else: config_file = LEGACY_CONFIGS[base_type][variant_type] if isinstance(config_file, dict): # need another tier for sd-2.x models @@ -727,6 +727,7 @@ class T5EncoderFolderProbe(FolderProbeBase): def get_format(self) -> ModelFormat: return ModelFormat.T5Encoder + class 
ONNXFolderProbe(PipelineFolderProbe): def get_base_type(self) -> BaseModelType: # Due to the way the installer is set up, the configuration file for safetensors From 68d28db4362077192b1287721b6e37cf7fc1fed8 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 19 Aug 2024 12:08:24 -0400 Subject: [PATCH 031/113] Add nf4 bnb quantized format --- invokeai/app/invocations/model.py | 4 ++-- invokeai/backend/model_manager/config.py | 21 ++++++++++++++++++--- invokeai/backend/model_manager/probe.py | 8 ++++++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index f408dc3e0e4..a6ec64d5c78 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -136,12 +136,12 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: T5_ENCODER_OPTIONS = Literal["base", "8b_quantized"] T5_ENCODER_MAP: Dict[str, Dict[str, str]] = { "base": { - "repo": "invokeai/flux_dev::t5_xxl_encoder/base", + "repo": "InvokeAI/flux_schnell::t5_xxl_encoder/base", "name": "t5_base_encoder", "format": ModelFormat.T5Encoder, }, "8b_quantized": { - "repo": "invokeai/flux_dev::t5_xxl_encoder/8b_quantized", + "repo": "invokeai/flux_dev::t5_xxl_encoder/optimum_quanto_qfloat8", "name": "t5_8b_quantized_encoder", "format": ModelFormat.T5Encoder, }, diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index dfa6cef29b3..ce6b8ed8ccb 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -111,6 +111,7 @@ class ModelFormat(str, Enum): T5Encoder = "t5_encoder" T5Encoder8b = "t5_encoder_8b" T5Encoder4b = "t5_encoder_4b" + BnbQuantizednf4b = "bnb_quantized_nf4b" class SchedulerPredictionType(str, Enum): @@ -193,7 +194,7 @@ def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseModel]) -> N class CheckpointConfigBase(ModelConfigBase): """Model config for checkpoint-style models.""" - format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint + format: Literal[ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] = Field(description="Format of the provided checkpoint model", default=ModelFormat.Checkpoint) config_path: str = Field(description="path to the checkpoint model config file") converted_at: Optional[float] = Field( description="When this model was last converted to diffusers", default_factory=time.time @@ -248,7 +249,6 @@ class VAECheckpointConfig(CheckpointConfigBase): """Model config for standalone VAE models.""" type: Literal[ModelType.VAE] = ModelType.VAE - format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint @staticmethod def get_tag() -> Tag: @@ -287,7 +287,6 @@ class ControlNetCheckpointConfig(CheckpointConfigBase, ControlAdapterConfigBase) """Model config for ControlNet models (diffusers version).""" type: Literal[ModelType.ControlNet] = ModelType.ControlNet - format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint @staticmethod def get_tag() -> Tag: @@ -336,6 +335,21 @@ def get_tag() -> Tag: return Tag(f"{ModelType.Main.value}.{ModelFormat.Checkpoint.value}") +class MainBnbQuantized4bCheckpointConfig(CheckpointConfigBase, MainConfigBase): + """Model config for main checkpoint models.""" + + prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon + upcast_attention: bool = False + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.format = ModelFormat.BnbQuantizednf4b + + @staticmethod + def get_tag() -> Tag: + 
return Tag(f"{ModelType.Main.value}.{ModelFormat.BnbQuantizednf4b.value}") + + class MainDiffusersConfig(DiffusersConfigBase, MainConfigBase): """Model config for main diffusers models.""" @@ -438,6 +452,7 @@ def get_model_discriminator_value(v: Any) -> str: Union[ Annotated[MainDiffusersConfig, MainDiffusersConfig.get_tag()], Annotated[MainCheckpointConfig, MainCheckpointConfig.get_tag()], + Annotated[MainBnbQuantized4bCheckpointConfig, MainBnbQuantized4bCheckpointConfig.get_tag()], Annotated[VAEDiffusersConfig, VAEDiffusersConfig.get_tag()], Annotated[VAECheckpointConfig, VAECheckpointConfig.get_tag()], Annotated[ControlNetDiffusersConfig, ControlNetDiffusersConfig.get_tag()], diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index fcb4e9b2f03..dbc2275d851 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -162,7 +162,7 @@ def probe( fields["description"] = ( fields.get("description") or f"{fields['base'].value} {model_type.value} model {fields['name']}" ) - fields["format"] = ModelFormat(fields.get("format")) or probe.get_format() + fields["format"] = ModelFormat(fields.get("format")) if "format" in fields else probe.get_format() fields["hash"] = fields.get("hash") or ModelHash(algorithm=hash_algo).hash(model_path) fields["default_settings"] = fields.get("default_settings") @@ -179,7 +179,7 @@ def probe( # additional fields needed for main and controlnet models if ( fields["type"] in [ModelType.Main, ModelType.ControlNet, ModelType.VAE] - and fields["format"] is ModelFormat.Checkpoint + and fields["format"] in [ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] ): ckpt_config_path = cls._get_checkpoint_config_path( model_path, @@ -323,6 +323,7 @@ def _get_checkpoint_config_path( if model_type is ModelType.Main: if base_type == BaseModelType.Flux: + # TODO: Decide between dev/schnell config_file = "flux/flux1-schnell.yaml" else: config_file = LEGACY_CONFIGS[base_type][variant_type] @@ -422,6 +423,9 @@ def __init__(self, model_path: Path): self.checkpoint = ModelProbe._scan_and_load_checkpoint(model_path) def get_format(self) -> ModelFormat: + state_dict = self.checkpoint.get("state_dict") or self.checkpoint + if "double_blocks.0.img_attn.proj.weight.quant_state.bitsandbytes__nf4" in state_dict: + return ModelFormat.BnbQuantizednf4b return ModelFormat("checkpoint") def get_variant_type(self) -> ModelVariantType: From f3ebbe1d6b2aeaebb9a22c0ff57044edf0f4f120 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 19 Aug 2024 12:12:06 -0400 Subject: [PATCH 032/113] Remove unused param on _run_vae_decoding in flux text to image --- invokeai/app/invocations/flux_text_to_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index fdb8e9c1dd1..5e652b1375f 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -61,7 +61,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput: assert isinstance(flux_conditioning, FLUXConditioningInfo) latents = self._run_diffusion(context, flux_conditioning.clip_embeds, flux_conditioning.t5_embeds) - image = self._run_vae_decoding(context, flux_ae_path, latents) + image = self._run_vae_decoding(context, latents) image_dto = context.images.save(image=image) return ImageOutput.build(image_dto) From efab4a38112a84e32151bfc7c460550844849dc3 Mon Sep 17 00:00:00 2001 From: Brandon Rising 
Date: Mon, 19 Aug 2024 13:12:38 -0400 Subject: [PATCH 033/113] Working inference node with quantized bnb nf4 checkpoint --- .../app/invocations/flux_text_to_image.py | 14 +++-- .../model_manager/load/model_loaders/flux.py | 62 +++++++++++++++++-- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 5e652b1375f..fd7f53df103 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -89,7 +89,7 @@ def _run_diffusion( img, img_ids = self._prepare_latent_img_patches(x) # HACK(ryand): Find a better way to determine if this is a schnell model or not. - is_schnell = "shnell" in transformer_info.config.path if transformer_info.config else "" + is_schnell = "schnell" in transformer_info.config.path if transformer_info.config else "" timesteps = get_schedule( num_steps=self.num_steps, image_seq_len=img.shape[1], @@ -139,9 +139,9 @@ def _prepare_latent_img_patches(self, latent_img: torch.Tensor) -> tuple[torch.T img = repeat(img, "1 ... -> bs ...", bs=bs) # Generate patch position ids. - img_ids = torch.zeros(h // 2, w // 2, 3) - img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2)[:, None] - img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2)[None, :] + img_ids = torch.zeros(h // 2, w // 2, 3, device=img.device) + img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=img.device)[:, None] + img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=img.device)[None, :] img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) return img, img_ids @@ -155,8 +155,10 @@ def _run_vae_decoding( with vae_info as vae: assert isinstance(vae, AutoEncoder) # TODO(ryand): Test that this works with both float16 and bfloat16. - with torch.autocast(device_type=latents.device.type, dtype=TorchDevice.choose_torch_dtype()): - img = vae.decode(latents) + # with torch.autocast(device_type=latents.device.type, dtype=torch.float32): + vae.to(torch.float32) + latents.to(torch.float32) + img = vae.decode(latents) img.clamp(-1, 1) img = rearrange(img[0], "c h w -> h w c") diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 78ecfccfa3b..5ef7f460ce8 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -1,6 +1,8 @@ # Copyright (c) 2024, Brandon W. 
Rising and the InvokeAI Development Team """Class for Flux model loading in InvokeAI.""" +import accelerate +import torch from dataclasses import fields from pathlib import Path from typing import Any, Optional @@ -24,6 +26,7 @@ CheckpointConfigBase, CLIPEmbedDiffusersConfig, MainCheckpointConfig, + MainBnbQuantized4bCheckpointConfig, T5EncoderConfig, VAECheckpointConfig, ) @@ -31,6 +34,7 @@ from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.silence_warnings import SilenceWarnings +from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 app_config = get_config() @@ -62,7 +66,7 @@ def _load_model( with SilenceWarnings(): model = load_class(params).to(self._torch_dtype) # load_sft doesn't support torch.device - sd = load_file(model_path, device=str(TorchDevice.choose_torch_device())) + sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model @@ -105,9 +109,9 @@ def _load_model( match submodel_type: case SubModelType.Tokenizer2: - return T5Tokenizer.from_pretrained(Path(config.path) / "encoder", max_length=512) + return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) case SubModelType.TextEncoder2: - return T5EncoderModel.from_pretrained(Path(config.path) / "tokenizer") + return T5EncoderModel.from_pretrained(Path(config.path) / "text_encoder_2") #TODO: Fix hf subfolder install raise Exception("Only Checkpoint Flux models are currently supported.") @@ -152,7 +156,55 @@ def _load_from_singlefile( with SilenceWarnings(): model = load_class(params).to(self._torch_dtype) - # load_sft doesn't support torch.device - sd = load_file(model_path, device=str(TorchDevice.choose_torch_device())) + sd = load_file(model_path) + model.load_state_dict(sd, strict=False, assign=True) + return model + + +@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.BnbQuantizednf4b) +class FluxBnbQuantizednf4bCheckpointModel(GenericDiffusersLoader): + """Class to load main models.""" + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if not isinstance(config, CheckpointConfigBase): + raise Exception("Only Checkpoint Flux models are currently supported.") + legacy_config_path = app_config.legacy_conf_path / config.config_path + config_path = legacy_config_path.as_posix() + with open(config_path, "r") as stream: + try: + flux_conf = yaml.safe_load(stream) + except: + raise + + match submodel_type: + case SubModelType.Transformer: + return self._load_from_singlefile(config, flux_conf) + + raise Exception("Only Checkpoint Flux models are currently supported.") + + def _load_from_singlefile( + self, + config: AnyModelConfig, + flux_conf: Any, + ) -> AnyModel: + assert isinstance(config, MainBnbQuantized4bCheckpointConfig) + load_class = Flux + params = None + model_path = Path(config.path) + dataclass_fields = {f.name for f in fields(FluxParams)} + filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} + params = FluxParams(**filtered_data) + + with SilenceWarnings(): + with accelerate.init_empty_weights(): + model = load_class(params) + model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) + # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle + # this on GPUs without bfloat16 support. 
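
For orientation, the quantized loader added above follows a three-step pattern: build the module graph without allocating real storage, swap the eligible Linear layers for bitsandbytes NF4 layers, then materialize the weights straight from the checkpoint with load_state_dict(..., assign=True). A condensed sketch of that flow (helper names mirror the ones used in this patch; checkpoint_path is a placeholder):

import accelerate
import torch
from safetensors.torch import load_file

from invokeai.backend.flux.model import Flux, FluxParams
from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4


def load_quantized_flux(params: FluxParams, checkpoint_path: str) -> Flux:
    with accelerate.init_empty_weights():
        # Parameters live on the meta device here: no real memory is allocated yet.
        model = Flux(params)
    # Replace nn.Linear modules with bitsandbytes NF4 equivalents before loading weights.
    model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16)
    sd = load_file(checkpoint_path)
    # assign=True swaps the meta parameters for the loaded tensors instead of copying into them,
    # so the checkpoint's dtypes are preserved as-is.
    model.load_state_dict(sd, strict=False, assign=True)
    return model
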
+ sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model From 95a2d970239980b1bec7359da7f62b885cbecf18 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 19 Aug 2024 13:59:44 -0400 Subject: [PATCH 034/113] Install sub directories with folders correctly, ensure consistent dtype of tensors in flux pipeline and vae --- invokeai/app/invocations/flux_text_to_image.py | 2 +- invokeai/app/services/model_install/model_install_default.py | 5 +++-- invokeai/backend/model_manager/load/model_loaders/flux.py | 5 +---- invokeai/backend/model_manager/util/select_hf_files.py | 2 +- invokeai/backend/quantization/bnb_nf4.py | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index fd7f53df103..cba2dbbc234 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -72,7 +72,7 @@ def _run_diffusion( t5_embeddings: torch.Tensor, ): transformer_info = context.models.load(self.transformer.transformer) - inference_dtype = TorchDevice.choose_torch_dtype() + inference_dtype = torch.bfloat16 # Prepare input noise. # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py index e1d784f5bf4..0369b86fb42 100644 --- a/invokeai/app/services/model_install/model_install_default.py +++ b/invokeai/app/services/model_install/model_install_default.py @@ -783,8 +783,9 @@ def _multifile_download( # So what we do is to synthesize a folder named "sdxl-turbo_vae" here. if subfolder: top = Path(remote_files[0].path.parts[0]) # e.g. "sdxl-turbo/" - path_to_remove = top / subfolder.parts[-1] # sdxl-turbo/vae/ - path_to_add = Path(f"{top}_{subfolder}") + path_to_remove = top / subfolder # sdxl-turbo/vae/ + subfolder_rename = subfolder.name.replace('/', '_').replace('\\', '_') + path_to_add = Path(f"{top}_{subfolder_rename}") else: path_to_remove = Path(".") path_to_add = Path(".") diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 5ef7f460ce8..658f626dbbe 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -64,8 +64,7 @@ def _load_model( params = AutoEncoderParams(**filtered_data) with SilenceWarnings(): - model = load_class(params).to(self._torch_dtype) - # load_sft doesn't support torch.device + model = load_class(params) sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) @@ -203,8 +202,6 @@ def _load_from_singlefile( with accelerate.init_empty_weights(): model = load_class(params) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) - # TODO(ryand): Right now, some of the weights are loaded in bfloat16. Think about how best to handle - # this on GPUs without bfloat16 support. 
sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model diff --git a/invokeai/backend/model_manager/util/select_hf_files.py b/invokeai/backend/model_manager/util/select_hf_files.py index 60abc3384ca..b0d33d6efb7 100644 --- a/invokeai/backend/model_manager/util/select_hf_files.py +++ b/invokeai/backend/model_manager/util/select_hf_files.py @@ -69,7 +69,7 @@ def filter_files( # limit search to subfolder if requested if subfolder: subfolder = root / subfolder - paths = [x for x in paths if x.parent == Path(subfolder)] + paths = [x for x in paths if Path(subfolder) in x.parents] # _filter_by_variant uniquifies the paths and returns a set return sorted(_filter_by_variant(paths, variant)) diff --git a/invokeai/backend/quantization/bnb_nf4.py b/invokeai/backend/quantization/bnb_nf4.py index 28a0861449b..105bf1474c1 100644 --- a/invokeai/backend/quantization/bnb_nf4.py +++ b/invokeai/backend/quantization/bnb_nf4.py @@ -116,7 +116,7 @@ def _convert_linear_layers_to_nf4( child.in_features, child.out_features, bias=has_bias, - compute_dtype=torch.float16, + compute_dtype=compute_dtype, compress_statistics=compress_statistics, ) if has_bias: From 98151ce7594b9770c4a22eae603a44ab985755e5 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 19 Aug 2024 14:41:28 -0400 Subject: [PATCH 035/113] Select dev/schnell based on state dict, use correct max seq len based on dev/schnell, and shift in inference, separate vae flux params into separate config --- invokeai/app/invocations/flux_text_encoder.py | 16 +- .../app/invocations/flux_text_to_image.py | 3 +- invokeai/app/invocations/model.py | 17 ++- .../model_manager/load/model_loaders/flux.py | 3 +- invokeai/backend/model_manager/probe.py | 9 +- invokeai/configs/flux/flux1-dev.yaml | 16 +- invokeai/configs/flux/flux1-schnell.yaml | 17 +-- invokeai/configs/flux/flux1-vae.yaml | 16 ++ .../frontend/web/src/services/api/schema.ts | 139 +++++++++++++++--- 9 files changed, 170 insertions(+), 66 deletions(-) create mode 100644 invokeai/configs/flux/flux1-vae.yaml diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 9cecd89bcaf..7b3f0745562 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -1,4 +1,5 @@ import torch +from typing import Literal from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -23,11 +24,12 @@ class FluxTextEncoderInvocation(BaseInvocation): description=FieldDescriptions.clip, input=Input.Connection, ) - t5Encoder: T5EncoderField = InputField( + t5_encoder: T5EncoderField = InputField( title="T5Encoder", description=FieldDescriptions.t5Encoder, input=Input.Connection, ) + max_seq_len: Literal[256, 512] = InputField(description="Max sequence length for the desired flux model") positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not @@ -43,21 +45,15 @@ def invoke(self, context: InvocationContext) -> ConditioningOutput: return ConditioningOutput.build(conditioning_name) def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: - # TODO: Determine the T5 max sequence length based on the model. 
- # if self.model == "flux-schnell": - max_seq_len = 256 - # # elif self.model == "flux-dev": - # # max_seq_len = 512 - # else: - # raise ValueError(f"Unknown model: {self.model}") + max_seq_len = self.max_seq_len # Load CLIP. clip_tokenizer_info = context.models.load(self.clip.tokenizer) clip_text_encoder_info = context.models.load(self.clip.text_encoder) # Load T5. - t5_tokenizer_info = context.models.load(self.t5Encoder.tokenizer) - t5_text_encoder_info = context.models.load(self.t5Encoder.text_encoder) + t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer) + t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder) with ( clip_text_encoder_info as clip_text_encoder, diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index cba2dbbc234..b6f2d6dedd7 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -19,6 +19,7 @@ from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.model_manager.config import CheckpointConfigBase @invocation( @@ -89,7 +90,7 @@ def _run_diffusion( img, img_ids = self._prepare_latent_img_patches(x) # HACK(ryand): Find a better way to determine if this is a schnell model or not. - is_schnell = "schnell" in transformer_info.config.path if transformer_info.config else "" + is_schnell = "schnell" in transformer_info.config.config_path if transformer_info.config and isinstance(transformer_info.config, CheckpointConfigBase) else "" timesteps = get_schedule( num_steps=self.num_steps, image_seq_len=img.shape[1], diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index a6ec64d5c78..3d5f38927d7 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,4 +1,5 @@ import copy +import yaml from time import sleep from typing import Dict, List, Literal, Optional @@ -16,6 +17,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType +from invokeai.backend.model_manager.config import CheckpointConfigBase class ModelIdentifierField(BaseModel): @@ -154,8 +156,9 @@ class FluxModelLoaderOutput(BaseInvocationOutput): transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer") clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP") - t5Encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder") + t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder") vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") + max_seq_len: Literal[256, 512] = OutputField(description=FieldDescriptions.vae, title="Max Seq Length") @invocation("flux_model_loader", title="Flux Main Model", tags=["model", "flux"], category="model", version="1.0.3") @@ -189,12 +192,22 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: ModelType.VAE, BaseModelType.Flux, ) + transformer_config = context.models.get_config(transformer) + assert isinstance(transformer_config, CheckpointConfigBase) + legacy_config_path = 
context.config.get().legacy_conf_path / transformer_config.config_path + config_path = legacy_config_path.as_posix() + with open(config_path, "r") as stream: + try: + flux_conf = yaml.safe_load(stream) + except: + raise return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer), clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0), - t5Encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), + t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), vae=VAEField(vae=vae), + max_seq_len=flux_conf['max_seq_len'] ) def _get_model(self, context: InvocationContext, submodel: SubModelType) -> ModelIdentifierField: diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 658f626dbbe..11a6ebcf6d2 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -32,7 +32,6 @@ ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader -from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.silence_warnings import SilenceWarnings from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 @@ -60,7 +59,7 @@ def _load_model( raise dataclass_fields = {f.name for f in fields(AutoEncoderParams)} - filtered_data = {k: v for k, v in flux_conf["params"]["ae_params"].items() if k in dataclass_fields} + filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} params = AutoEncoderParams(**filtered_data) with SilenceWarnings(): diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index dbc2275d851..6ce090d651a 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -324,7 +324,12 @@ def _get_checkpoint_config_path( if model_type is ModelType.Main: if base_type == BaseModelType.Flux: # TODO: Decide between dev/schnell - config_file = "flux/flux1-schnell.yaml" + checkpoint = ModelProbe._scan_and_load_checkpoint(model_path) + state_dict = checkpoint.get("state_dict") or checkpoint + if 'guidance_in.out_layer.weight' in state_dict: + config_file = "flux/flux1-dev.yaml" + else: + config_file = "flux/flux1-schnell.yaml" else: config_file = LEGACY_CONFIGS[base_type][variant_type] if isinstance(config_file, dict): # need another tier for sd-2.x models @@ -338,7 +343,7 @@ def _get_checkpoint_config_path( ) elif model_type is ModelType.VAE: config_file = ( - "flux/flux1-schnell.yaml" + "flux/flux1-vae.yaml" if base_type is BaseModelType.Flux else "stable-diffusion/v1-inference.yaml" if base_type is BaseModelType.StableDiffusion1 diff --git a/invokeai/configs/flux/flux1-dev.yaml b/invokeai/configs/flux/flux1-dev.yaml index 3f76f11cd4e..40a5b26a973 100644 --- a/invokeai/configs/flux/flux1-dev.yaml +++ b/invokeai/configs/flux/flux1-dev.yaml @@ -1,6 +1,6 @@ repo_id: "black-forest-labs/FLUX.1-dev" repo_ae: "ae.safetensors" -max_length: 512 +max_seq_len: 512 params: in_channels: 64 vec_in_dim: 768 @@ -17,17 +17,3 @@ params: theta: 10_000 qkv_bias: True guidance_embed: True - ae_params: - resolution: 256 - in_channels: 3 - ch: 128 - out_ch: 3 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - z_channels: 16 - scale_factor: 0.3611 - shift_factor: 0.1159 diff --git 
a/invokeai/configs/flux/flux1-schnell.yaml b/invokeai/configs/flux/flux1-schnell.yaml index bea1824e35a..2e9208c2c4a 100644 --- a/invokeai/configs/flux/flux1-schnell.yaml +++ b/invokeai/configs/flux/flux1-schnell.yaml @@ -1,7 +1,6 @@ repo_id: "black-forest-labs/FLUX.1-schnell" repo_ae: "ae.safetensors" -t5_encoder: "google/t5-v1_1-xxl" -max_length: 512 +max_seq_len: 256 params: in_channels: 64 vec_in_dim: 768 @@ -18,17 +17,3 @@ params: theta: 10_000 qkv_bias: True guidance_embed: False - ae_params: - resolution: 256 - in_channels: 3 - ch: 128 - out_ch: 3 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - z_channels: 16 - scale_factor: 0.3611 - shift_factor: 0.1159 diff --git a/invokeai/configs/flux/flux1-vae.yaml b/invokeai/configs/flux/flux1-vae.yaml new file mode 100644 index 00000000000..2949378a2ba --- /dev/null +++ b/invokeai/configs/flux/flux1-vae.yaml @@ -0,0 +1,16 @@ +repo_id: "black-forest-labs/FLUX.1-schnell" +repo_path: "ae.safetensors" +params: + resolution: 256 + in_channels: 3 + ch: 128 + out_ch: 3 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + z_channels: 16 + scale_factor: 0.3611 + shift_factor: 0.1159 \ No newline at end of file diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 8045e3e0c71..16a82a2cf60 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -3735,11 +3735,11 @@ export type components = { cover_image?: string | null; /** * Format + * @description Format of the provided checkpoint model * @default checkpoint - * @constant * @enum {string} */ - format: "checkpoint"; + format: "checkpoint" | "bnb_quantized_nf4b"; /** * Config Path * @description path to the checkpoint model config file @@ -5750,7 +5750,7 @@ export type components = { * @default null * @enum {string} */ - t5_encoder?: "base" | "16b_quantized" | "8b_quantized"; + t5_encoder?: "base" | "8b_quantized"; /** * type * @default flux_model_loader @@ -5778,12 +5778,18 @@ export type components = { * T5 Encoder * @description T5 tokenizer and text encoder */ - t5Encoder: components["schemas"]["T5EncoderField"]; + t5_encoder: components["schemas"]["T5EncoderField"]; /** * VAE * @description VAE */ vae: components["schemas"]["VAEField"]; + /** + * Max Seq Length + * @description VAE + * @enum {integer} + */ + max_seq_len: 256 | 512; /** * type * @default flux_model_loader_output @@ -5822,7 +5828,14 @@ export type components = { * @description T5 tokenizer and text encoder * @default null */ - t5Encoder?: components["schemas"]["T5EncoderField"]; + t5_encoder?: components["schemas"]["T5EncoderField"]; + /** + * Max Seq Len + * @description Max sequence length for the desired flux model + * @default null + * @enum {integer} + */ + max_seq_len?: 256 | 512; /** * Positive Prompt * @description Positive prompt for text-to-image generation. @@ -9694,6 +9707,96 @@ export type components = { * @enum {integer} */ LogLevel: 0 | 10 | 20 | 30 | 40 | 50; + /** + * MainBnbQuantized4bCheckpointConfig + * @description Model config for main checkpoint models. + */ + MainBnbQuantized4bCheckpointConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. 
+ */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default main + * @constant + * @enum {string} + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** @description Default settings for this model */ + default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; + /** @default normal */ + variant?: components["schemas"]["ModelVariantType"]; + /** + * Format + * @description Format of the provided checkpoint model + * @default checkpoint + * @enum {string} + */ + format: "checkpoint" | "bnb_quantized_nf4b"; + /** + * Config Path + * @description path to the checkpoint model config file + */ + config_path: string; + /** + * Converted At + * @description When this model was last converted to diffusers + */ + converted_at?: number | null; + /** @default epsilon */ + prediction_type?: components["schemas"]["SchedulerPredictionType"]; + /** + * Upcast Attention + * @default false + */ + upcast_attention?: boolean; + }; /** * MainCheckpointConfig * @description Model config for main checkpoint models. @@ -9761,11 +9864,11 @@ export type components = { variant?: components["schemas"]["ModelVariantType"]; /** * Format + * @description Format of the provided checkpoint model * @default checkpoint - * @constant * @enum {string} */ - format: "checkpoint"; + format: "checkpoint" | "bnb_quantized_nf4b"; /** * Config Path * @description path to the checkpoint model config file @@ -10670,7 +10773,7 @@ export type components = { * @description Storage format of model. * @enum {string} */ - ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b"; + ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b" | "bnb_quantized_nf4b"; /** ModelIdentifierField */ ModelIdentifierField: { /** @@ -10970,7 +11073,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. 
*/ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -11056,7 +11159,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | 
components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11077,7 +11180,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11218,7 +11321,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | 
components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; }; /** * Multiply Integers @@ -15087,11 +15190,11 @@ export type components = { cover_image?: string | null; /** * Format + * @description Format of the provided checkpoint model * @default checkpoint - * @constant * @enum {string} */ - format: "checkpoint"; + format: "checkpoint" | "bnb_quantized_nf4b"; /** * Config Path * @description path to the checkpoint model config file @@ -15619,7 +15722,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Validation Error */ @@ -15651,7 +15754,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | 
components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -15748,7 +15851,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -16248,7 +16351,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | 
components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ From 5d7e15445dac4c075463ac9bf768282d260f1f85 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 20 Aug 2024 14:39:33 +0000 Subject: [PATCH 036/113] Fix FLUX output image clamping. And a few other minor fixes to make inference work with the full bfloat16 FLUX transformer model. --- invokeai/app/invocations/flux_text_to_image.py | 10 +++++++--- invokeai/backend/flux/sampling.py | 11 ++++++++++- .../model_manager/load/model_loaders/flux.py | 14 ++++++++------ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index b6f2d6dedd7..e08b4f38fd4 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -17,9 +17,9 @@ from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack +from invokeai.backend.model_manager.config import CheckpointConfigBase from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice -from invokeai.backend.model_manager.config import CheckpointConfigBase @invocation( @@ -90,7 +90,11 @@ def _run_diffusion( img, img_ids = self._prepare_latent_img_patches(x) # HACK(ryand): Find a better way to determine if this is a schnell model or not. 
- is_schnell = "schnell" in transformer_info.config.config_path if transformer_info.config and isinstance(transformer_info.config, CheckpointConfigBase) else "" + is_schnell = ( + "schnell" in transformer_info.config.config_path + if transformer_info.config and isinstance(transformer_info.config, CheckpointConfigBase) + else "" + ) timesteps = get_schedule( num_steps=self.num_steps, image_seq_len=img.shape[1], @@ -161,7 +165,7 @@ def _run_vae_decoding( latents.to(torch.float32) img = vae.decode(latents) - img.clamp(-1, 1) + img = img.clamp(-1, 1) img = rearrange(img[0], "c h w -> h w c") img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy()) diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 89d9d417e0d..3e3c933d4e2 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -104,9 +104,18 @@ def denoise( timesteps: list[float], guidance: float = 4.0, ): + dtype = model.txt_in.bias.dtype + + # TODO(ryand): This shouldn't be necessary if we manage the dtypes properly in the caller. + img = img.to(dtype=dtype) + img_ids = img_ids.to(dtype=dtype) + txt = txt.to(dtype=dtype) + txt_ids = txt_ids.to(dtype=dtype) + vec = vec.to(dtype=dtype) + # this is ignored for schnell guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) - for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:], strict=False): + for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:], strict=True): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) pred = model( img=img, diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 11a6ebcf6d2..3ba933bf480 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -1,12 +1,12 @@ # Copyright (c) 2024, Brandon W. 
Rising and the InvokeAI Development Team """Class for Flux model loading in InvokeAI.""" -import accelerate -import torch from dataclasses import fields from pathlib import Path from typing import Any, Optional +import accelerate +import torch import yaml from safetensors.torch import load_file from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer @@ -25,15 +25,15 @@ from invokeai.backend.model_manager.config import ( CheckpointConfigBase, CLIPEmbedDiffusersConfig, - MainCheckpointConfig, MainBnbQuantized4bCheckpointConfig, + MainCheckpointConfig, T5EncoderConfig, VAECheckpointConfig, ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader -from invokeai.backend.util.silence_warnings import SilenceWarnings from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 +from invokeai.backend.util.silence_warnings import SilenceWarnings app_config = get_config() @@ -109,7 +109,9 @@ def _load_model( case SubModelType.Tokenizer2: return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) case SubModelType.TextEncoder2: - return T5EncoderModel.from_pretrained(Path(config.path) / "text_encoder_2") #TODO: Fix hf subfolder install + return T5EncoderModel.from_pretrained( + Path(config.path) / "text_encoder_2" + ) # TODO: Fix hf subfolder install raise Exception("Only Checkpoint Flux models are currently supported.") @@ -153,7 +155,7 @@ def _load_from_singlefile( params = FluxParams(**filtered_data) with SilenceWarnings(): - model = load_class(params).to(self._torch_dtype) + model = load_class(params) sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model From 870ecd30d8725a0977d9a6d3dfa1effec91b4b33 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 20 Aug 2024 14:52:05 +0000 Subject: [PATCH 037/113] Add tqdm progress bar to FLUX denoising. 
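
This is a one-line quality-of-life change: wrapping the (t_curr, t_prev) pairs in tqdm prints a per-step progress bar while denoising. Materializing the zip as a list up front lets tqdm know the total step count. A toy sketch of the pattern (time.sleep stands in for one transformer evaluation):

import time

from tqdm import tqdm

timesteps = [1.0, 0.75, 0.5, 0.25, 0.0]  # 4 denoising steps
for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))):
    time.sleep(0.1)  # placeholder for a model call stepping from t_curr to t_prev
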
--- invokeai/backend/flux/sampling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 3e3c933d4e2..5d670c3e699 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -4,6 +4,7 @@ import torch from einops import rearrange, repeat from torch import Tensor +from tqdm import tqdm from .model import Flux from .modules.conditioner import HFEncoder @@ -115,7 +116,7 @@ def denoise( # this is ignored for schnell guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) - for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:], strict=True): + for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) pred = model( img=img, From 24829b9b28d5263bf97dcd4e2efc4a753a2ed3c0 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Tue, 20 Aug 2024 12:37:12 -0400 Subject: [PATCH 038/113] Fix support for 8b quantized t5 encoders, update exception messages in flux loaders --- invokeai/app/invocations/model.py | 4 +- invokeai/backend/model_manager/config.py | 9 +++++ .../model_manager/load/model_loaders/flux.py | 39 +++++++++++++++---- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 3d5f38927d7..9c9d8eb8342 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -143,9 +143,9 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: "format": ModelFormat.T5Encoder, }, "8b_quantized": { - "repo": "invokeai/flux_dev::t5_xxl_encoder/optimum_quanto_qfloat8", + "repo": "invokeai/flux_schnell::t5_xxl_encoder/optimum_quanto_qfloat8", "name": "t5_8b_quantized_encoder", - "format": ModelFormat.T5Encoder, + "format": ModelFormat.T5Encoder8b, }, } diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index ce6b8ed8ccb..5dd74dbacca 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -225,6 +225,14 @@ def get_tag() -> Tag: return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder.value}") +class T5Encoder8bConfig(T5EncoderConfigBase): + format: Literal[ModelFormat.T5Encoder8b] = ModelFormat.T5Encoder8b + + @staticmethod + def get_tag() -> Tag: + return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder8b.value}") + + class LoRALyCORISConfig(LoRAConfigBase): """Model config for LoRA/Lycoris models.""" @@ -460,6 +468,7 @@ def get_model_discriminator_value(v: Any) -> str: Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()], Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()], Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()], + Annotated[T5Encoder8bConfig, T5Encoder8bConfig.get_tag()], Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()], Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()], Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()], diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 3ba933bf480..58729369658 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -28,12 +28,14 @@ MainBnbQuantized4bCheckpointConfig, MainCheckpointConfig, T5EncoderConfig, + 
T5Encoder8bConfig, VAECheckpointConfig, ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.util.silence_warnings import SilenceWarnings +from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel app_config = get_config() @@ -82,7 +84,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CLIPEmbedDiffusersConfig): - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only CLIPEmbedDiffusersConfig models are currently supported here.") match submodel_type: case SubModelType.Tokenizer: @@ -90,7 +92,28 @@ def _load_model( case SubModelType.TextEncoder: return CLIPTextModel.from_pretrained(config.path) - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") + + +@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) +class T5Encoder8bCheckpointModel(GenericDiffusersLoader): + """Class to load main models.""" + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if not isinstance(config, T5Encoder8bConfig): + raise Exception("Only T5Encoder8bConfig models are currently supported here.") + + match submodel_type: + case SubModelType.Tokenizer2: + return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) + case SubModelType.TextEncoder2: + return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2") + + raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) @@ -103,7 +126,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, T5EncoderConfig): - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only T5EncoderConfig models are currently supported here.") match submodel_type: case SubModelType.Tokenizer2: @@ -113,7 +136,7 @@ def _load_model( Path(config.path) / "text_encoder_2" ) # TODO: Fix hf subfolder install - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint) @@ -126,7 +149,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only CheckpointConfigBase models are currently supported here.") legacy_config_path = app_config.legacy_conf_path / config.config_path config_path = legacy_config_path.as_posix() with open(config_path, "r") as stream: @@ -139,7 +162,7 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only Transformer submodels are currently supported.") def _load_from_singlefile( 
self, @@ -171,7 +194,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only CheckpointConfigBase models are currently supported here.") legacy_config_path = app_config.legacy_conf_path / config.config_path config_path = legacy_config_path.as_posix() with open(config_path, "r") as stream: @@ -184,7 +207,7 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise Exception("Only Checkpoint Flux models are currently supported.") + raise Exception("Only Transformer submodels are currently supported.") def _load_from_singlefile( self, From f36c6d0fffcce42074db1b171409747f1591b5e6 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Tue, 20 Aug 2024 13:05:31 -0400 Subject: [PATCH 039/113] Fix styling/lint --- invokeai/app/invocations/flux_text_encoder.py | 3 +- invokeai/app/invocations/model.py | 14 +- .../model_install/model_install_default.py | 2 +- invokeai/backend/bnb.py | 517 ------------------ invokeai/backend/flux/modules/layers.py | 2 +- invokeai/backend/flux/sampling.py | 4 +- invokeai/backend/load_flux_model.py | 129 ----- .../load_flux_model_bnb_llm_int8_old.py | 124 ----- invokeai/backend/model_manager/config.py | 4 +- .../model_manager/load/model_loaders/flux.py | 4 +- invokeai/backend/model_manager/probe.py | 10 +- .../load_flux_model_bnb_llm_int8.py | 2 +- 12 files changed, 27 insertions(+), 788 deletions(-) delete mode 100644 invokeai/backend/bnb.py delete mode 100644 invokeai/backend/load_flux_model.py delete mode 100644 invokeai/backend/load_flux_model_bnb_llm_int8_old.py diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 7b3f0745562..a57124d2bc8 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -1,5 +1,6 @@ -import torch from typing import Literal + +import torch from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 9c9d8eb8342..e104dacde0e 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,8 +1,8 @@ import copy -import yaml from time import sleep from typing import Dict, List, Literal, Optional +import yaml from pydantic import BaseModel, Field from invokeai.app.invocations.baseinvocation import ( @@ -16,8 +16,14 @@ from invokeai.app.services.model_records import ModelRecordChanges from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig -from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType -from invokeai.backend.model_manager.config import CheckpointConfigBase +from invokeai.backend.model_manager.config import ( + AnyModelConfig, + BaseModelType, + CheckpointConfigBase, + ModelFormat, + ModelType, + SubModelType, +) class ModelIdentifierField(BaseModel): @@ -207,7 +213,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0), t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), vae=VAEField(vae=vae), - 
max_seq_len=flux_conf['max_seq_len'] + max_seq_len=flux_conf["max_seq_len"], ) def _get_model(self, context: InvocationContext, submodel: SubModelType) -> ModelIdentifierField: diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py index 0369b86fb42..4ff48034385 100644 --- a/invokeai/app/services/model_install/model_install_default.py +++ b/invokeai/app/services/model_install/model_install_default.py @@ -784,7 +784,7 @@ def _multifile_download( if subfolder: top = Path(remote_files[0].path.parts[0]) # e.g. "sdxl-turbo/" path_to_remove = top / subfolder # sdxl-turbo/vae/ - subfolder_rename = subfolder.name.replace('/', '_').replace('\\', '_') + subfolder_rename = subfolder.name.replace("/", "_").replace("\\", "_") path_to_add = Path(f"{top}_{subfolder_rename}") else: path_to_remove = Path(".") diff --git a/invokeai/backend/bnb.py b/invokeai/backend/bnb.py deleted file mode 100644 index 1022a1d1dcc..00000000000 --- a/invokeai/backend/bnb.py +++ /dev/null @@ -1,517 +0,0 @@ -from typing import Any, Optional, Set, Type - -import bitsandbytes as bnb -import torch - -# The utils in this file take ideas from -# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py - - -# Patterns: -# - Quantize: -# - Initialize model on meta device -# - Replace layers -# - Load state_dict to cpu -# - Load state_dict into model -# - Quantize on GPU -# - Extract state_dict -# - Save - -# - Load: -# - Initialize model on meta device -# - Replace layers -# - Load state_dict to cpu -# - Load state_dict into model on cpu -# - Move to GPU - - -# class InvokeInt8Params(bnb.nn.Int8Params): -# """Overrides `bnb.nn.Int8Params` to add the following functionality: -# - Make it possible to load a quantized state dict without putting the weight on a "cuda" device. -# """ - -# def quantize(self, device: Optional[torch.device] = None): -# device = device or torch.device("cuda") -# if device.type != "cuda": -# raise RuntimeError(f"Int8Params quantization is only supported on CUDA devices ({device=}).") - -# # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L291-L302 -# B = self.data.contiguous().half().cuda(device) -# if self.has_fp16_weights: -# self.data = B -# else: -# # we store the 8-bit rows-major weight -# # we convert this weight to the turning/ampere weight during the first inference pass -# CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B) -# del CBt -# del SCBt -# self.data = CB -# self.CB = CB -# self.SCB = SCB - - -class Invoke2Linear8bitLt(torch.nn.Linear): - """This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.""" - - def __init__( - self, - input_features: int, - output_features: int, - bias=True, - has_fp16_weights=True, - memory_efficient_backward=False, - threshold=0.0, - index=None, - device=None, - ): - """ - Initialize Linear8bitLt class. - - Args: - input_features (`int`): - Number of input features of the linear layer. - output_features (`int`): - Number of output features of the linear layer. - bias (`bool`, defaults to `True`): - Whether the linear class uses the bias term as well. 
- """ - super().__init__(input_features, output_features, bias, device) - assert not memory_efficient_backward, "memory_efficient_backward is no longer required and the argument is deprecated in 0.37.0 and will be removed in 0.39.0" - self.state = bnb.MatmulLtState() - self.index = index - - self.state.threshold = threshold - self.state.has_fp16_weights = has_fp16_weights - self.state.memory_efficient_backward = memory_efficient_backward - if threshold > 0.0 and not has_fp16_weights: - self.state.use_pool = True - - self.weight = Int8Params(self.weight.data, has_fp16_weights=has_fp16_weights, requires_grad=has_fp16_weights) - self._register_load_state_dict_pre_hook(maybe_rearrange_weight) - - def _save_to_state_dict(self, destination, prefix, keep_vars): - super()._save_to_state_dict(destination, prefix, keep_vars) - - # we only need to save SCB as extra data, because CB for quantized weights is already stored in weight.data - scb_name = "SCB" - - # case 1: .cuda was called, SCB is in self.weight - param_from_weight = getattr(self.weight, scb_name) - # case 2: self.init_8bit_state was called, SCB is in self.state - param_from_state = getattr(self.state, scb_name) - # case 3: SCB is in self.state, weight layout reordered after first forward() - layout_reordered = self.state.CxB is not None - - key_name = prefix + f"{scb_name}" - format_name = prefix + "weight_format" - - if not self.state.has_fp16_weights: - if param_from_weight is not None: - destination[key_name] = param_from_weight if keep_vars else param_from_weight.detach() - destination[format_name] = torch.tensor(0, dtype=torch.uint8) - elif param_from_state is not None and not layout_reordered: - destination[key_name] = param_from_state if keep_vars else param_from_state.detach() - destination[format_name] = torch.tensor(0, dtype=torch.uint8) - elif param_from_state is not None: - destination[key_name] = param_from_state if keep_vars else param_from_state.detach() - weights_format = self.state.formatB - # At this point `weights_format` is an str - if weights_format not in LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING: - raise ValueError(f"Unrecognized weights format {weights_format}") - - weights_format = LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING[weights_format] - - destination[format_name] = torch.tensor(weights_format, dtype=torch.uint8) - - def _load_from_state_dict( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - super()._load_from_state_dict( - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ) - unexpected_copy = list(unexpected_keys) - - for key in unexpected_copy: - input_name = key[len(prefix) :] - if input_name == "SCB": - if self.weight.SCB is None: - # buffers not yet initialized, can't access them directly without quantizing first - raise RuntimeError( - "Loading a quantized checkpoint into non-quantized Linear8bitLt is " - "not supported. 
Please call module.cuda() before module.load_state_dict()", - ) - - input_param = state_dict[key] - self.weight.SCB.copy_(input_param) - - if self.state.SCB is not None: - self.state.SCB = self.weight.SCB - - unexpected_keys.remove(key) - - def init_8bit_state(self): - self.state.CB = self.weight.CB - self.state.SCB = self.weight.SCB - self.weight.CB = None - self.weight.SCB = None - - def forward(self, x: torch.Tensor): - self.state.is_training = self.training - if self.weight.CB is not None: - self.init_8bit_state() - - # weights are cast automatically as Int8Params, but the bias has to be cast manually - if self.bias is not None and self.bias.dtype != x.dtype: - self.bias.data = self.bias.data.to(x.dtype) - - out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state) - - if not self.state.has_fp16_weights: - if self.state.CB is not None and self.state.CxB is not None: - # we converted 8-bit row major to turing/ampere format in the first inference pass - # we no longer need the row-major weight - del self.state.CB - self.weight.data = self.state.CxB - return out - - -class InvokeLinear8bitLt(bnb.nn.Linear8bitLt): - """Wraps `bnb.nn.Linear8bitLt` and adds the following functionality: - - enables instantiation directly on the device - - re-quantizaton when loading the state dict - """ - - def __init__( - self, *args: Any, device: Optional[torch.device] = None, threshold: float = 6.0, **kwargs: Any - ) -> None: - super().__init__(*args, device=device, threshold=threshold, **kwargs) - # If the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up - # filling the device memory with float32 weights which could lead to OOM - # if torch.tensor(0, device=device).device.type == "cuda": - # self.quantize_() - # self._register_load_state_dict_pre_hook(partial(_quantize_on_load_hook, self.quantize_)) - # self.register_load_state_dict_post_hook(_ignore_missing_weights_hook) - - def _load_from_state_dict( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - super()._load_from_state_dict( - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ) - unexpected_copy = list(unexpected_keys) - - for key in unexpected_copy: - input_name = key[len(prefix) :] - if input_name == "SCB": - if self.weight.SCB is None: - # buffers not yet initialized, can't access them directly without quantizing first - raise RuntimeError( - "Loading a quantized checkpoint into non-quantized Linear8bitLt is " - "not supported. 
Please call module.cuda() before module.load_state_dict()", - ) - - input_param = state_dict[key] - self.weight.SCB.copy_(input_param) - - if self.state.SCB is not None: - self.state.SCB = self.weight.SCB - - unexpected_keys.remove(key) - - def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torch.device] = None) -> None: - """Inplace quantize.""" - if weight is None: - weight = self.weight.data - if weight.data.dtype == torch.int8: - # already quantized - return - assert isinstance(self.weight, bnb.nn.Int8Params) - self.weight = self.quantize(self.weight, weight, device) - - @staticmethod - def quantize( - int8params: bnb.nn.Int8Params, weight: torch.Tensor, device: Optional[torch.device] - ) -> bnb.nn.Int8Params: - device = device or torch.device("cuda") - if device.type != "cuda": - raise RuntimeError(f"Unexpected device type: {device.type}") - # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L291-L302 - B = weight.contiguous().to(device=device, dtype=torch.float16) - if int8params.has_fp16_weights: - int8params.data = B - else: - CB, CBt, SCB, SCBt, _ = bnb.functional.double_quant(B) - del CBt - del SCBt - int8params.data = CB - int8params.CB = CB - int8params.SCB = SCB - return int8params - - -# class _Linear4bit(bnb.nn.Linear4bit): -# """Wraps `bnb.nn.Linear4bit` to enable: instantiation directly on the device, re-quantizaton when loading the -# state dict, meta-device initialization, and materialization.""" - -# def __init__(self, *args: Any, device: Optional[torch.device] = None, **kwargs: Any) -> None: -# super().__init__(*args, device=device, **kwargs) -# self.weight = cast(bnb.nn.Params4bit, self.weight) # type: ignore[has-type] -# self.bias = cast(Optional[torch.nn.Parameter], self.bias) # type: ignore[has-type] -# # if the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up -# # filling the device memory with float32 weights which could lead to OOM -# if torch.tensor(0, device=device).device.type == "cuda": -# self.quantize_() -# self._register_load_state_dict_pre_hook(partial(_quantize_on_load_hook, self.quantize_)) -# self.register_load_state_dict_post_hook(_ignore_missing_weights_hook) - -# def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torch.device] = None) -> None: -# """Inplace quantize.""" -# if weight is None: -# weight = self.weight.data -# if weight.data.dtype == torch.uint8: -# # already quantized -# return -# assert isinstance(self.weight, bnb.nn.Params4bit) -# self.weight = self.quantize(self.weight, weight, device) - -# @staticmethod -# def quantize( -# params4bit: bnb.nn.Params4bit, weight: torch.Tensor, device: Optional[torch.device] -# ) -> bnb.nn.Params4bit: -# device = device or torch.device("cuda") -# if device.type != "cuda": -# raise RuntimeError(f"Unexpected device type: {device.type}") -# # https://github.com/TimDettmers/bitsandbytes/blob/0.41.0/bitsandbytes/nn/modules.py#L156-L159 -# w = weight.contiguous().to(device=device, dtype=torch.half) -# w_4bit, quant_state = bnb.functional.quantize_4bit( -# w, -# blocksize=params4bit.blocksize, -# compress_statistics=params4bit.compress_statistics, -# quant_type=params4bit.quant_type, -# ) -# return _replace_param(params4bit, w_4bit, quant_state) - -# def to_empty(self, *, device: _DEVICE, recurse: bool = True) -> Self: -# if self.weight.dtype == torch.uint8: # was quantized -# # cannot init the quantized params directly -# weight = torch.empty(self.weight.quant_state.shape, 
device=device, dtype=torch.half) -# else: -# weight = torch.empty_like(self.weight.data, device=device) -# device = torch.device(device) -# if device.type == "cuda": # re-quantize -# self.quantize_(weight, device) -# else: -# self.weight = _replace_param(self.weight, weight) -# if self.bias is not None: -# self.bias = _replace_param(self.bias, torch.empty_like(self.bias, device=device)) -# return self - - -def convert_model_to_bnb_llm_int8(model: torch.nn.Module, ignore_modules: set[str]): - linear_cls = InvokeLinear8bitLt - _convert_linear_layers(model, linear_cls, ignore_modules) - - # TODO(ryand): Is this necessary? - # set the compute dtype if necessary - # for m in model.modules(): - # if isinstance(m, bnb.nn.Linear4bit): - # m.compute_dtype = self.dtype - # m.compute_type_is_set = False - - -# class BitsandbytesPrecision(Precision): -# """Plugin for quantizing weights with `bitsandbytes `__. - -# .. warning:: This is an :ref:`experimental ` feature. - -# .. note:: -# The optimizer is not automatically replaced with ``bitsandbytes.optim.Adam8bit`` or equivalent 8-bit optimizers. - -# Args: -# mode: The quantization mode to use. -# dtype: The compute dtype to use. -# ignore_modules: The submodules whose Linear layers should not be replaced, for example. ``{"lm_head"}``. -# This might be desirable for numerical stability. The string will be checked in as a prefix, so a value like -# "transformer.blocks" will ignore all linear layers in all of the transformer blocks. -# """ - -# def __init__( -# self, -# mode: Literal["nf4", "nf4-dq", "fp4", "fp4-dq", "int8", "int8-training"], -# dtype: Optional[torch.dtype] = None, -# ignore_modules: Optional[Set[str]] = None, -# ) -> None: -# if dtype is None: -# # try to be smart about the default selection -# if mode.startswith("int8"): -# dtype = torch.float16 -# else: -# dtype = ( -# torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16 -# ) -# if mode.startswith("int8") and dtype is not torch.float16: -# # this limitation is mentioned in https://huggingface.co/blog/hf-bitsandbytes-integration#usage -# raise ValueError(f"{mode!r} only works with `dtype=torch.float16`, but you chose `{dtype}`") - -# globals_ = globals() -# mode_to_cls = { -# "nf4": globals_["_NF4Linear"], -# "nf4-dq": globals_["_NF4DQLinear"], -# "fp4": globals_["_FP4Linear"], -# "fp4-dq": globals_["_FP4DQLinear"], -# "int8-training": globals_["_Linear8bitLt"], -# "int8": globals_["_Int8LinearInference"], -# } -# self._linear_cls = mode_to_cls[mode] -# self.dtype = dtype -# self.ignore_modules = ignore_modules or set() - -# @override -# def convert_module(self, module: torch.nn.Module) -> torch.nn.Module: -# # avoid naive users thinking they quantized their model -# if not any(isinstance(m, torch.nn.Linear) for m in module.modules()): -# raise TypeError( -# "You are using the bitsandbytes precision plugin, but your model has no Linear layers. This plugin" -# " won't work for your model." 
-# ) - -# # convert modules if they haven't been converted already -# if not any(isinstance(m, (bnb.nn.Linear8bitLt, bnb.nn.Linear4bit)) for m in module.modules()): -# # this will not quantize the model but only replace the layer classes -# _convert_layers(module, self._linear_cls, self.ignore_modules) - -# # set the compute dtype if necessary -# for m in module.modules(): -# if isinstance(m, bnb.nn.Linear4bit): -# m.compute_dtype = self.dtype -# m.compute_type_is_set = False -# return module - - -# def _quantize_on_load_hook(quantize_fn: Callable[[torch.Tensor], None], state_dict: OrderedDict, *_: Any) -> None: -# # There is only one key that ends with `*.weight`, the other one is the bias -# weight_key = next((name for name in state_dict if name.endswith("weight")), None) -# if weight_key is None: -# return -# # Load the weight from the state dict and re-quantize it -# weight = state_dict.pop(weight_key) -# quantize_fn(weight) - - -# def _ignore_missing_weights_hook(module: torch.nn.Module, incompatible_keys: _IncompatibleKeys) -> None: -# # since we manually loaded the weight in the `_quantize_on_load_hook` hook, we need to avoid this missing key false -# # positive -# for key in reversed(incompatible_keys.missing_keys): -# if key.endswith("weight"): -# incompatible_keys.missing_keys.remove(key) - - -def _convert_linear_layers( - module: torch.nn.Module, linear_cls: Type, ignore_modules: Set[str], prefix: str = "" -) -> None: - for name, child in module.named_children(): - fullname = f"{prefix}.{name}" if prefix else name - if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules): - has_bias = child.bias is not None - # since we are going to copy over the child's data, the device doesn't matter. I chose CPU - # to avoid spiking CUDA memory even though initialization is slower - # 4bit layers support quantizing from meta-device params so this is only relevant for 8-bit - _Linear4bit = globals()["_Linear4bit"] - device = torch.device("meta" if issubclass(linear_cls, _Linear4bit) else "cpu") - replacement = linear_cls( - child.in_features, - child.out_features, - bias=has_bias, - device=device, - ) - if has_bias: - replacement.bias = _replace_param(replacement.bias, child.bias.data.clone()) - state = {"quant_state": replacement.weight.quant_state if issubclass(linear_cls, _Linear4bit) else None} - replacement.weight = _replace_param(replacement.weight, child.weight.data.clone(), **state) - module.__setattr__(name, replacement) - else: - _convert_linear_layers(child, linear_cls, ignore_modules, prefix=fullname) - - -# def _replace_linear_layers( -# model: torch.nn.Module, -# linear_layer_type: Literal["Linear8bitLt", "Linear4bit"], -# modules_to_not_convert: set[str], -# current_key_name: str | None = None, -# ): -# has_been_replaced = False -# for name, module in model.named_children(): -# if current_key_name is None: -# current_key_name = [] -# current_key_name.append(name) -# if isinstance(module, torch.nn.Linear) and name not in modules_to_not_convert: -# # Check if the current key is not in the `modules_to_not_convert` -# current_key_name_str = ".".join(current_key_name) -# proceed = True -# for key in modules_to_not_convert: -# if ( -# (key in current_key_name_str) and (key + "." 
in current_key_name_str) -# ) or key == current_key_name_str: -# proceed = False -# break -# if proceed: -# # Load bnb module with empty weight and replace ``nn.Linear` module -# if bnb_quantization_config.load_in_8bit: -# bnb_module = bnb.nn.Linear8bitLt( -# module.in_features, -# module.out_features, -# module.bias is not None, -# has_fp16_weights=False, -# threshold=bnb_quantization_config.llm_int8_threshold, -# ) -# elif bnb_quantization_config.load_in_4bit: -# bnb_module = bnb.nn.Linear4bit( -# module.in_features, -# module.out_features, -# module.bias is not None, -# bnb_quantization_config.bnb_4bit_compute_dtype, -# compress_statistics=bnb_quantization_config.bnb_4bit_use_double_quant, -# quant_type=bnb_quantization_config.bnb_4bit_quant_type, -# ) -# else: -# raise ValueError("load_in_8bit and load_in_4bit can't be both False") -# bnb_module.weight.data = module.weight.data -# if module.bias is not None: -# bnb_module.bias.data = module.bias.data -# bnb_module.requires_grad_(False) -# setattr(model, name, bnb_module) -# has_been_replaced = True -# if len(list(module.children())) > 0: -# _, _has_been_replaced = _replace_with_bnb_layers( -# module, bnb_quantization_config, modules_to_not_convert, current_key_name -# ) -# has_been_replaced = has_been_replaced | _has_been_replaced -# # Remove the last key for recursion -# current_key_name.pop(-1) -# return model, has_been_replaced diff --git a/invokeai/backend/flux/modules/layers.py b/invokeai/backend/flux/modules/layers.py index 4f9d515dafc..d93dddba0fc 100644 --- a/invokeai/backend/flux/modules/layers.py +++ b/invokeai/backend/flux/modules/layers.py @@ -5,7 +5,7 @@ from einops import rearrange from torch import Tensor, nn -from ..math import attention, rope +from invokeai.backend.flux.math import attention, rope class EmbedND(nn.Module): diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 5d670c3e699..675728a94b0 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -6,8 +6,8 @@ from torch import Tensor from tqdm import tqdm -from .model import Flux -from .modules.conditioner import HFEncoder +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.modules.conditioner import HFEncoder def get_noise( diff --git a/invokeai/backend/load_flux_model.py b/invokeai/backend/load_flux_model.py deleted file mode 100644 index 92731223963..00000000000 --- a/invokeai/backend/load_flux_model.py +++ /dev/null @@ -1,129 +0,0 @@ -import json -import os -import time -from pathlib import Path -from typing import Union - -import torch -from diffusers.models.model_loading_utils import load_state_dict -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from diffusers.utils import ( - CONFIG_NAME, - SAFE_WEIGHTS_INDEX_NAME, - SAFETENSORS_WEIGHTS_NAME, - _get_checkpoint_shard_files, - is_accelerate_available, -) -from optimum.quanto import qfloat8 -from optimum.quanto.models import QuantizedDiffusersModel -from optimum.quanto.models.shared_dict import ShardedStateDict - -from invokeai.backend.requantize import requantize - - -class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel): - base_class = FluxTransformer2DModel - - @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]): - if cls.base_class is None: - raise ValueError("The `base_class` attribute needs to be configured.") - - if not is_accelerate_available(): - raise ValueError("Reloading a quantized diffusers model requires the accelerate 
library.") - from accelerate import init_empty_weights - - if os.path.isdir(model_name_or_path): - # Look for a quantization map - qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) - if not os.path.exists(qmap_path): - raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") - - # Look for original model config file. - model_config_path = os.path.join(model_name_or_path, CONFIG_NAME) - if not os.path.exists(model_config_path): - raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.") - - with open(qmap_path, "r", encoding="utf-8") as f: - qmap = json.load(f) - - with open(model_config_path, "r", encoding="utf-8") as f: - original_model_cls_name = json.load(f)["_class_name"] - configured_cls_name = cls.base_class.__name__ - if configured_cls_name != original_model_cls_name: - raise ValueError( - f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})." - ) - - # Create an empty model - config = cls.base_class.load_config(model_name_or_path) - with init_empty_weights(): - model = cls.base_class.from_config(config) - - # Look for the index of a sharded checkpoint - checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) - if os.path.exists(checkpoint_file): - # Convert the checkpoint path to a list of shards - _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file) - # Create a mapping for the sharded safetensor files - state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) - else: - # Look for a single checkpoint file - checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME) - if not os.path.exists(checkpoint_file): - raise ValueError(f"No safetensor weights found in {model_name_or_path}.") - # Get state_dict from model checkpoint - state_dict = load_state_dict(checkpoint_file) - - # Requantize and load quantized weights from state_dict - requantize(model, state_dict=state_dict, quantization_map=qmap) - model.eval() - return cls(model) - else: - raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") - - -def load_flux_transformer(path: Path) -> FluxTransformer2DModel: - # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) - model_8bit_path = path / "quantized" - if model_8bit_path.exists(): - # The quantized model exists, load it. - # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like - # something that we should be able to make much faster. - q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path) - - # Access the underlying wrapped model. - # We access the wrapped model, even though it is private, because it simplifies the type checking by - # always returning a FluxTransformer2DModel from this function. - model = q_model._wrapped - else: - # The quantized model does not exist yet, quantize and save it. - # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on - # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it - # here. 
- model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16) - assert isinstance(model, FluxTransformer2DModel) - - q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8) - - model_8bit_path.mkdir(parents=True, exist_ok=True) - q_model.save_pretrained(model_8bit_path) - - # (See earlier comment about accessing the wrapped model.) - model = q_model._wrapped - - assert isinstance(model, FluxTransformer2DModel) - return model - - -def main(): - start = time.time() - model = load_flux_transformer( - Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") - ) - print(f"Time to load: {time.time() - start}s") - print("hi") - - -if __name__ == "__main__": - main() diff --git a/invokeai/backend/load_flux_model_bnb_llm_int8_old.py b/invokeai/backend/load_flux_model_bnb_llm_int8_old.py deleted file mode 100644 index f7e1471928e..00000000000 --- a/invokeai/backend/load_flux_model_bnb_llm_int8_old.py +++ /dev/null @@ -1,124 +0,0 @@ -import time -from pathlib import Path - -import accelerate -import torch -from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model -from accelerate.utils.bnb import get_keys_to_not_convert -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from safetensors.torch import load_file - -from invokeai.backend.bnb import quantize_model_llm_int8 - -# Docs: -# https://huggingface.co/docs/accelerate/usage_guides/quantization -# https://huggingface.co/docs/bitsandbytes/v0.43.3/en/integrations#accelerate - - -def get_parameter_device(parameter: torch.nn.Module): - return next(parameter.parameters()).device - - -# def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str], llm_int8_threshold: int = 6): -# """Apply bitsandbytes LLM.8bit() quantization to the model.""" -# model_device = get_parameter_device(model) -# if model_device.type != "meta": -# # Note: This is not strictly required, but I can't think of a good reason to quantize a model that's not on the -# # meta device, so we enforce it for now. -# raise RuntimeError("The model should be on the meta device to apply LLM.8bit() quantization.") - -# bnb_quantization_config = BnbQuantizationConfig( -# load_in_8bit=True, -# llm_int8_threshold=llm_int8_threshold, -# ) - -# with accelerate.init_empty_weights(): -# model = replace_with_bnb_layers(model, bnb_quantization_config, modules_to_not_convert=modules_to_not_convert) - -# return model - - -def load_flux_transformer(path: Path) -> FluxTransformer2DModel: - model_config = FluxTransformer2DModel.load_config(path, local_files_only=True) - with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) - - bnb_quantization_config = BnbQuantizationConfig( - load_in_8bit=True, - llm_int8_threshold=6, - ) - - model_8bit_path = path / "bnb_llm_int8" - if model_8bit_path.exists(): - # The quantized model already exists, load it and return it. - # Note that the model loading code is the same when loading from quantized vs original weights. The only - # difference is the weights_location. - # model = load_and_quantize_model( - # empty_model, - # weights_location=model_8bit_path, - # bnb_quantization_config=bnb_quantization_config, - # # device_map="auto", - # device_map={"": "cpu"}, - # ) - - # TODO: Handle the keys that were not quantized (get_keys_to_not_convert). 
- model = quantize_model_llm_int8(empty_model, modules_to_not_convert=set()) - - # model = quantize_model_llm_int8(empty_model, set()) - - # Load sharded state dict. - files = list(path.glob("*.safetensors")) - state_dict = dict() - for file in files: - sd = load_file(file) - state_dict.update(sd) - - else: - # The quantized model does not exist yet, quantize and save it. - model = load_and_quantize_model( - empty_model, - weights_location=path, - bnb_quantization_config=bnb_quantization_config, - device_map="auto", - ) - - keys_to_not_convert = get_keys_to_not_convert(empty_model) # TODO - - model_8bit_path.mkdir(parents=True, exist_ok=True) - accl = accelerate.Accelerator() - accl.save_model(model, model_8bit_path) - - # --------------------- - - # model = quantize_model_llm_int8(empty_model, set()) - - # # Load sharded state dict. - # files = list(path.glob("*.safetensors")) - # state_dict = dict() - # for file in files: - # sd = load_file(file) - # state_dict.update(sd) - - # # Load the state dict into the model. The bitsandbytes layers know how to load from both quantized and - # # non-quantized state dicts. - # result = model.load_state_dict(state_dict, strict=True) - # model = model.to("cuda") - - # --------------------- - - assert isinstance(model, FluxTransformer2DModel) - return model - - -def main(): - start = time.time() - model = load_flux_transformer( - Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/") - ) - print(f"Time to load: {time.time() - start}s") - print("hi") - - -if __name__ == "__main__": - main() diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index 5dd74dbacca..34cc993d39c 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -194,7 +194,9 @@ def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseModel]) -> N class CheckpointConfigBase(ModelConfigBase): """Model config for checkpoint-style models.""" - format: Literal[ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] = Field(description="Format of the provided checkpoint model", default=ModelFormat.Checkpoint) + format: Literal[ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] = Field( + description="Format of the provided checkpoint model", default=ModelFormat.Checkpoint + ) config_path: str = Field(description="path to the checkpoint model config file") converted_at: Optional[float] = Field( description="When this model was last converted to diffusers", default_factory=time.time diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 58729369658..6502339a243 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -27,15 +27,15 @@ CLIPEmbedDiffusersConfig, MainBnbQuantized4bCheckpointConfig, MainCheckpointConfig, - T5EncoderConfig, T5Encoder8bConfig, + T5EncoderConfig, VAECheckpointConfig, ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 -from invokeai.backend.util.silence_warnings import SilenceWarnings from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel +from invokeai.backend.util.silence_warnings import 
SilenceWarnings app_config = get_config() diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index 6ce090d651a..a3364da7697 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -177,10 +177,10 @@ def probe( fields["repo_variant"] = fields.get("repo_variant") or probe.get_repo_variant() # additional fields needed for main and controlnet models - if ( - fields["type"] in [ModelType.Main, ModelType.ControlNet, ModelType.VAE] - and fields["format"] in [ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] - ): + if fields["type"] in [ModelType.Main, ModelType.ControlNet, ModelType.VAE] and fields["format"] in [ + ModelFormat.Checkpoint, + ModelFormat.BnbQuantizednf4b, + ]: ckpt_config_path = cls._get_checkpoint_config_path( model_path, model_type=fields["type"], @@ -326,7 +326,7 @@ def _get_checkpoint_config_path( # TODO: Decide between dev/schnell checkpoint = ModelProbe._scan_and_load_checkpoint(model_path) state_dict = checkpoint.get("state_dict") or checkpoint - if 'guidance_in.out_layer.weight' in state_dict: + if "guidance_in.out_layer.weight" in state_dict: config_file = "flux/flux1-dev.yaml" else: config_file = "flux/flux1-schnell.yaml" diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py index fd54210cbe5..876f299add1 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py @@ -64,7 +64,7 @@ def main(): with log_time("Load state dict into model"): # Load sharded state dict. files = list(model_path.glob("*.safetensors")) - state_dict = dict() + state_dict = {} for file in files: sd = load_file(file) state_dict.update(sd) From bebc6d3afdb0d56b11cc34a1aa9b570b8affafff Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Tue, 20 Aug 2024 14:41:17 -0400 Subject: [PATCH 040/113] Add t5 encoders and clip embeds to the model manager --- .../subpanels/ModelManagerPanel/ModelList.tsx | 30 +++++++- .../ModelManagerPanel/ModelTypeFilter.tsx | 2 + .../Invocation/fields/InputFieldRenderer.tsx | 1 - .../src/services/api/hooks/modelsByType.ts | 4 + .../frontend/web/src/services/api/schema.ts | 77 +++++++++++++++++-- .../frontend/web/src/services/api/types.ts | 16 ++++ 6 files changed, 120 insertions(+), 10 deletions(-) diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx index 755a6e21fb2..b1c071bed3e 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelList.tsx @@ -5,6 +5,7 @@ import type { FilterableModelType } from 'features/modelManagerV2/store/modelMan import { memo, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; import { + useClipEmbedModels, useControlNetModels, useEmbeddingModels, useIPAdapterModels, @@ -13,6 +14,7 @@ import { useRefinerModels, useSpandrelImageToImageModels, useT2IAdapterModels, + useT5EncoderModels, useVAEModels, } from 'services/api/hooks/modelsByType'; import type { AnyModelConfig } from 'services/api/types'; @@ -73,6 +75,18 @@ const ModelList = () => { [vaeModels, searchTerm, filteredModelType] ); + const [t5EncoderModels, { isLoading: isLoadingT5EncoderModels }] = useT5EncoderModels(); + const 
filteredT5EncoderModels = useMemo( + () => modelsFilter(t5EncoderModels, searchTerm, filteredModelType), + [t5EncoderModels, searchTerm, filteredModelType] + ); + + const [clipEmbedModels, { isLoading: isLoadingClipEmbedModels }] = useClipEmbedModels(); + const filteredClipEmbedModels = useMemo( + () => modelsFilter(clipEmbedModels, searchTerm, filteredModelType), + [clipEmbedModels, searchTerm, filteredModelType] + ); + const [spandrelImageToImageModels, { isLoading: isLoadingSpandrelImageToImageModels }] = useSpandrelImageToImageModels(); const filteredSpandrelImageToImageModels = useMemo( @@ -90,7 +104,9 @@ const ModelList = () => { filteredT2IAdapterModels.length + filteredIPAdapterModels.length + filteredVAEModels.length + - filteredSpandrelImageToImageModels.length + filteredSpandrelImageToImageModels.length + + t5EncoderModels.length + + clipEmbedModels.length ); }, [ filteredControlNetModels.length, @@ -102,6 +118,8 @@ const ModelList = () => { filteredT2IAdapterModels.length, filteredVAEModels.length, filteredSpandrelImageToImageModels.length, + t5EncoderModels.length, + clipEmbedModels.length, ]); return ( @@ -154,6 +172,16 @@ const ModelList = () => { {!isLoadingT2IAdapterModels && filteredT2IAdapterModels.length > 0 && ( )} + {/* T5 Encoders List */} + {isLoadingT5EncoderModels && } + {!isLoadingT5EncoderModels && filteredT5EncoderModels.length > 0 && ( + + )} + {/* Clip Embed List */} + {isLoadingClipEmbedModels && } + {!isLoadingClipEmbedModels && filteredClipEmbedModels.length > 0 && ( + + )} {/* Spandrel Image to Image List */} {isLoadingSpandrelImageToImageModels && ( diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx index 9db3334e89e..91dba7d71ff 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelTypeFilter.tsx @@ -19,6 +19,8 @@ export const ModelTypeFilter = memo(() => { controlnet: 'ControlNet', vae: 'VAE', t2i_adapter: t('common.t2iAdapter'), + t5_encoder: 'T5Encoder', + clip_embed: 'Clip Embed', ip_adapter: t('common.ipAdapter'), clip_vision: 'Clip Vision', spandrel_image_to_image: 'Image-to-Image', diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx index 6ec51aba130..ba09ce68400 100644 --- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx @@ -72,7 +72,6 @@ type InputFieldProps = { const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => { const fieldInstance = useFieldInputInstance(nodeId, fieldName); const fieldTemplate = useFieldInputTemplate(nodeId, fieldName); - window.console.log("Hit 0") if (isStringFieldInputInstance(fieldInstance) && isStringFieldInputTemplate(fieldTemplate)) { return ; diff --git a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts index b1e5e00e657..2d061e3157d 100644 --- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts +++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts @@ -3,6 +3,7 @@ 
import { useMemo } from 'react'; import { modelConfigsAdapterSelectors, useGetModelConfigsQuery } from 'services/api/endpoints/models'; import type { AnyModelConfig } from 'services/api/types'; import { + isClipEmbedModelConfig, isControlNetModelConfig, isControlNetOrT2IAdapterModelConfig, isFluxMainModelModelConfig, @@ -14,6 +15,7 @@ import { isSDXLMainModelModelConfig, isSpandrelImageToImageModelConfig, isT2IAdapterModelConfig, + isT5EncoderModelConfig, isTIModelConfig, isVAEModelConfig, } from 'services/api/types'; @@ -42,6 +44,8 @@ export const useLoRAModels = buildModelsHook(isLoRAModelConfig); export const useControlNetAndT2IAdapterModels = buildModelsHook(isControlNetOrT2IAdapterModelConfig); export const useControlNetModels = buildModelsHook(isControlNetModelConfig); export const useT2IAdapterModels = buildModelsHook(isT2IAdapterModelConfig); +export const useT5EncoderModels = buildModelsHook(isT5EncoderModelConfig); +export const useClipEmbedModels = buildModelsHook(isClipEmbedModelConfig); export const useSpandrelImageToImageModels = buildModelsHook(isSpandrelImageToImageModelConfig); export const useIPAdapterModels = buildModelsHook(isIPAdapterModelConfig); export const useEmbeddingModels = buildModelsHook(isTIModelConfig); diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 16a82a2cf60..e34bb5e523d 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -11073,7 +11073,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. */ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | 
components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -11159,7 +11159,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11180,7 +11180,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | 
components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11321,7 +11321,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; }; /** * Multiply Integers @@ -14432,6 +14432,67 @@ export type components = { */ type: "t2i_adapter_output"; }; + /** T5Encoder8bConfig */ + T5Encoder8bConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). 
+ */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default t5_encoder + * @constant + * @enum {string} + */ + type: "t5_encoder"; + /** + * Format + * @default t5_encoder_8b + * @constant + * @enum {string} + */ + format: "t5_encoder_8b"; + }; /** T5EncoderConfig */ T5EncoderConfig: { /** @@ -15722,7 +15783,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Validation Error */ @@ -15754,7 +15815,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | 
components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -15851,7 +15912,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -16351,7 +16412,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | 
components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index bde4d65ad00..79536606fa0 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -51,6 +51,9 @@ export type VAEModelConfig = S['VAECheckpointConfig'] | S['VAEDiffusersConfig']; export type ControlNetModelConfig = S['ControlNetDiffusersConfig'] | S['ControlNetCheckpointConfig']; export type IPAdapterModelConfig = S['IPAdapterInvokeAIConfig'] | S['IPAdapterCheckpointConfig']; export type T2IAdapterModelConfig = S['T2IAdapterConfig']; +export type ClipEmbedModelConfig = S['CLIPEmbedDiffusersConfig']; +export type T5EncoderModelConfig = S['T5EncoderConfig']; +export type T5Encoder8bModelConfig = S['T5Encoder8bConfig']; export type SpandrelImageToImageModelConfig = S['SpandrelImageToImageConfig']; type TextualInversionModelConfig = S['TextualInversionFileConfig'] | S['TextualInversionFolderConfig']; type DiffusersModelConfig = S['MainDiffusersConfig']; @@ -62,6 +65,9 @@ export type AnyModelConfig = | VAEModelConfig | ControlNetModelConfig | IPAdapterModelConfig + | T5EncoderModelConfig + | T5Encoder8bModelConfig + | ClipEmbedModelConfig | T2IAdapterModelConfig | SpandrelImageToImageModelConfig | TextualInversionModelConfig @@ -88,6 +94,16 @@ export const isT2IAdapterModelConfig = (config: AnyModelConfig): config is T2IAd return config.type === 't2i_adapter'; }; +export const isT5EncoderModelConfig = ( + config: AnyModelConfig +): config is T5EncoderModelConfig | T5Encoder8bModelConfig => { + return config.type === 't5_encoder'; +}; + +export const isClipEmbedModelConfig = (config: AnyModelConfig): config is ClipEmbedModelConfig => { + return config.type === 'clip_embed'; +}; + export const isSpandrelImageToImageModelConfig = ( config: 
AnyModelConfig ): config is SpandrelImageToImageModelConfig => { From 3f845d97537320a4ee6b14724246c6a999c5d219 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Tue, 20 Aug 2024 15:31:22 -0400 Subject: [PATCH 041/113] Some cleanup of the tags and description of flux nodes --- invokeai/app/invocations/flux_text_encoder.py | 3 ++- invokeai/app/invocations/flux_text_to_image.py | 2 +- invokeai/frontend/web/src/services/api/schema.ts | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index a57124d2bc8..1ad3ad2f2aa 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -15,11 +15,12 @@ @invocation( "flux_text_encoder", title="FLUX Text Encoding", - tags=["image"], + tags=["image", "flux"], category="image", version="1.0.0", ) class FluxTextEncoderInvocation(BaseInvocation): + """Encodes and preps a prompt for a flux image.""" clip: CLIPField = InputField( title="CLIP", description=FieldDescriptions.clip, diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index e08b4f38fd4..f67cf4a1550 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -25,7 +25,7 @@ @invocation( "flux_text_to_image", title="FLUX Text to Image", - tags=["image"], + tags=["image", "flux"], category="image", version="1.0.0", ) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index e34bb5e523d..efaf3d482c1 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -5798,7 +5798,10 @@ export type components = { */ type: "flux_model_loader_output"; }; - /** FLUX Text Encoding */ + /** + * FLUX Text Encoding + * @description Encodes and preps a prompt for a flux image. 
+ */ FluxTextEncoderInvocation: { /** * Id From 8b3e386844b486c549e6e8a87854328c30996e7b Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Tue, 20 Aug 2024 20:17:16 -0400 Subject: [PATCH 042/113] exclude flux models from main model dropdown --- invokeai/frontend/web/src/services/api/hooks/modelsByType.ts | 4 ++-- invokeai/frontend/web/src/services/api/types.ts | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts index 2d061e3157d..273db5a9ded 100644 --- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts +++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts @@ -9,7 +9,7 @@ import { isFluxMainModelModelConfig, isIPAdapterModelConfig, isLoRAModelConfig, - isNonRefinerMainModelConfig, + isNonRefinerNonFluxMainModelConfig, isNonSDXLMainModelConfig, isRefinerMainModelModelConfig, isSDXLMainModelModelConfig, @@ -35,7 +35,7 @@ const buildModelsHook = return [modelConfigs, result] as const; }; -export const useMainModels = buildModelsHook(isNonRefinerMainModelConfig); +export const useMainModels = buildModelsHook(isNonRefinerNonFluxMainModelConfig); export const useNonSDXLMainModels = buildModelsHook(isNonSDXLMainModelConfig); export const useRefinerModels = buildModelsHook(isRefinerMainModelModelConfig); export const useFluxModels = buildModelsHook(isFluxMainModelModelConfig); diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 79536606fa0..045e1508192 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -126,6 +126,10 @@ export const isNonRefinerMainModelConfig = (config: AnyModelConfig): config is M return config.type === 'main' && config.base !== 'sdxl-refiner'; }; +export const isNonRefinerNonFluxMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => { + return config.type === 'main' && config.base !== 'sdxl-refiner' && config.base !== 'flux'; +}; + export const isRefinerMainModelModelConfig = (config: AnyModelConfig): config is MainModelConfig => { return config.type === 'main' && config.base === 'sdxl-refiner'; }; From 35c263ab7b2bdf3bb68d437ae7e81a68578717d0 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Tue, 20 Aug 2024 20:37:18 -0400 Subject: [PATCH 043/113] add default workflow for flux t2i --- .../default_workflows/Flux Text to Image.json | 256 ++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json new file mode 100644 index 00000000000..4be8ebf07c5 --- /dev/null +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -0,0 +1,256 @@ +{ + "name": "Flux Text to Image", + "author": "InvokeAI", + "description": "A simple text-to-image workflow using Flux Dev or Flux Schnell", + "version": "1.0.0", + "contact": "", + "tags": "text2image, flux", + "notes": "", + "exposedFields": [ + { + "nodeId": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "fieldName": "model" + }, + { + "nodeId": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "fieldName": "t5_encoder" + }, + { + "nodeId": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "fieldName": "positive_prompt" + } + ], + "meta": { + "version": "3.0.0", + 
"category": "default" + }, + "nodes": [ + { + "id": "4754c534-a5f3-4ad0-9382-7887985e668c", + "type": "invocation", + "data": { + "id": "4754c534-a5f3-4ad0-9382-7887985e668c", + "type": "rand_int", + "version": "1.0.1", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "low": { + "name": "low", + "label": "", + "value": 0 + }, + "high": { + "name": "high", + "label": "", + "value": 2147483647 + } + } + }, + "position": { + "x": 822.9899179655476, + "y": 360.9657214885052 + } + }, + { + "id": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "type": "invocation", + "data": { + "id": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "type": "flux_text_to_image", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": false, + "useCache": false, + "inputs": { + "board": { + "name": "board", + "label": "" + }, + "metadata": { + "name": "metadata", + "label": "" + }, + "transformer": { + "name": "transformer", + "label": "" + }, + "vae": { + "name": "vae", + "label": "" + }, + "positive_text_conditioning": { + "name": "positive_text_conditioning", + "label": "" + }, + "width": { + "name": "width", + "label": "", + "value": 1024 + }, + "height": { + "name": "height", + "label": "", + "value": 1024 + }, + "num_steps": { + "name": "num_steps", + "label": "", + "value": 4 + }, + "guidance": { + "name": "guidance", + "label": "", + "value": 4 + }, + "seed": { + "name": "seed", + "label": "", + "value": 0 + } + } + }, + "position": { + "x": 1216.3900791301849, + "y": 5.500841807102248 + } + }, + { + "id": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "type": "invocation", + "data": { + "id": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "type": "flux_text_encoder", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "clip": { + "name": "clip", + "label": "" + }, + "t5_encoder": { + "name": "t5_encoder", + "label": "" + }, + "max_seq_len": { + "name": "max_seq_len", + "label": "", + "value": 256 + }, + "positive_prompt": { + "name": "positive_prompt", + "label": "", + "value": "dog eating an ice cream cone while watching the TV" + } + } + }, + "position": { + "x": 809.5428272455715, + "y": 111.5674004989348 + } + }, + { + "id": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "type": "invocation", + "data": { + "id": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "type": "flux_model_loader", + "version": "1.0.3", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "model": { + "name": "model", + "label": "", + "value": { + "key": "a1deb125-2781-482c-8a71-9a22e76fd956", + "hash": "random:40bd0a5b8b2c6edf8f5611e049000329b952efc6a1a24b4f77ca4ae3dbecaf6a", + "name": "flux1-schnell-bnb_nf4", + "base": "flux", + "type": "main" + } + }, + "t5_encoder": { + "name": "t5_encoder", + "label": "", + "value": "8b_quantized" + } + } + }, + "position": { + "x": 407.297070550788, + "y": 37.50301331772734 + } + } + ], + "edges": [ + { + "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-159bdf1b-79e7-4174-b86e-d40e646964c8seed", + "type": "default", + "source": "4754c534-a5f3-4ad0-9382-7887985e668c", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "value", + "targetHandle": "seed" + }, + { + "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44vae-159bdf1b-79e7-4174-b86e-d40e646964c8vae", + "type": "default", + "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "target": 
"159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "vae", + "targetHandle": "vae" + }, + { + "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44transformer-159bdf1b-79e7-4174-b86e-d40e646964c8transformer", + "type": "default", + "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "transformer", + "targetHandle": "transformer" + }, + { + "id": "reactflow__edge-7187b891-8b9e-41f2-bad0-579c14c92fafconditioning-159bdf1b-79e7-4174-b86e-d40e646964c8positive_text_conditioning", + "type": "default", + "source": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "conditioning", + "targetHandle": "positive_text_conditioning" + }, + { + "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44max_seq_len-7187b891-8b9e-41f2-bad0-579c14c92fafmax_seq_len", + "type": "default", + "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "sourceHandle": "max_seq_len", + "targetHandle": "max_seq_len" + }, + { + "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44t5_encoder-7187b891-8b9e-41f2-bad0-579c14c92faft5_encoder", + "type": "default", + "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "sourceHandle": "t5_encoder", + "targetHandle": "t5_encoder" + }, + { + "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44clip-7187b891-8b9e-41f2-bad0-579c14c92fafclip", + "type": "default", + "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "sourceHandle": "clip", + "targetHandle": "clip" + } + ] +} From ec360ee1cb5e0de0259d10b176139cdc4aea1555 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 13:27:54 +0000 Subject: [PATCH 044/113] Rename t5Encoder -> t5_encoder. 
--- invokeai/app/invocations/fields.py | 2 +- invokeai/app/invocations/flux_text_encoder.py | 3 ++- invokeai/app/invocations/model.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index ba2c75aa132..2abd6b39b75 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -126,7 +126,7 @@ class FieldDescriptions: negative_cond = "Negative conditioning tensor" noise = "Noise tensor" clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count" - t5Encoder = "T5 tokenizer and text encoder" + t5_encoder = "T5 tokenizer and text encoder" unet = "UNet (scheduler, LoRAs)" transformer = "Transformer" vae = "VAE" diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 1ad3ad2f2aa..7b08201e18f 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -21,6 +21,7 @@ ) class FluxTextEncoderInvocation(BaseInvocation): """Encodes and preps a prompt for a flux image.""" + clip: CLIPField = InputField( title="CLIP", description=FieldDescriptions.clip, @@ -28,7 +29,7 @@ class FluxTextEncoderInvocation(BaseInvocation): ) t5_encoder: T5EncoderField = InputField( title="T5Encoder", - description=FieldDescriptions.t5Encoder, + description=FieldDescriptions.t5_encoder, input=Input.Connection, ) max_seq_len: Literal[256, 512] = InputField(description="Max sequence length for the desired flux model") diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index e104dacde0e..7f4c4a891a2 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -162,7 +162,7 @@ class FluxModelLoaderOutput(BaseInvocationOutput): transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer") clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP") - t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder") + t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder") vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") max_seq_len: Literal[256, 512] = OutputField(description=FieldDescriptions.vae, title="Max Seq Length") From c822c3dbfff66864a66a42cfcb6f4b7bf85bc04a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 13:45:22 +0000 Subject: [PATCH 045/113] Address minor review comments. 
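The field descriptions changed in this patch spell out the schnell/dev differences: the T5 max sequence length is expected to be 256 for schnell and 512 for dev, the recommended step counts are 4 and 50 respectively, and guidance applies to dev only. Collected here as a hedged reference sketch; the values come from those descriptions, and nothing in the code stores such a table.

```python
from typing import Literal, TypedDict


class FluxVariantDefaults(TypedDict):
    t5_max_seq_len: Literal[256, 512]
    num_steps: int
    uses_guidance: bool


# Recommended settings per the InputField descriptions in this patch; treat
# them as guidance, not hard limits.
FLUX_VARIANT_DEFAULTS: dict[str, FluxVariantDefaults] = {
    "schnell": {"t5_max_seq_len": 256, "num_steps": 4, "uses_guidance": False},
    "dev": {"t5_max_seq_len": 512, "num_steps": 50, "uses_guidance": True},
}

print(FLUX_VARIANT_DEFAULTS["dev"])
```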
--- invokeai/app/invocations/fields.py | 2 +- invokeai/app/invocations/flux_text_encoder.py | 12 ++++++------ invokeai/app/invocations/flux_text_to_image.py | 8 +++++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 2abd6b39b75..1b52f27fb54 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -135,7 +135,7 @@ class FieldDescriptions: vae_model = "VAE model to load" lora_model = "LoRA model to load" main_model = "Main model (UNet, VAE, CLIP) to load" - flux_model = "Flux model (Transformer, VAE, CLIP) to load" + flux_model = "Flux model (Transformer) to load" sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load" sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load" onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load" diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 7b08201e18f..54c6ff2b332 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -15,8 +15,8 @@ @invocation( "flux_text_encoder", title="FLUX Text Encoding", - tags=["image", "flux"], - category="image", + tags=["prompt", "conditioning", "flux"], + category="conditioning", version="1.0.0", ) class FluxTextEncoderInvocation(BaseInvocation): @@ -32,7 +32,9 @@ class FluxTextEncoderInvocation(BaseInvocation): description=FieldDescriptions.t5_encoder, input=Input.Connection, ) - max_seq_len: Literal[256, 512] = InputField(description="Max sequence length for the desired flux model") + t5_max_seq_len: Literal[256, 512] = InputField( + description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models." + ) positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not @@ -48,8 +50,6 @@ def invoke(self, context: InvocationContext) -> ConditioningOutput: return ConditioningOutput.build(conditioning_name) def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: - max_seq_len = self.max_seq_len - # Load CLIP. 
clip_tokenizer_info = context.models.load(self.clip.tokenizer) clip_text_encoder_info = context.models.load(self.clip.text_encoder) @@ -70,7 +70,7 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc assert isinstance(t5_tokenizer, T5Tokenizer) clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) - t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, max_seq_len) + t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len) prompt = [self.positive_prompt] prompt_embeds = t5_encoder(prompt) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index f67cf4a1550..d2789b86f04 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -33,7 +33,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" transformer: TransformerField = InputField( - description=FieldDescriptions.unet, + description=FieldDescriptions.flux_model, input=Input.Connection, title="Transformer", ) @@ -46,10 +46,12 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): ) width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") - num_steps: int = InputField(default=4, description="Number of diffusion steps.") + num_steps: int = InputField( + default=4, description="Number of diffusion steps. Recommend values are schnell: 4, dev: 50." + ) guidance: float = InputField( default=4.0, - description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images.", + description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.", ) seed: int = InputField(default=0, description="Randomness seed for reproducibility.") From 19238ede4098279112d3801eb576f49a4c27c6d7 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 11:18:07 -0400 Subject: [PATCH 046/113] Update doc string for import_local_model and remove access_token since it's only usable for local file paths --- .../app/services/shared/invocation_context.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index 9ba1bf68f34..ce52d088090 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -468,17 +468,24 @@ def import_local_model( self, model_path: Path, config: Optional[ModelRecordChanges] = None, - access_token: Optional[str] = None, inplace: Optional[bool] = False, ): """ - TODO: Fill out description of this method + Import the model file located at the given local file path and return its ModelInstallJob. + + This can be used to single-file models or directories. 
+ + Args: + model_path: A pathlib.Path object pointing to a model file or directory + config: Optional ModelRecordChanges to define manual probe overrides + inplace: Optional boolean to declare whether or not to install the model in the models dir + + Returns: + ModelInstallJob object defining the install job to be used in tracking the job """ if not model_path.exists(): raise Exception("Models provided to import_local_model must already exist on disk") - return self._services.model_manager.install.heuristic_import( - str(model_path), config=config, access_token=access_token, inplace=inplace - ) + return self._services.model_manager.install.heuristic_import(str(model_path), config=config, inplace=inplace) def load_local_model( self, From ede26a76022b4d78af5700f5ee0f6882a97e1bd5 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 11:30:16 -0400 Subject: [PATCH 047/113] Switch inheritance class of flux model loaders --- .../model_manager/load/model_loaders/flux.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 6502339a243..44444092e9e 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -31,8 +31,8 @@ T5EncoderConfig, VAECheckpointConfig, ) +from invokeai.backend.model_manager.load.load_default import ModelLoader from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry -from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.util.silence_warnings import SilenceWarnings @@ -41,7 +41,7 @@ @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.VAE, format=ModelFormat.Checkpoint) -class FluxVAELoader(GenericDiffusersLoader): +class FluxVAELoader(ModelLoader): """Class to load VAE models.""" def _load_model( @@ -75,7 +75,7 @@ def _load_model( @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPEmbed, format=ModelFormat.Diffusers) -class ClipCheckpointModel(GenericDiffusersLoader): +class ClipCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( @@ -96,7 +96,7 @@ def _load_model( @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) -class T5Encoder8bCheckpointModel(GenericDiffusersLoader): +class T5Encoder8bCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( @@ -117,7 +117,7 @@ def _load_model( @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) -class T5EncoderCheckpointModel(GenericDiffusersLoader): +class T5EncoderCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( @@ -140,7 +140,7 @@ def _load_model( @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint) -class FluxCheckpointModel(GenericDiffusersLoader): +class FluxCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( @@ -185,7 +185,7 @@ def _load_from_singlefile( @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.BnbQuantizednf4b) -class 
FluxBnbQuantizednf4bCheckpointModel(GenericDiffusersLoader): +class FluxBnbQuantizednf4bCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( From f0408bbb42402a157cda19c4074db47009368eb6 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 11:59:04 -0400 Subject: [PATCH 048/113] Various styling and exception type updates --- invokeai/app/invocations/model.py | 7 +- .../app/services/shared/invocation_context.py | 2 +- .../model_manager/load/model_loaders/flux.py | 77 ++++++++----------- 3 files changed, 35 insertions(+), 51 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 7f4c4a891a2..984266239c8 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -183,7 +183,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: model_key = self.model.key if not context.models.exists(model_key): - raise Exception(f"Unknown model: {model_key}") + raise ValueError(f"Unknown model: {model_key}") transformer = self._get_model(context, SubModelType.Transformer) tokenizer = self._get_model(context, SubModelType.Tokenizer) tokenizer2 = self._get_model(context, SubModelType.Tokenizer2) @@ -203,10 +203,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: legacy_config_path = context.config.get().legacy_conf_path / transformer_config.config_path config_path = legacy_config_path.as_posix() with open(config_path, "r") as stream: - try: - flux_conf = yaml.safe_load(stream) - except: - raise + flux_conf = yaml.safe_load(stream) return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer), diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index ce52d088090..23189b85ab2 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -484,7 +484,7 @@ def import_local_model( ModelInstallJob object defining the install job to be used in tracking the job """ if not model_path.exists(): - raise Exception("Models provided to import_local_model must already exist on disk") + raise ValueError(f"Models provided to import_local_model must already exist on disk at {model_path.as_posix()}") return self._services.model_manager.install.heuristic_import(str(model_path), config=config, inplace=inplace) def load_local_model( diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 44444092e9e..58264ebc256 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -49,29 +49,24 @@ def _load_model( config: AnyModelConfig, submodel_type: Optional[SubModelType] = None, ) -> AnyModel: - if isinstance(config, VAECheckpointConfig): - model_path = Path(config.path) - load_class = AutoEncoder - legacy_config_path = app_config.legacy_conf_path / config.config_path - config_path = legacy_config_path.as_posix() - with open(config_path, "r") as stream: - try: - flux_conf = yaml.safe_load(stream) - except: - raise - - dataclass_fields = {f.name for f in fields(AutoEncoderParams)} - filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} - params = AutoEncoderParams(**filtered_data) - - with SilenceWarnings(): - model = load_class(params) - sd = load_file(model_path) - model.load_state_dict(sd, strict=False, assign=True) - - return model - 
else: - return super()._load_model(config, submodel_type) + if not isinstance(config, VAECheckpointConfig): + raise ValueError("Only VAECheckpointConfig models are currently supported here.") + model_path = Path(config.path) + legacy_config_path = app_config.legacy_conf_path / config.config_path + config_path = legacy_config_path.as_posix() + with open(config_path, "r") as stream: + flux_conf = yaml.safe_load(stream) + + dataclass_fields = {f.name for f in fields(AutoEncoderParams)} + filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} + params = AutoEncoderParams(**filtered_data) + + with SilenceWarnings(): + model = AutoEncoder(params) + sd = load_file(model_path) + model.load_state_dict(sd, strict=False, assign=True) + + return model @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPEmbed, format=ModelFormat.Diffusers) @@ -84,7 +79,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CLIPEmbedDiffusersConfig): - raise Exception("Only CLIPEmbedDiffusersConfig models are currently supported here.") + raise ValueError("Only CLIPEmbedDiffusersConfig models are currently supported here.") match submodel_type: case SubModelType.Tokenizer: @@ -92,7 +87,7 @@ def _load_model( case SubModelType.TextEncoder: return CLIPTextModel.from_pretrained(config.path) - raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) @@ -105,7 +100,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, T5Encoder8bConfig): - raise Exception("Only T5Encoder8bConfig models are currently supported here.") + raise ValueError("Only T5Encoder8bConfig models are currently supported here.") match submodel_type: case SubModelType.Tokenizer2: @@ -113,7 +108,7 @@ def _load_model( case SubModelType.TextEncoder2: return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2") - raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) @@ -126,7 +121,7 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, T5EncoderConfig): - raise Exception("Only T5EncoderConfig models are currently supported here.") + raise ValueError("Only T5EncoderConfig models are currently supported here.") match submodel_type: case SubModelType.Tokenizer2: @@ -136,7 +131,7 @@ def _load_model( Path(config.path) / "text_encoder_2" ) # TODO: Fix hf subfolder install - raise Exception("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint) @@ -149,20 +144,17 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): - raise Exception("Only CheckpointConfigBase models are currently supported here.") + raise ValueError("Only CheckpointConfigBase models are currently supported here.") 
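The refactored loaders above and below share one idiom: the legacy YAML config is filtered down to the fields the params dataclass actually declares before it is constructed, so extra legacy keys cannot break instantiation. A self-contained sketch of that pattern, using a made-up stand-in for AutoEncoderParams/FluxParams:

```python
from dataclasses import dataclass, fields


@dataclass
class DemoParams:
    # Hypothetical stand-in for AutoEncoderParams / FluxParams.
    resolution: int
    in_channels: int


yaml_params = {"resolution": 256, "in_channels": 3, "legacy_only_key": "ignored"}

# Keep only keys the dataclass declares so stale YAML keys don't raise a
# TypeError when the constructor is called.
allowed = {f.name for f in fields(DemoParams)}
params = DemoParams(**{k: v for k, v in yaml_params.items() if k in allowed})
print(params)  # DemoParams(resolution=256, in_channels=3)
```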
legacy_config_path = app_config.legacy_conf_path / config.config_path config_path = legacy_config_path.as_posix() with open(config_path, "r") as stream: - try: - flux_conf = yaml.safe_load(stream) - except: - raise + flux_conf = yaml.safe_load(stream) match submodel_type: case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise Exception("Only Transformer submodels are currently supported.") + raise ValueError("Only Transformer submodels are currently supported.") def _load_from_singlefile( self, @@ -170,7 +162,6 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainCheckpointConfig) - load_class = Flux params = None model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} @@ -178,7 +169,7 @@ def _load_from_singlefile( params = FluxParams(**filtered_data) with SilenceWarnings(): - model = load_class(params) + model = Flux(params) sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) return model @@ -194,20 +185,17 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): - raise Exception("Only CheckpointConfigBase models are currently supported here.") + raise ValueError("Only CheckpointConfigBase models are currently supported here.") legacy_config_path = app_config.legacy_conf_path / config.config_path config_path = legacy_config_path.as_posix() with open(config_path, "r") as stream: - try: - flux_conf = yaml.safe_load(stream) - except: - raise + flux_conf = yaml.safe_load(stream) match submodel_type: case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise Exception("Only Transformer submodels are currently supported.") + raise ValueError("Only Transformer submodels are currently supported.") def _load_from_singlefile( self, @@ -215,7 +203,6 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainBnbQuantized4bCheckpointConfig) - load_class = Flux params = None model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} @@ -224,7 +211,7 @@ def _load_from_singlefile( with SilenceWarnings(): with accelerate.init_empty_weights(): - model = load_class(params) + model = Flux(params) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) sd = load_file(model_path) model.load_state_dict(sd, strict=False, assign=True) From 9e888b1cb99bbb6108b37d196fa124bc46ba0206 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 12:37:25 -0400 Subject: [PATCH 049/113] More flux loader cleanup --- .../model_manager/load/model_loaders/flux.py | 20 +++++++++---------- invokeai/backend/model_manager/probe.py | 1 + 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 58264ebc256..ebc3333eea9 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -64,7 +64,7 @@ def _load_model( with SilenceWarnings(): model = AutoEncoder(params) sd = load_file(model_path) - model.load_state_dict(sd, strict=False, assign=True) + model.load_state_dict(sd, assign=True) return model @@ -83,11 +83,11 @@ def _load_model( match submodel_type: case SubModelType.Tokenizer: - return CLIPTokenizer.from_pretrained(config.path, max_length=77) + return 
CLIPTokenizer.from_pretrained(config.path) case SubModelType.TextEncoder: return CLIPTextModel.from_pretrained(config.path) - raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) @@ -108,7 +108,7 @@ def _load_model( case SubModelType.TextEncoder2: return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2") - raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) @@ -131,7 +131,7 @@ def _load_model( Path(config.path) / "text_encoder_2" ) # TODO: Fix hf subfolder install - raise ValueError("Only Tokenizer and TextEncoder submodels are currently supported.") + raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint) @@ -154,7 +154,7 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise ValueError("Only Transformer submodels are currently supported.") + raise ValueError(f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") def _load_from_singlefile( self, @@ -162,7 +162,6 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainCheckpointConfig) - params = None model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} @@ -171,7 +170,7 @@ def _load_from_singlefile( with SilenceWarnings(): model = Flux(params) sd = load_file(model_path) - model.load_state_dict(sd, strict=False, assign=True) + model.load_state_dict(sd, assign=True) return model @@ -195,7 +194,7 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise ValueError("Only Transformer submodels are currently supported.") + raise ValueError(f"Only Transformer submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}") def _load_from_singlefile( self, @@ -203,7 +202,6 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainBnbQuantized4bCheckpointConfig) - params = None model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} @@ -214,5 +212,5 @@ def _load_from_singlefile( model = Flux(params) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) sd = load_file(model_path) - model.load_state_dict(sd, strict=False, assign=True) + model.load_state_dict(sd, assign=True) return model diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index a3364da7697..778dd583e59 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -224,6 +224,7 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C for key in [str(k) for k in ckpt.keys()]: if key.startswith(("cond_stage_model.", "first_stage_model.", "model.diffusion_model.", "double_blocks.")): + # Keys starting with double_blocks are associated with Flux models return ModelType.Main elif key.startswith(("encoder.conv_in", "decoder.conv_in")): return ModelType.VAE From 6afb1139c82dd8c4d5883a9a800da822bcd054d1 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 18:10:24 +0000 Subject: [PATCH 050/113] Remove duplicate log_time(...) function. --- .../quantization/load_flux_model_bnb_llm_int8.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py index 876f299add1..a24370967cc 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py @@ -1,5 +1,3 @@ -import time -from contextlib import contextmanager from pathlib import Path import accelerate @@ -7,17 +5,7 @@ from safetensors.torch import load_file, save_file from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 - - -@contextmanager -def log_time(name: str): - """Helper context manager to log the time taken by a block of code.""" - start = time.time() - try: - yield None - finally: - end = time.time() - print(f"'{name}' took {end - start:.4f} secs") +from invokeai.backend.quantization.load_flux_model_bnb_nf4 import log_time def main(): From 519bf71dd3c797a5ea2e4a5803844012fafdb003 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 18:19:47 +0000 Subject: [PATCH 051/113] Add docs to the requantize(...) function explaining why it was copied from optimum-quanto. 
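The docstring added below explains why requantize(...) was copied from optimum-quanto and why its freeze() call was dropped, but no call site appears in this part of the series. A hedged usage sketch follows; the helper name and on-disk file names are assumptions for illustration, not InvokeAI API.

```python
import json
from pathlib import Path

import torch
from safetensors.torch import load_file

# The next patch relocates this module to invokeai/backend/quantization/.
from invokeai.backend.requantize import requantize


def load_prequantized(model: torch.nn.Module, model_dir: Path) -> torch.nn.Module:
    """Illustrative helper; the file names are assumed, not a fixed layout."""
    state_dict = load_file(model_dir / "model.safetensors")
    quantization_map = json.loads((model_dir / "quantization_map.json").read_text())
    # Re-applies the per-module quantization recorded in the map so the already
    # quantized weights in state_dict can be loaded directly -- without the slow
    # freeze() pass the upstream implementation performs.
    requantize(model, state_dict=state_dict, quantization_map=quantization_map)
    return model
```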
--- invokeai/backend/requantize.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/requantize.py b/invokeai/backend/requantize.py index 5f506f487d1..aae85bed7c9 100644 --- a/invokeai/backend/requantize.py +++ b/invokeai/backend/requantize.py @@ -3,19 +3,21 @@ import torch from optimum.quanto.quantize import _quantize_submodule -# def custom_freeze(model: torch.nn.Module): -# for name, m in model.named_modules(): -# if isinstance(m, QModuleMixin): -# m.weight = -# m.freeze() - def requantize( model: torch.nn.Module, state_dict: Dict[str, Any], quantization_map: Dict[str, Dict[str, str]], - device: torch.device = None, + device: torch.device | None = None, ): + """This function was initially copied from: + https://github.com/huggingface/optimum-quanto/blob/832f7f5c3926c91fe4f923aaaf037a780ac3e6c3/optimum/quanto/quantize.py#L101 + + The function was modified to remove the `freeze()` call. The `freeze()` call is very slow and unnecessary when the + weights are about to be loaded from a state_dict. + + TODO(ryand): Unless I'm overlooking something, this should be contributed upstream to the `optimum-quanto` library. + """ if device is None: device = next(model.parameters()).device if device.type == "meta": @@ -45,6 +47,7 @@ def move_tensor(t, device): setattr(m, name, torch.nn.Parameter(move_tensor(param, "cpu"))) for name, param in m.named_buffers(recurse=False): setattr(m, name, move_tensor(param, "cpu")) + # Freeze model and move to target device # freeze(model) # model.to(device) From c549a49e60341a5a40c42545d27159854efda6a2 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 18:21:05 +0000 Subject: [PATCH 052/113] Move requantize.py to the quatnization/ dir. --- invokeai/backend/quantization/__init__.py | 0 invokeai/backend/quantization/fast_quantized_diffusion_model.py | 2 +- .../backend/quantization/fast_quantized_transformers_model.py | 2 +- invokeai/backend/{ => quantization}/requantize.py | 0 4 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 invokeai/backend/quantization/__init__.py rename invokeai/backend/{ => quantization}/requantize.py (100%) diff --git a/invokeai/backend/quantization/__init__.py b/invokeai/backend/quantization/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py b/invokeai/backend/quantization/fast_quantized_diffusion_model.py index 65b64a69a17..6ad82b8e9ee 100644 --- a/invokeai/backend/quantization/fast_quantized_diffusion_model.py +++ b/invokeai/backend/quantization/fast_quantized_diffusion_model.py @@ -14,7 +14,7 @@ from optimum.quanto.models import QuantizedDiffusersModel from optimum.quanto.models.shared_dict import ShardedStateDict -from invokeai.backend.requantize import requantize +from invokeai.backend.quantization.requantize import requantize class FastQuantizedDiffusersModel(QuantizedDiffusersModel): diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py index 72636a43fb1..b811b598e7c 100644 --- a/invokeai/backend/quantization/fast_quantized_transformers_model.py +++ b/invokeai/backend/quantization/fast_quantized_transformers_model.py @@ -9,7 +9,7 @@ from transformers.models.auto import AutoModelForTextEncoding from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available -from invokeai.backend.requantize import requantize +from 
invokeai.backend.quantization.requantize import requantize class FastQuantizedTransformersModel(QuantizedTransformersModel): diff --git a/invokeai/backend/requantize.py b/invokeai/backend/quantization/requantize.py similarity index 100% rename from invokeai/backend/requantize.py rename to invokeai/backend/quantization/requantize.py From 41fb09b5e9e222d08ed79c2a43caf04e92fd27fb Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:25:53 -0400 Subject: [PATCH 053/113] update flux_model_loader node to take a T5 encoder from node field instead of hardcoded list, assume all models have been downloaded --- invokeai/app/invocations/fields.py | 1 + invokeai/app/invocations/model.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 1b52f27fb54..6b7d7bef635 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -49,6 +49,7 @@ class UIType(str, Enum, metaclass=MetaEnum): ControlNetModel = "ControlNetModelField" IPAdapterModel = "IPAdapterModelField" T2IAdapterModel = "T2IAdapterModelField" + T5EncoderModel = "T5EncoderModelField" SpandrelImageToImageModel = "SpandrelImageToImageModelField" # endregion diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 984266239c8..300e38b2312 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -177,7 +177,11 @@ class FluxModelLoaderInvocation(BaseInvocation): input=Input.Direct, ) - t5_encoder: T5_ENCODER_OPTIONS = InputField(description="The T5 Encoder model to use.") + t5_encoder: ModelIdentifierField = InputField( + description=FieldDescriptions.t5Encoder, + ui_type=UIType.T5EncoderModel, + input=Input.Direct, + ) def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: model_key = self.model.key @@ -231,9 +235,9 @@ def _get_model(self, context: InvocationContext, submodel: SubModelType) -> Mode return self._install_model( context, submodel, - T5_ENCODER_MAP[self.t5_encoder]["name"], - T5_ENCODER_MAP[self.t5_encoder]["repo"], - ModelFormat(T5_ENCODER_MAP[self.t5_encoder]["format"]), + self.t5_encoder.name, + "", + ModelFormat.T5Encoder, ModelType.T5Encoder, BaseModelType.Any, ) From c66ccadad9cf689eeffa158c700253b3cbd12e54 Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:26:17 -0400 Subject: [PATCH 054/113] add case for clip embed models in probe --- invokeai/backend/model_manager/probe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index 778dd583e59..e552b1cf1e8 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -107,6 +107,7 @@ class ModelProbe(object): "ControlNetModel": ModelType.ControlNet, "CLIPVisionModelWithProjection": ModelType.CLIPVision, "T2IAdapter": ModelType.T2IAdapter, + "CLIPModel": ModelType.CLIPEmbed, } @classmethod From 9020a8a6e947fbefea8e73a0970a40ddfbf54157 Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:26:43 -0400 Subject: [PATCH 055/113] add FLUX schnell starter models and submodels as dependenices or adhoc download options --- .../backend/model_manager/starter_models.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index c460a5e86e6..8be46882a06 100644 --- 
a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -51,10 +51,59 @@ class StarterModel(StarterModelWithoutDependencies): type=ModelType.TextualInversion, ) +t5_base_encoder = StarterModel( + name="t5_base_encoder", + base=BaseModelType.Any, + source="InvokeAI/flux_schnell::t5_xxl_encoder/base", + description="T5-XXL text encoder (used in FLUX pipelines). ~8GB", + type=ModelType.T5Encoder, +) + +t5_8b_quantized_encoder = StarterModel( + name="t5_8b_quantized_encoder", + base=BaseModelType.Any, + source="invokeai/flux_schnell::t5_xxl_encoder/optimum_quanto_qfloat8", + description="T5-XXL text encoder with optimum-quanto qfloat8 quantization (used in FLUX pipelines). ~6GB", + type=ModelType.T5Encoder, +) + +clip_l_encoder = StarterModel( + name="clip-vit-large-patch14", + base=BaseModelType.Any, + source="openai/clip-vit-large-patch14", + description="CLIP-L text encoder (used in FLUX pipelines). ~3GB", + type=ModelType.CLIPEmbed, +) + +flux_vae = StarterModel( + name="FLUX.1-schnell_ae", + base=BaseModelType.Flux, + source="black-forest-labs/FLUX.1-schnell::ae.safetensors", + description="FLUX VAE compatible with both schnell and dev variants.", + type=ModelType.VAE, +) + + # List of starter models, displayed on the frontend. # The order/sort of this list is not changed by the frontend - set it how you want it here. STARTER_MODELS: list[StarterModel] = [ # region: Main + StarterModel( + name="FLUX Schnell (Quantized)", + base=BaseModelType.Flux, + source="InvokeAI/flux_schnell::transformer/bnb_nf4/flux1-schnell-bnb_nf4.safetensors", + description="FLUX schnell transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~14GB", + type=ModelType.Main, + dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder], + ), + StarterModel( + name="FLUX Schnell", + base=BaseModelType.Flux, + source="InvokeAI/flux_schnell::transformer/base/flux1-schnell.safetensors", + description="FLUX schnell transformer in bfloat16. 
Total size with dependencies: ~33GB", + type=ModelType.Main, + dependencies=[t5_base_encoder, flux_vae, clip_l_encoder], + ), StarterModel( name="CyberRealistic v4.1", base=BaseModelType.StableDiffusion1, @@ -125,6 +174,7 @@ class StarterModel(StarterModelWithoutDependencies): # endregion # region VAE sdxl_fp16_vae_fix, + flux_vae, # endregion # region LoRA StarterModel( @@ -450,6 +500,11 @@ class StarterModel(StarterModelWithoutDependencies): type=ModelType.SpandrelImageToImage, ), # endregion + # region TextEncoders + t5_base_encoder, + t5_8b_quantized_encoder, + clip_l_encoder, + # endregion ] assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models" From 72649207d05c67658abfc854aead3bf549b2ac2b Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Wed, 21 Aug 2024 13:26:08 -0400 Subject: [PATCH 056/113] fix(ui): only exclude flux main models from linear UI dropdown, not model manager list --- .../parameters/components/MainModel/ParamMainModelSelect.tsx | 4 ++-- invokeai/frontend/web/src/services/api/hooks/modelsByType.ts | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/src/features/parameters/components/MainModel/ParamMainModelSelect.tsx b/invokeai/frontend/web/src/features/parameters/components/MainModel/ParamMainModelSelect.tsx index c9410dd7a45..89f7b0bc4d0 100644 --- a/invokeai/frontend/web/src/features/parameters/components/MainModel/ParamMainModelSelect.tsx +++ b/invokeai/frontend/web/src/features/parameters/components/MainModel/ParamMainModelSelect.tsx @@ -8,7 +8,7 @@ import { modelSelected } from 'features/parameters/store/actions'; import { selectGenerationSlice } from 'features/parameters/store/generationSlice'; import { memo, useCallback, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; -import { useMainModels } from 'services/api/hooks/modelsByType'; +import { useSDMainModels } from 'services/api/hooks/modelsByType'; import type { MainModelConfig } from 'services/api/types'; const selectModel = createMemoizedSelector(selectGenerationSlice, (generation) => generation.model); @@ -17,7 +17,7 @@ const ParamMainModelSelect = () => { const dispatch = useAppDispatch(); const { t } = useTranslation(); const selectedModel = useAppSelector(selectModel); - const [modelConfigs, { isLoading }] = useMainModels(); + const [modelConfigs, { isLoading }] = useSDMainModels(); const tooltipLabel = useMemo(() => { if (!modelConfigs.length || !selectedModel) { return; diff --git a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts index 273db5a9ded..6daddf7c4eb 100644 --- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts +++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts @@ -9,6 +9,7 @@ import { isFluxMainModelModelConfig, isIPAdapterModelConfig, isLoRAModelConfig, + isNonRefinerMainModelConfig, isNonRefinerNonFluxMainModelConfig, isNonSDXLMainModelConfig, isRefinerMainModelModelConfig, @@ -35,7 +36,8 @@ const buildModelsHook = return [modelConfigs, result] as const; }; -export const useMainModels = buildModelsHook(isNonRefinerNonFluxMainModelConfig); +export const useSDMainModels = buildModelsHook(isNonRefinerNonFluxMainModelConfig); +export const useMainModels = buildModelsHook(isNonRefinerMainModelConfig); export const useNonSDXLMainModels = buildModelsHook(isNonSDXLMainModelConfig); export const useRefinerModels = buildModelsHook(isRefinerMainModelModelConfig); export const useFluxModels = 
buildModelsHook(isFluxMainModelModelConfig); From 3fe9582d4934df35a0e3bafecee281c0489eb4da Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Wed, 21 Aug 2024 13:26:52 -0400 Subject: [PATCH 057/113] fix(ui): pass base/type when installing models, add flux formats to MM badges --- .../StarterModels/StartModelsResultItem.tsx | 12 ++++++++++-- .../subpanels/ModelManagerPanel/ModelBaseBadge.tsx | 2 +- .../subpanels/ModelManagerPanel/ModelFormatBadge.tsx | 6 ++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx index a3c9c82d0eb..4fc83908907 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx @@ -12,10 +12,18 @@ type Props = { export const StarterModelsResultItem = memo(({ result }: Props) => { const { t } = useTranslation(); const allSources = useMemo(() => { - const _allSources = [{ source: result.source, config: { name: result.name, description: result.description } }]; + const _allSources = [ + { + source: result.source, + config: { name: result.name, description: result.description, type: result.type, base: result.base }, + }, + ]; if (result.dependencies) { for (const d of result.dependencies) { - _allSources.push({ source: d.source, config: { name: d.name, description: d.description } }); + _allSources.push({ + source: d.source, + config: { name: d.name, description: d.description, type: d.type, base: d.base }, + }); } } return _allSources; diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx index 2cf4e25354f..3eb0a91d672 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx @@ -13,7 +13,7 @@ const BASE_COLOR_MAP: Record = { 'sd-2': 'teal', sdxl: 'invokeBlue', 'sdxl-refiner': 'invokeBlue', - flux: 'invokeBlue', + flux: 'gold', }; const ModelBaseBadge = ({ base }: Props) => { diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx index a4690662c35..167588ddd38 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx @@ -13,6 +13,9 @@ const FORMAT_NAME_MAP: Record = { invokeai: 'internal', embedding_file: 'embedding', embedding_folder: 'embedding', + t5_encoder: 't5_encoder', + t5_encoder_8b: 't5_encoder_8b', + bnb_quantized_nf4b: 'quantized', }; const FORMAT_COLOR_MAP: Record = { @@ -22,6 +25,9 @@ const FORMAT_COLOR_MAP: Record = { invokeai: 'base', embedding_file: 'base', embedding_folder: 'base', + t5_encoder: 'base', + t5_encoder_8b: 'base', + bnb_quantized_nf4b: 'base', }; const ModelFormatBadge = ({ format }: Props) => { From 24831d45500b71c6720c903c694c5bccb616a2b8 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Wed, 21 Aug 2024 
14:35:39 -0400 Subject: [PATCH 058/113] feat(ui): create new field for t5 encoder models in nodes --- invokeai/frontend/web/public/locales/en.json | 1 + .../Invocation/fields/InputFieldRenderer.tsx | 7 +++ .../T5EncoderModelFieldInputComponent.tsx | 60 +++++++++++++++++++ .../src/features/nodes/store/nodesSlice.ts | 7 +++ .../web/src/features/nodes/types/common.ts | 1 + .../web/src/features/nodes/types/field.ts | 31 ++++++++++ .../util/schema/buildFieldInputTemplate.ts | 16 +++++ .../frontend/web/src/services/api/schema.ts | 25 ++++---- 8 files changed, 133 insertions(+), 15 deletions(-) create mode 100644 invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 1737bd4f297..a9ece94b969 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -784,6 +784,7 @@ "simpleModelPlaceholder": "URL or path to a local file or diffusers folder", "source": "Source", "starterModels": "Starter Models", + "starterModelsInModelManager": "Starter Models can be found in Model Manager", "syncModels": "Sync Models", "textualInversions": "Textual Inversions", "triggerPhrases": "Trigger Phrases", diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx index ba09ce68400..c4e8da6eda7 100644 --- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/InputFieldRenderer.tsx @@ -40,6 +40,8 @@ import { isStringFieldInputTemplate, isT2IAdapterModelFieldInputInstance, isT2IAdapterModelFieldInputTemplate, + isT5EncoderModelFieldInputInstance, + isT5EncoderModelFieldInputTemplate, isVAEModelFieldInputInstance, isVAEModelFieldInputTemplate, } from 'features/nodes/types/field'; @@ -62,6 +64,7 @@ import SDXLMainModelFieldInputComponent from './inputs/SDXLMainModelFieldInputCo import SpandrelImageToImageModelFieldInputComponent from './inputs/SpandrelImageToImageModelFieldInputComponent'; import StringFieldInputComponent from './inputs/StringFieldInputComponent'; import T2IAdapterModelFieldInputComponent from './inputs/T2IAdapterModelFieldInputComponent'; +import T5EncoderModelFieldInputComponent from './inputs/T5EncoderModelFieldInputComponent'; import VAEModelFieldInputComponent from './inputs/VAEModelFieldInputComponent'; type InputFieldProps = { @@ -116,6 +119,10 @@ const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => { return ; } + if (isT5EncoderModelFieldInputInstance(fieldInstance) && isT5EncoderModelFieldInputTemplate(fieldTemplate)) { + return ; + } + if (isLoRAModelFieldInputInstance(fieldInstance) && isLoRAModelFieldInputTemplate(fieldTemplate)) { return ; } diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx new file mode 100644 index 00000000000..d92163c9c31 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx @@ -0,0 +1,60 @@ +import { Combobox, Flex, 
FormControl, Tooltip } from '@invoke-ai/ui-library'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox'; +import { fieldT5EncoderValueChanged } from 'features/nodes/store/nodesSlice'; +import type { T5EncoderModelFieldInputInstance, T5EncoderModelFieldInputTemplate } from 'features/nodes/types/field'; +import { memo, useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useT5EncoderModels } from 'services/api/hooks/modelsByType'; +import type { T5Encoder8bModelConfig, T5EncoderModelConfig } from 'services/api/types'; + +import type { FieldComponentProps } from './types'; + +type Props = FieldComponentProps; + +const T5EncoderModelFieldInputComponent = (props: Props) => { + const { nodeId, field } = props; + const { t } = useTranslation(); + const disabledTabs = useAppSelector((s) => s.config.disabledTabs); + const dispatch = useAppDispatch(); + const [modelConfigs, { isLoading }] = useT5EncoderModels(); + const _onChange = useCallback( + (value: T5Encoder8bModelConfig | T5EncoderModelConfig | null) => { + if (!value) { + return; + } + dispatch( + fieldT5EncoderValueChanged({ + nodeId, + fieldName: field.name, + value, + }) + ); + }, + [dispatch, field.name, nodeId] + ); + const { options, value, onChange, placeholder, noOptionsMessage } = useGroupedModelCombobox({ + modelConfigs, + onChange: _onChange, + isLoading, + selectedModel: field.value, + }); + + return ( + + + + + + + + ); +}; + +export default memo(T5EncoderModelFieldInputComponent); diff --git a/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts b/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts index f9214c15727..6bcd5f276eb 100644 --- a/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts +++ b/invokeai/frontend/web/src/features/nodes/store/nodesSlice.ts @@ -23,6 +23,7 @@ import type { StatefulFieldValue, StringFieldValue, T2IAdapterModelFieldValue, + T5EncoderModelFieldValue, VAEModelFieldValue, } from 'features/nodes/types/field'; import { @@ -44,6 +45,7 @@ import { zStatefulFieldValue, zStringFieldValue, zT2IAdapterModelFieldValue, + zT5EncoderModelFieldValue, zVAEModelFieldValue, } from 'features/nodes/types/field'; import type { AnyNode, InvocationNodeEdge } from 'features/nodes/types/invocation'; @@ -341,6 +343,9 @@ export const nodesSlice = createSlice({ ) => { fieldValueReducer(state, action, zSpandrelImageToImageModelFieldValue); }, + fieldT5EncoderValueChanged: (state, action: FieldValueAction) => { + fieldValueReducer(state, action, zT5EncoderModelFieldValue); + }, fieldEnumModelValueChanged: (state, action: FieldValueAction) => { fieldValueReducer(state, action, zEnumFieldValue); }, @@ -402,6 +407,7 @@ export const { fieldSchedulerValueChanged, fieldStringValueChanged, fieldVaeModelValueChanged, + fieldT5EncoderValueChanged, nodeEditorReset, nodeIsIntermediateChanged, nodeIsOpenChanged, @@ -514,6 +520,7 @@ export const isAnyNodeOrEdgeMutation = isAnyOf( fieldSchedulerValueChanged, fieldStringValueChanged, fieldVaeModelValueChanged, + fieldT5EncoderValueChanged, nodesChanged, nodeIsIntermediateChanged, nodeIsOpenChanged, diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index 894d257f286..e8062713454 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -73,6 +73,7 @@ const zModelType = z.enum([ 
'onnx', 'clip_vision', 'spandrel_image_to_image', + 't5_encoder', ]); const zSubModelType = z.enum([ 'unet', diff --git a/invokeai/frontend/web/src/features/nodes/types/field.ts b/invokeai/frontend/web/src/features/nodes/types/field.ts index 607a1005acb..ee0f61a0fea 100644 --- a/invokeai/frontend/web/src/features/nodes/types/field.ts +++ b/invokeai/frontend/web/src/features/nodes/types/field.ts @@ -147,6 +147,10 @@ const zSpandrelImageToImageModelFieldType = zFieldTypeBase.extend({ name: z.literal('SpandrelImageToImageModelField'), originalType: zStatelessFieldType.optional(), }); +const zT5EncoderModelFieldType = zFieldTypeBase.extend({ + name: z.literal('T5EncoderModelField'), + originalType: zStatelessFieldType.optional(), +}); const zSchedulerFieldType = zFieldTypeBase.extend({ name: z.literal('SchedulerField'), originalType: zStatelessFieldType.optional(), @@ -170,6 +174,7 @@ const zStatefulFieldType = z.union([ zIPAdapterModelFieldType, zT2IAdapterModelFieldType, zSpandrelImageToImageModelFieldType, + zT5EncoderModelFieldType, zColorFieldType, zSchedulerFieldType, ]); @@ -641,6 +646,29 @@ export const isSpandrelImageToImageModelFieldInputTemplate = ( zSpandrelImageToImageModelFieldInputTemplate.safeParse(val).success; // #endregion +// #region T5EncoderModelField + +export const zT5EncoderModelFieldValue = zModelIdentifierField.optional(); +const zT5EncoderModelFieldInputInstance = zFieldInputInstanceBase.extend({ + value: zT5EncoderModelFieldValue, +}); +const zT5EncoderModelFieldInputTemplate = zFieldInputTemplateBase.extend({ + type: zT5EncoderModelFieldType, + originalType: zFieldType.optional(), + default: zT5EncoderModelFieldValue, +}); + +export type T5EncoderModelFieldValue = z.infer<typeof zT5EncoderModelFieldValue>; + +export type T5EncoderModelFieldInputInstance = z.infer<typeof zT5EncoderModelFieldInputInstance>; +export type T5EncoderModelFieldInputTemplate = z.infer<typeof zT5EncoderModelFieldInputTemplate>; +export const isT5EncoderModelFieldInputInstance = (val: unknown): val is T5EncoderModelFieldInputInstance => + zT5EncoderModelFieldInputInstance.safeParse(val).success; +export const isT5EncoderModelFieldInputTemplate = (val: unknown): val is T5EncoderModelFieldInputTemplate => + zT5EncoderModelFieldInputTemplate.safeParse(val).success; + +// #endregion + // #region SchedulerField export const zSchedulerFieldValue = zSchedulerField.optional(); @@ -729,6 +757,7 @@ export const zStatefulFieldValue = z.union([ zIPAdapterModelFieldValue, zT2IAdapterModelFieldValue, zSpandrelImageToImageModelFieldValue, + zT5EncoderModelFieldValue, zColorFieldValue, zSchedulerFieldValue, ]); @@ -758,6 +787,7 @@ const zStatefulFieldInputInstance = z.union([ zIPAdapterModelFieldInputInstance, zT2IAdapterModelFieldInputInstance, zSpandrelImageToImageModelFieldInputInstance, + zT5EncoderModelFieldInputInstance, zColorFieldInputInstance, zSchedulerFieldInputInstance, ]); @@ -788,6 +818,7 @@ const zStatefulFieldInputTemplate = z.union([ zIPAdapterModelFieldInputTemplate, zT2IAdapterModelFieldInputTemplate, zSpandrelImageToImageModelFieldInputTemplate, + zT5EncoderModelFieldInputTemplate, zColorFieldInputTemplate, zSchedulerFieldInputTemplate, zStatelessFieldInputTemplate, diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts index f4f3ef85afa..5149bd4d3a1 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts @@ -23,6 +23,7 @@ import type { StatelessFieldInputTemplate, 
StringFieldInputTemplate, T2IAdapterModelFieldInputTemplate, + T5EncoderModelFieldInputTemplate, VAEModelFieldInputTemplate, } from 'features/nodes/types/field'; import { isStatefulFieldType } from 'features/nodes/types/field'; @@ -223,6 +224,20 @@ const buildVAEModelFieldInputTemplate: FieldInputTemplateBuilder = ({ + schemaObject, + baseField, + fieldType, +}) => { + const template: T5EncoderModelFieldInputTemplate = { + ...baseField, + type: fieldType, + default: schemaObject.default ?? undefined, + }; + + return template; +}; + const buildLoRAModelFieldInputTemplate: FieldInputTemplateBuilder = ({ schemaObject, baseField, @@ -407,6 +422,7 @@ export const TEMPLATE_BUILDER_MAP: Record Date: Wed, 21 Aug 2024 14:23:39 -0400 Subject: [PATCH 059/113] tsc and lint fix --- .../web/src/features/nodes/types/common.ts | 1 + .../util/schema/buildFieldInputInstance.ts | 1 + .../frontend/web/src/services/api/schema.ts | 14240 ++++++++++++++++ .../frontend/web/src/services/api/types.ts | 2 +- 4 files changed, 14243 insertions(+), 1 deletion(-) diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index e8062713454..3fafcbce46f 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -74,6 +74,7 @@ const zModelType = z.enum([ 'clip_vision', 'spandrel_image_to_image', 't5_encoder', + 'clip_embed', ]); const zSubModelType = z.enum([ 'unet', diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts index e8784a11638..8afda4e2a78 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputInstance.ts @@ -22,6 +22,7 @@ const FIELD_VALUE_FALLBACK_MAP: Record = SpandrelImageToImageModelField: undefined, VAEModelField: undefined, ControlNetModelField: undefined, + T5EncoderModelField: undefined, }; export const buildFieldInputInstance = (id: string, template: FieldInputTemplate): FieldInputInstance => { diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 8c3849593ab..56fa7c03d2f 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -1381,6 +1381,7 @@ export type paths = { }; export type webhooks = Record; export type components = { +<<<<<<< HEAD schemas: { /** AddImagesToBoardResult */ AddImagesToBoardResult: { @@ -15685,6 +15686,14245 @@ export type components = { requestBodies: never; headers: never; pathItems: never; +======= + schemas: { + /** AddImagesToBoardResult */ + AddImagesToBoardResult: { + /** + * Board Id + * @description The id of the board the images were added to + */ + board_id: string; + /** + * Added Image Names + * @description The image names that were added to the board + */ + added_image_names: string[]; + }; + /** + * Add Integers + * @description Adds two numbers + */ + AddInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * A + * @description The first number + * @default 0 + */ + a?: number; + /** + * B + * @description The second number + * @default 0 + */ + b?: number; + /** + * type + * @default add + * @constant + * @enum {string} + */ + type: "add"; + }; + /** + * Alpha Mask to Tensor + * @description Convert a mask image to a tensor. Opaque regions are 1 and transparent regions are 0. + */ + AlphaMaskToTensorInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The mask image to convert. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Invert + * @description Whether to invert the mask. + * @default false + */ + invert?: boolean; + /** + * type + * @default alpha_mask_to_tensor + * @constant + * @enum {string} + */ + type: "alpha_mask_to_tensor"; + }; + /** + * AppConfig + * @description App Config Response + */ + AppConfig: { + /** + * Infill Methods + * @description List of available infill methods + */ + infill_methods: string[]; + /** + * Upscaling Methods + * @description List of upscaling methods + */ + upscaling_methods: components["schemas"]["Upscaler"][]; + /** + * Nsfw Methods + * @description List of NSFW checking methods + */ + nsfw_methods: string[]; + /** + * Watermarking Methods + * @description List of invisible watermark methods + */ + watermarking_methods: string[]; + }; + /** + * AppDependencyVersions + * @description App depencency Versions Response + */ + AppDependencyVersions: { + /** + * Accelerate + * @description accelerate version + */ + accelerate: string; + /** + * Compel + * @description compel version + */ + compel: string; + /** + * Cuda + * @description CUDA version + */ + cuda: string | null; + /** + * Diffusers + * @description diffusers version + */ + diffusers: string; + /** + * Numpy + * @description Numpy version + */ + numpy: string; + /** + * Opencv + * @description OpenCV version + */ + opencv: string; + /** + * Onnx + * @description ONNX version + */ + onnx: string; + /** + * Pillow + * @description Pillow (PIL) version + */ + pillow: string; + /** + * Python + * @description Python version + */ + python: string; + /** + * Torch + * @description PyTorch version + */ + torch: string; + /** + * Torchvision + * @description PyTorch Vision version + */ + torchvision: string; + /** + * Transformers + * @description transformers version + */ + transformers: string; + /** + * Xformers + * @description xformers version + */ + xformers: string | null; + }; + /** + * AppVersion + * @description App Version Response + */ + AppVersion: { + /** + * Version + * @description App version + */ + version: string; + }; + /** + * BaseMetadata + * @description Adds typing data for discriminated union. + */ + BaseMetadata: { + /** + * Name + * @description model's name + */ + name: string; + /** + * Type + * @default basemetadata + * @constant + * @enum {string} + */ + type?: "basemetadata"; + }; + /** + * BaseModelType + * @description Base model type. 
+ * @enum {string} + */ + BaseModelType: "any" | "sd-1" | "sd-2" | "sdxl" | "sdxl-refiner" | "flux"; + /** Batch */ + Batch: { + /** + * Batch Id + * @description The ID of the batch + */ + batch_id?: string; + /** + * Data + * @description The batch data collection. + */ + data?: components["schemas"]["BatchDatum"][][] | null; + /** @description The graph to initialize the session with */ + graph: components["schemas"]["Graph"]; + /** @description The workflow to initialize the session with */ + workflow?: components["schemas"]["WorkflowWithoutID"] | null; + /** + * Runs + * @description Int stating how many times to iterate through all possible batch indices + * @default 1 + */ + runs: number; + }; + /** BatchDatum */ + BatchDatum: { + /** + * Node Path + * @description The node into which this batch data collection will be substituted. + */ + node_path: string; + /** + * Field Name + * @description The field into which this batch data collection will be substituted. + */ + field_name: string; + /** + * Items + * @description The list of items to substitute into the node/field. + */ + items?: (string | number)[]; + }; + /** + * BatchEnqueuedEvent + * @description Event model for batch_enqueued + */ + BatchEnqueuedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Batch Id + * @description The ID of the batch + */ + batch_id: string; + /** + * Enqueued + * @description The number of invocations enqueued + */ + enqueued: number; + /** + * Requested + * @description The number of invocations initially requested to be enqueued (may be less than enqueued if queue was full) + */ + requested: number; + /** + * Priority + * @description The priority of the batch + */ + priority: number; + }; + /** BatchStatus */ + BatchStatus: { + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Batch Id + * @description The ID of the batch + */ + batch_id: string; + /** + * Pending + * @description Number of queue items with status 'pending' + */ + pending: number; + /** + * In Progress + * @description Number of queue items with status 'in_progress' + */ + in_progress: number; + /** + * Completed + * @description Number of queue items with status 'complete' + */ + completed: number; + /** + * Failed + * @description Number of queue items with status 'error' + */ + failed: number; + /** + * Canceled + * @description Number of queue items with status 'canceled' + */ + canceled: number; + /** + * Total + * @description Total number of queue items + */ + total: number; + }; + /** + * Blank Image + * @description Creates a blank image and forwards it to the pipeline + */ + BlankImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Width + * @description The width of the image + * @default 512 + */ + width?: number; + /** + * Height + * @description The height of the image + * @default 512 + */ + height?: number; + /** + * Mode + * @description The mode of the image + * @default RGB + * @enum {string} + */ + mode?: "RGB" | "RGBA"; + /** + * @description The color of the image + * @default { + * "r": 0, + * "g": 0, + * "b": 0, + * "a": 255 + * } + */ + color?: components["schemas"]["ColorField"]; + /** + * type + * @default blank_image + * @constant + * @enum {string} + */ + type: "blank_image"; + }; + /** + * Blend Latents + * @description Blend two latents using a given alpha. Latents must have same size. + */ + BlendLatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents_a?: components["schemas"]["LatentsField"]; + /** + * @description Latents tensor + * @default null + */ + latents_b?: components["schemas"]["LatentsField"]; + /** + * Alpha + * @description Blending factor. 0.0 = use input A only, 1.0 = use input B only, 0.5 = 50% mix of input A and input B. + * @default 0.5 + */ + alpha?: number; + /** + * type + * @default lblend + * @constant + * @enum {string} + */ + type: "lblend"; + }; + /** BoardChanges */ + BoardChanges: { + /** + * Board Name + * @description The board's new name. + */ + board_name?: string | null; + /** + * Cover Image Name + * @description The name of the board's new cover image. + */ + cover_image_name?: string | null; + /** + * Archived + * @description Whether or not the board is archived + */ + archived?: boolean | null; + }; + /** + * BoardDTO + * @description Deserialized board record with cover image URL and image count. + */ + BoardDTO: { + /** + * Board Id + * @description The unique ID of the board. + */ + board_id: string; + /** + * Board Name + * @description The name of the board. + */ + board_name: string; + /** + * Created At + * @description The created timestamp of the board. + */ + created_at: string; + /** + * Updated At + * @description The updated timestamp of the board. + */ + updated_at: string; + /** + * Deleted At + * @description The deleted timestamp of the board. + */ + deleted_at?: string | null; + /** + * Cover Image Name + * @description The name of the board's cover image. + */ + cover_image_name: string | null; + /** + * Archived + * @description Whether or not the board is archived. + */ + archived: boolean; + /** + * Is Private + * @description Whether the board is private. + */ + is_private?: boolean | null; + /** + * Image Count + * @description The number of images in the board. 
+ */ + image_count: number; + }; + /** + * BoardField + * @description A board primitive field + */ + BoardField: { + /** + * Board Id + * @description The id of the board + */ + board_id: string; + }; + /** Body_add_image_to_board */ + Body_add_image_to_board: { + /** + * Board Id + * @description The id of the board to add to + */ + board_id: string; + /** + * Image Name + * @description The name of the image to add + */ + image_name: string; + }; + /** Body_add_images_to_board */ + Body_add_images_to_board: { + /** + * Board Id + * @description The id of the board to add to + */ + board_id: string; + /** + * Image Names + * @description The names of the images to add + */ + image_names: string[]; + }; + /** Body_cancel_by_batch_ids */ + Body_cancel_by_batch_ids: { + /** + * Batch Ids + * @description The list of batch_ids to cancel all queue items for + */ + batch_ids: string[]; + }; + /** Body_create_workflow */ + Body_create_workflow: { + /** @description The workflow to create */ + workflow: components["schemas"]["WorkflowWithoutID"]; + }; + /** Body_delete_images_from_list */ + Body_delete_images_from_list: { + /** + * Image Names + * @description The list of names of images to delete + */ + image_names: string[]; + }; + /** Body_download */ + Body_download: { + /** + * Source + * Format: uri + * @description download source + */ + source: string; + /** + * Dest + * @description download destination + */ + dest: string; + /** + * Priority + * @description queue priority + * @default 10 + */ + priority?: number; + /** + * Access Token + * @description token for authorization to download + */ + access_token?: string | null; + }; + /** Body_download_images_from_list */ + Body_download_images_from_list: { + /** + * Image Names + * @description The list of names of images to download + */ + image_names?: string[] | null; + /** + * Board Id + * @description The board from which image should be downloaded + */ + board_id?: string | null; + }; + /** Body_enqueue_batch */ + Body_enqueue_batch: { + /** @description Batch to process */ + batch: components["schemas"]["Batch"]; + /** + * Prepend + * @description Whether or not to prepend this batch in the queue + * @default false + */ + prepend?: boolean; + }; + /** Body_parse_dynamicprompts */ + Body_parse_dynamicprompts: { + /** + * Prompt + * @description The prompt to parse with dynamicprompts + */ + prompt: string; + /** + * Max Prompts + * @description The max number of prompts to generate + * @default 1000 + */ + max_prompts?: number; + /** + * Combinatorial + * @description Whether to use the combinatorial generator + * @default true + */ + combinatorial?: boolean; + }; + /** Body_remove_image_from_board */ + Body_remove_image_from_board: { + /** + * Image Name + * @description The name of the image to remove + */ + image_name: string; + }; + /** Body_remove_images_from_board */ + Body_remove_images_from_board: { + /** + * Image Names + * @description The names of the images to remove + */ + image_names: string[]; + }; + /** Body_star_images_in_list */ + Body_star_images_in_list: { + /** + * Image Names + * @description The list of names of images to star + */ + image_names: string[]; + }; + /** Body_unstar_images_in_list */ + Body_unstar_images_in_list: { + /** + * Image Names + * @description The list of names of images to unstar + */ + image_names: string[]; + }; + /** Body_update_model_image */ + Body_update_model_image: { + /** + * Image + * Format: binary + */ + image: Blob; + }; + /** Body_update_workflow */ + 
Body_update_workflow: { + /** @description The updated workflow */ + workflow: components["schemas"]["Workflow"]; + }; + /** Body_upload_image */ + Body_upload_image: { + /** + * File + * Format: binary + */ + file: Blob; + /** @description The metadata to associate with the image */ + metadata?: components["schemas"]["JsonValue"] | null; + }; + /** + * Boolean Collection Primitive + * @description A collection of boolean primitive values + */ + BooleanCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of boolean values + * @default [] + */ + collection?: boolean[]; + /** + * type + * @default boolean_collection + * @constant + * @enum {string} + */ + type: "boolean_collection"; + }; + /** + * BooleanCollectionOutput + * @description Base class for nodes that output a collection of booleans + */ + BooleanCollectionOutput: { + /** + * Collection + * @description The output boolean collection + */ + collection: boolean[]; + /** + * type + * @default boolean_collection_output + * @constant + * @enum {string} + */ + type: "boolean_collection_output"; + }; + /** + * Boolean Primitive + * @description A boolean primitive value + */ + BooleanInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The boolean value + * @default false + */ + value?: boolean; + /** + * type + * @default boolean + * @constant + * @enum {string} + */ + type: "boolean"; + }; + /** + * BooleanOutput + * @description Base class for nodes that output a single boolean + */ + BooleanOutput: { + /** + * Value + * @description The output boolean + */ + value: boolean; + /** + * type + * @default boolean_output + * @constant + * @enum {string} + */ + type: "boolean_output"; + }; + /** + * BoundingBoxCollectionOutput + * @description Base class for nodes that output a collection of bounding boxes + */ + BoundingBoxCollectionOutput: { + /** + * Bounding Boxes + * @description The output bounding boxes. + */ + collection: components["schemas"]["BoundingBoxField"][]; + /** + * type + * @default bounding_box_collection_output + * @constant + * @enum {string} + */ + type: "bounding_box_collection_output"; + }; + /** + * BoundingBoxField + * @description A bounding box primitive value. + */ + BoundingBoxField: { + /** + * X Min + * @description The minimum x-coordinate of the bounding box (inclusive). + */ + x_min: number; + /** + * X Max + * @description The maximum x-coordinate of the bounding box (exclusive). + */ + x_max: number; + /** + * Y Min + * @description The minimum y-coordinate of the bounding box (inclusive). + */ + y_min: number; + /** + * Y Max + * @description The maximum y-coordinate of the bounding box (exclusive). 
+ */ + y_max: number; + /** + * Score + * @description The score associated with the bounding box. In the range [0, 1]. This value is typically set when the bounding box was produced by a detector and has an associated confidence score. + * @default null + */ + score?: number | null; + }; + /** + * Bounding Box + * @description Create a bounding box manually by supplying box coordinates + */ + BoundingBoxInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * X Min + * @description x-coordinate of the bounding box's top left vertex + * @default 0 + */ + x_min?: number; + /** + * Y Min + * @description y-coordinate of the bounding box's top left vertex + * @default 0 + */ + y_min?: number; + /** + * X Max + * @description x-coordinate of the bounding box's bottom right vertex + * @default 0 + */ + x_max?: number; + /** + * Y Max + * @description y-coordinate of the bounding box's bottom right vertex + * @default 0 + */ + y_max?: number; + /** + * type + * @default bounding_box + * @constant + * @enum {string} + */ + type: "bounding_box"; + }; + /** + * BoundingBoxOutput + * @description Base class for nodes that output a single bounding box + */ + BoundingBoxOutput: { + /** @description The output bounding box. */ + bounding_box: components["schemas"]["BoundingBoxField"]; + /** + * type + * @default bounding_box_output + * @constant + * @enum {string} + */ + type: "bounding_box_output"; + }; + /** + * BulkDownloadCompleteEvent + * @description Event model for bulk_download_complete + */ + BulkDownloadCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Bulk Download Id + * @description The ID of the bulk image download + */ + bulk_download_id: string; + /** + * Bulk Download Item Id + * @description The ID of the bulk image download item + */ + bulk_download_item_id: string; + /** + * Bulk Download Item Name + * @description The name of the bulk image download item + */ + bulk_download_item_name: string; + }; + /** + * BulkDownloadErrorEvent + * @description Event model for bulk_download_error + */ + BulkDownloadErrorEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Bulk Download Id + * @description The ID of the bulk image download + */ + bulk_download_id: string; + /** + * Bulk Download Item Id + * @description The ID of the bulk image download item + */ + bulk_download_item_id: string; + /** + * Bulk Download Item Name + * @description The name of the bulk image download item + */ + bulk_download_item_name: string; + /** + * Error + * @description The error message + */ + error: string; + }; + /** + * BulkDownloadStartedEvent + * @description Event model for bulk_download_started + */ + BulkDownloadStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Bulk Download Id + * @description The ID of the bulk image download + */ + bulk_download_id: string; + /** + * Bulk Download Item Id + * @description The ID of the bulk image download item + */ + bulk_download_item_id: string; + /** + * Bulk Download Item Name + * @description The name of the 
bulk image download item + */ + bulk_download_item_name: string; + }; + /** + * CLIPEmbedDiffusersConfig + * @description Model config for Clip Embeddings. + */ + CLIPEmbedDiffusersConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + /** + * Type + * @default clip_embed + * @constant + * @enum {string} + */ + type: "clip_embed"; + }; + /** CLIPField */ + CLIPField: { + /** @description Info to load tokenizer submodel */ + tokenizer: components["schemas"]["ModelIdentifierField"]; + /** @description Info to load text_encoder submodel */ + text_encoder: components["schemas"]["ModelIdentifierField"]; + /** + * Skipped Layers + * @description Number of skipped layers in text_encoder + */ + skipped_layers: number; + /** + * Loras + * @description LoRAs to apply on model loading + */ + loras: components["schemas"]["LoRAField"][]; + }; + /** + * CLIPOutput + * @description Base class for invocations that output a CLIP field + */ + CLIPOutput: { + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + */ + clip: components["schemas"]["CLIPField"]; + /** + * type + * @default clip_output + * @constant + * @enum {string} + */ + type: "clip_output"; + }; + /** + * CLIP Skip + * @description Skip layers in clip text_encoder model. + */ + CLIPSkipInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"]; + /** + * Skipped Layers + * @description Number of layers to skip in text encoder + * @default 0 + */ + skipped_layers?: number; + /** + * type + * @default clip_skip + * @constant + * @enum {string} + */ + type: "clip_skip"; + }; + /** + * CLIPSkipInvocationOutput + * @description CLIP skip node output + */ + CLIPSkipInvocationOutput: { + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip: components["schemas"]["CLIPField"] | null; + /** + * type + * @default clip_skip_output + * @constant + * @enum {string} + */ + type: "clip_skip_output"; + }; + /** + * CLIPVisionDiffusersConfig + * @description Model config for CLIPVision. + */ + CLIPVisionDiffusersConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + /** + * Type + * @default clip_vision + * @constant + * @enum {string} + */ + type: "clip_vision"; + }; + /** + * CV2 Infill + * @description Infills transparent areas of an image using OpenCV Inpainting + */ + CV2InfillInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default infill_cv2 + * @constant + * @enum {string} + */ + type: "infill_cv2"; + }; + /** + * Calculate Image Tiles Even Split + * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. + */ + CalculateImageTilesEvenSplitInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Image Width + * @description The image width, in pixels, to calculate tiles for. + * @default 1024 + */ + image_width?: number; + /** + * Image Height + * @description The image height, in pixels, to calculate tiles for. + * @default 1024 + */ + image_height?: number; + /** + * Num Tiles X + * @description Number of tiles to divide image into on the x axis + * @default 2 + */ + num_tiles_x?: number; + /** + * Num Tiles Y + * @description Number of tiles to divide image into on the y axis + * @default 2 + */ + num_tiles_y?: number; + /** + * Overlap + * @description The overlap, in pixels, between adjacent tiles. + * @default 128 + */ + overlap?: number; + /** + * type + * @default calculate_image_tiles_even_split + * @constant + * @enum {string} + */ + type: "calculate_image_tiles_even_split"; + }; + /** + * Calculate Image Tiles + * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. + */ + CalculateImageTilesInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Image Width + * @description The image width, in pixels, to calculate tiles for. + * @default 1024 + */ + image_width?: number; + /** + * Image Height + * @description The image height, in pixels, to calculate tiles for. + * @default 1024 + */ + image_height?: number; + /** + * Tile Width + * @description The tile width, in pixels. + * @default 576 + */ + tile_width?: number; + /** + * Tile Height + * @description The tile height, in pixels. + * @default 576 + */ + tile_height?: number; + /** + * Overlap + * @description The target overlap, in pixels, between adjacent tiles. Adjacent tiles will overlap by at least this amount + * @default 128 + */ + overlap?: number; + /** + * type + * @default calculate_image_tiles + * @constant + * @enum {string} + */ + type: "calculate_image_tiles"; + }; + /** + * Calculate Image Tiles Minimum Overlap + * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. + */ + CalculateImageTilesMinimumOverlapInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Image Width + * @description The image width, in pixels, to calculate tiles for. + * @default 1024 + */ + image_width?: number; + /** + * Image Height + * @description The image height, in pixels, to calculate tiles for. + * @default 1024 + */ + image_height?: number; + /** + * Tile Width + * @description The tile width, in pixels. + * @default 576 + */ + tile_width?: number; + /** + * Tile Height + * @description The tile height, in pixels. + * @default 576 + */ + tile_height?: number; + /** + * Min Overlap + * @description Minimum overlap between adjacent tiles, in pixels. + * @default 128 + */ + min_overlap?: number; + /** + * type + * @default calculate_image_tiles_min_overlap + * @constant + * @enum {string} + */ + type: "calculate_image_tiles_min_overlap"; + }; + /** CalculateImageTilesOutput */ + CalculateImageTilesOutput: { + /** + * Tiles + * @description The tiles coordinates that cover a particular image shape. + */ + tiles: components["schemas"]["Tile"][]; + /** + * type + * @default calculate_image_tiles_output + * @constant + * @enum {string} + */ + type: "calculate_image_tiles_output"; + }; + /** + * CancelByBatchIDsResult + * @description Result of canceling by list of batch ids + */ + CancelByBatchIDsResult: { + /** + * Canceled + * @description Number of queue items canceled + */ + canceled: number; + }; + /** + * Canny Processor + * @description Canny edge detection for ControlNet + */ + CannyImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * Low Threshold + * @description The low threshold of the Canny pixel gradient (0-255) + * @default 100 + */ + low_threshold?: number; + /** + * High Threshold + * @description The high threshold of the Canny pixel gradient (0-255) + * @default 200 + */ + high_threshold?: number; + /** + * type + * @default canny_image_processor + * @constant + * @enum {string} + */ + type: "canny_image_processor"; + }; + /** + * Canvas Paste Back + * @description Combines two images by using the mask provided. Intended for use on the Unified Canvas. 
+ */ + CanvasPasteBackInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The source image + * @default null + */ + source_image?: components["schemas"]["ImageField"]; + /** + * @description The target image + * @default null + */ + target_image?: components["schemas"]["ImageField"]; + /** + * @description The mask to use when pasting + * @default null + */ + mask?: components["schemas"]["ImageField"]; + /** + * Mask Blur + * @description The amount to blur the mask by + * @default 0 + */ + mask_blur?: number; + /** + * type + * @default canvas_paste_back + * @constant + * @enum {string} + */ + type: "canvas_paste_back"; + }; + /** + * Center Pad or Crop Image + * @description Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. + */ + CenterPadCropInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to crop + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Left + * @description Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + left?: number; + /** + * Right + * @description Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + right?: number; + /** + * Top + * @description Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + top?: number; + /** + * Bottom + * @description Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + bottom?: number; + /** + * type + * @default img_pad_crop + * @constant + * @enum {string} + */ + type: "img_pad_crop"; + }; + /** + * Classification + * @description The classification of an Invocation. + * - `Stable`: The invocation, including its inputs/outputs and internal logic, is stable. You may build workflows with it, having confidence that they will not break because of a change in this invocation. + * - `Beta`: The invocation is not yet stable, but is planned to be stable in the future. Workflows built around this invocation may break, but we are committed to supporting this invocation long-term. + * - `Prototype`: The invocation is not yet stable and may be removed from the application at any time. Workflows built around this invocation may break, and we are *not* committed to supporting this invocation. 
+ * @enum {string} + */ + Classification: "stable" | "beta" | "prototype"; + /** + * ClearResult + * @description Result of clearing the session queue + */ + ClearResult: { + /** + * Deleted + * @description Number of queue items deleted + */ + deleted: number; + }; + /** + * CollectInvocation + * @description Collects values into a collection + */ + CollectInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection Item + * @description The item to collect (all inputs must be of the same type) + * @default null + */ + item?: unknown; + /** + * Collection + * @description The collection, will be provided on execution + * @default [] + */ + collection?: unknown[]; + /** + * type + * @default collect + * @constant + * @enum {string} + */ + type: "collect"; + }; + /** CollectInvocationOutput */ + CollectInvocationOutput: { + /** + * Collection + * @description The collection of input items + */ + collection: unknown[]; + /** + * type + * @default collect_output + * @constant + * @enum {string} + */ + type: "collect_output"; + }; + /** + * ColorCollectionOutput + * @description Base class for nodes that output a collection of colors + */ + ColorCollectionOutput: { + /** + * Collection + * @description The output colors + */ + collection: components["schemas"]["ColorField"][]; + /** + * type + * @default color_collection_output + * @constant + * @enum {string} + */ + type: "color_collection_output"; + }; + /** + * Color Correct + * @description Shifts the colors of a target image to match the reference image, optionally + * using a mask to only color-correct certain regions of the target image. + */ + ColorCorrectInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to color-correct + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description Reference image for color-correction + * @default null + */ + reference?: components["schemas"]["ImageField"]; + /** + * @description Mask to use when applying color-correction + * @default null + */ + mask?: components["schemas"]["ImageField"] | null; + /** + * Mask Blur Radius + * @description Mask blur radius + * @default 8 + */ + mask_blur_radius?: number; + /** + * type + * @default color_correct + * @constant + * @enum {string} + */ + type: "color_correct"; + }; + /** + * ColorField + * @description A color primitive field + */ + ColorField: { + /** + * R + * @description The red component + */ + r: number; + /** + * G + * @description The green component + */ + g: number; + /** + * B + * @description The blue component + */ + b: number; + /** + * A + * @description The alpha component + */ + a: number; + }; + /** + * Color Primitive + * @description A color primitive value + */ + ColorInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The color value + * @default { + * "r": 0, + * "g": 0, + * "b": 0, + * "a": 255 + * } + */ + color?: components["schemas"]["ColorField"]; + /** + * type + * @default color + * @constant + * @enum {string} + */ + type: "color"; + }; + /** + * Color Map Processor + * @description Generates a color map from the provided image + */ + ColorMapImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Color Map Tile Size + * @description Tile size + * @default 64 + */ + color_map_tile_size?: number; + /** + * type + * @default color_map_image_processor + * @constant + * @enum {string} + */ + type: "color_map_image_processor"; + }; + /** + * ColorOutput + * @description Base class for nodes that output a single color + */ + ColorOutput: { + /** @description The output color */ + color: components["schemas"]["ColorField"]; + /** + * type + * @default color_output + * @constant + * @enum {string} + */ + type: "color_output"; + }; + /** + * Prompt + * @description Parse prompt using compel package to conditioning. + */ + CompelInvocation: { + /** + * Id + * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Prompt + * @description Prompt to be parsed by Compel to create a conditioning tensor + * @default + */ + prompt?: string; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"]; + /** + * @description A mask defining the region that this conditioning prompt applies to. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + /** + * type + * @default compel + * @constant + * @enum {string} + */ + type: "compel"; + }; + /** + * Conditioning Collection Primitive + * @description A collection of conditioning tensor primitive values + */ + ConditioningCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of conditioning tensors + * @default [] + */ + collection?: components["schemas"]["ConditioningField"][]; + /** + * type + * @default conditioning_collection + * @constant + * @enum {string} + */ + type: "conditioning_collection"; + }; + /** + * ConditioningCollectionOutput + * @description Base class for nodes that output a collection of conditioning tensors + */ + ConditioningCollectionOutput: { + /** + * Collection + * @description The output conditioning tensors + */ + collection: components["schemas"]["ConditioningField"][]; + /** + * type + * @default conditioning_collection_output + * @constant + * @enum {string} + */ + type: "conditioning_collection_output"; + }; + /** + * ConditioningField + * @description A conditioning tensor primitive value + */ + ConditioningField: { + /** + * Conditioning Name + * @description The name of conditioning tensor + */ + conditioning_name: string; + /** + * @description The mask associated with this conditioning tensor. Excluded regions should be set to False, included regions should be set to True. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + }; + /** + * Conditioning Primitive + * @description A conditioning tensor primitive value + */ + ConditioningInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Conditioning tensor + * @default null + */ + conditioning?: components["schemas"]["ConditioningField"]; + /** + * type + * @default conditioning + * @constant + * @enum {string} + */ + type: "conditioning"; + }; + /** + * ConditioningOutput + * @description Base class for nodes that output a single conditioning tensor + */ + ConditioningOutput: { + /** @description Conditioning tensor */ + conditioning: components["schemas"]["ConditioningField"]; + /** + * type + * @default conditioning_output + * @constant + * @enum {string} + */ + type: "conditioning_output"; + }; + /** + * Content Shuffle Processor + * @description Applies content shuffle processing to image + */ + ContentShuffleImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * H + * @description Content shuffle `h` parameter + * @default 512 + */ + h?: number; + /** + * W + * @description Content shuffle `w` parameter + * @default 512 + */ + w?: number; + /** + * F + * @description Content shuffle `f` parameter + * @default 256 + */ + f?: number; + /** + * type + * @default content_shuffle_image_processor + * @constant + * @enum {string} + */ + type: "content_shuffle_image_processor"; + }; + /** ControlAdapterDefaultSettings */ + ControlAdapterDefaultSettings: { + /** Preprocessor */ + preprocessor: string | null; + }; + /** ControlField */ + ControlField: { + /** @description The control image */ + image: components["schemas"]["ImageField"]; + /** @description The ControlNet model to use */ + control_model: components["schemas"]["ModelIdentifierField"]; + /** + * Control Weight + * @description The weight given to the ControlNet + * @default 1 + */ + control_weight?: number | number[]; + /** + * Begin Step Percent + * @description When the ControlNet is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the ControlNet is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Control Mode + * @description The control mode to use + * @default balanced + * @enum {string} + */ + control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; + /** + * Resize Mode + * @description The resize mode to use + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | 
"fill_resize" | "just_resize_simple"; + }; + /** + * ControlNetCheckpointConfig + * @description Model config for ControlNet models (diffusers version). + */ + ControlNetCheckpointConfig: { + /** @description Default settings for this model */ + default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @description Format of the provided checkpoint model + * @default checkpoint + * @enum {string} + */ + format: "checkpoint" | "bnb_quantized_nf4b"; + /** + * Config Path + * @description path to the checkpoint model config file + */ + config_path: string; + /** + * Converted At + * @description When this model was last converted to diffusers + */ + converted_at?: number | null; + /** + * Type + * @default controlnet + * @constant + * @enum {string} + */ + type: "controlnet"; + }; + /** + * ControlNetDiffusersConfig + * @description Model config for ControlNet models (diffusers version). + */ + ControlNetDiffusersConfig: { + /** @description Default settings for this model */ + default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + /** + * Type + * @default controlnet + * @constant + * @enum {string} + */ + type: "controlnet"; + }; + /** + * ControlNet + * @description Collects ControlNet info to pass to other nodes + */ + ControlNetInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The control image + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description ControlNet model to load + * @default null + */ + control_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Control Weight + * @description The weight given to the ControlNet + * @default 1 + */ + control_weight?: number | number[]; + /** + * Begin Step Percent + * @description When the ControlNet is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the ControlNet is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Control Mode + * @description The control mode used + * @default balanced + * @enum {string} + */ + control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; + /** + * Resize Mode + * @description The resize mode used + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; + /** + * type + * @default controlnet + * @constant + * @enum {string} + */ + type: "controlnet"; + }; + /** ControlNetMetadataField */ + ControlNetMetadataField: { + /** @description The control image */ + image: components["schemas"]["ImageField"]; + /** + * @description The control image, after processing. 
+ * @default null + */ + processed_image?: components["schemas"]["ImageField"] | null; + /** @description The ControlNet model to use */ + control_model: components["schemas"]["ModelIdentifierField"]; + /** + * Control Weight + * @description The weight given to the ControlNet + * @default 1 + */ + control_weight?: number | number[]; + /** + * Begin Step Percent + * @description When the ControlNet is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the ControlNet is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Control Mode + * @description The control mode to use + * @default balanced + * @enum {string} + */ + control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; + /** + * Resize Mode + * @description The resize mode to use + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; + }; + /** + * ControlOutput + * @description node output for ControlNet info + */ + ControlOutput: { + /** @description ControlNet(s) to apply */ + control: components["schemas"]["ControlField"]; + /** + * type + * @default control_output + * @constant + * @enum {string} + */ + type: "control_output"; + }; + /** + * Core Metadata + * @description Collects core generation metadata into a MetadataField + */ + CoreMetadataInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Generation Mode + * @description The generation mode that output this image + * @default null + */ + generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint") | null; + /** + * Positive Prompt + * @description The positive prompt parameter + * @default null + */ + positive_prompt?: string | null; + /** + * Negative Prompt + * @description The negative prompt parameter + * @default null + */ + negative_prompt?: string | null; + /** + * Width + * @description The width parameter + * @default null + */ + width?: number | null; + /** + * Height + * @description The height parameter + * @default null + */ + height?: number | null; + /** + * Seed + * @description The seed used for noise generation + * @default null + */ + seed?: number | null; + /** + * Rand Device + * @description The device used for random number generation + * @default null + */ + rand_device?: string | null; + /** + * Cfg Scale + * @description The classifier-free guidance scale parameter + * @default null + */ + cfg_scale?: number | null; + /** + * Cfg Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + * @default null + */ + cfg_rescale_multiplier?: number | null; + /** + * Steps + * @description The number of steps used for inference + * @default null + */ + steps?: number | null; + /** + * Scheduler + * @description The scheduler used for inference + * @default null + */ + scheduler?: string | null; + /** + * Seamless X + * @description Whether seamless tiling was used on the X axis + * @default null + */ + seamless_x?: boolean | null; + 
/** + * Seamless Y + * @description Whether seamless tiling was used on the Y axis + * @default null + */ + seamless_y?: boolean | null; + /** + * Clip Skip + * @description The number of skipped CLIP layers + * @default null + */ + clip_skip?: number | null; + /** + * @description The main model used for inference + * @default null + */ + model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Controlnets + * @description The ControlNets used for inference + * @default null + */ + controlnets?: components["schemas"]["ControlNetMetadataField"][] | null; + /** + * Ipadapters + * @description The IP Adapters used for inference + * @default null + */ + ipAdapters?: components["schemas"]["IPAdapterMetadataField"][] | null; + /** + * T2Iadapters + * @description The T2I Adapters used for inference + * @default null + */ + t2iAdapters?: components["schemas"]["T2IAdapterMetadataField"][] | null; + /** + * Loras + * @description The LoRAs used for inference + * @default null + */ + loras?: components["schemas"]["LoRAMetadataField"][] | null; + /** + * Strength + * @description The strength used for latents-to-latents + * @default null + */ + strength?: number | null; + /** + * Init Image + * @description The name of the initial image + * @default null + */ + init_image?: string | null; + /** + * @description The VAE used for decoding, if the main model's default was not used + * @default null + */ + vae?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Hrf Enabled + * @description Whether or not high resolution fix was enabled. + * @default null + */ + hrf_enabled?: boolean | null; + /** + * Hrf Method + * @description The high resolution fix upscale method. + * @default null + */ + hrf_method?: string | null; + /** + * Hrf Strength + * @description The high resolution fix img2img strength used in the upscale pass. + * @default null + */ + hrf_strength?: number | null; + /** + * Positive Style Prompt + * @description The positive style prompt parameter + * @default null + */ + positive_style_prompt?: string | null; + /** + * Negative Style Prompt + * @description The negative style prompt parameter + * @default null + */ + negative_style_prompt?: string | null; + /** + * @description The SDXL Refiner model used + * @default null + */ + refiner_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Refiner Cfg Scale + * @description The classifier-free guidance scale parameter used for the refiner + * @default null + */ + refiner_cfg_scale?: number | null; + /** + * Refiner Steps + * @description The number of steps used for the refiner + * @default null + */ + refiner_steps?: number | null; + /** + * Refiner Scheduler + * @description The scheduler used for the refiner + * @default null + */ + refiner_scheduler?: string | null; + /** + * Refiner Positive Aesthetic Score + * @description The aesthetic score used for the refiner + * @default null + */ + refiner_positive_aesthetic_score?: number | null; + /** + * Refiner Negative Aesthetic Score + * @description The aesthetic score used for the refiner + * @default null + */ + refiner_negative_aesthetic_score?: number | null; + /** + * Refiner Start + * @description The start value used for refiner denoising + * @default null + */ + refiner_start?: number | null; + /** + * type + * @default core_metadata + * @constant + * @enum {string} + */ + type: "core_metadata"; + [key: string]: unknown; + }; + /** + * Create Denoise Mask + * @description Creates mask for denoising model run. 
+ */ + CreateDenoiseMaskInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"]; + /** + * @description Image which will be masked + * @default null + */ + image?: components["schemas"]["ImageField"] | null; + /** + * @description The mask to use when pasting + * @default null + */ + mask?: components["schemas"]["ImageField"]; + /** + * Tiled + * @description Processing using overlapping tiles (reduce memory consumption) + * @default false + */ + tiled?: boolean; + /** + * Fp32 + * @description Whether or not to use full float32 precision + * @default false + */ + fp32?: boolean; + /** + * type + * @default create_denoise_mask + * @constant + * @enum {string} + */ + type: "create_denoise_mask"; + }; + /** + * Create Gradient Mask + * @description Creates mask for denoising model run. + */ + CreateGradientMaskInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Image which will be masked + * @default null + */ + mask?: components["schemas"]["ImageField"]; + /** + * Edge Radius + * @description How far to blur/expand the edges of the mask + * @default 16 + */ + edge_radius?: number; + /** + * Coherence Mode + * @default Gaussian Blur + * @enum {string} + */ + coherence_mode?: "Gaussian Blur" | "Box Blur" | "Staged"; + /** + * Minimum Denoise + * @description Minimum denoise level for the coherence region + * @default 0 + */ + minimum_denoise?: number; + /** + * [OPTIONAL] Image + * @description OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE + * @default null + */ + image?: components["schemas"]["ImageField"] | null; + /** + * [OPTIONAL] UNet + * @description OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * [OPTIONAL] VAE + * @description OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * Tiled + * @description Processing using overlapping tiles (reduce memory consumption) + * @default false + */ + tiled?: boolean; + /** + * Fp32 + * @description Whether or not to use full float32 precision + * @default false + */ + fp32?: boolean; + /** + * type + * @default create_gradient_mask + * @constant + * @enum {string} + */ + type: "create_gradient_mask"; + }; + /** + * Crop Latents + * @description Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be + * divisible by the latent scale factor of 8. 
+ */ + CropLatentsCoreInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"]; + /** + * X + * @description The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. + * @default null + */ + x?: number; + /** + * Y + * @description The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. + * @default null + */ + y?: number; + /** + * Width + * @description The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. + * @default null + */ + width?: number; + /** + * Height + * @description The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. + * @default null + */ + height?: number; + /** + * type + * @default crop_latents + * @constant + * @enum {string} + */ + type: "crop_latents"; + }; + /** CursorPaginatedResults[SessionQueueItemDTO] */ + CursorPaginatedResults_SessionQueueItemDTO_: { + /** + * Limit + * @description Limit of items to get + */ + limit: number; + /** + * Has More + * @description Whether there are more items available + */ + has_more: boolean; + /** + * Items + * @description Items + */ + items: components["schemas"]["SessionQueueItemDTO"][]; + }; + /** + * OpenCV Inpaint + * @description Simple inpaint using opencv. + */ + CvInpaintInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to inpaint + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description The mask to use when inpainting + * @default null + */ + mask?: components["schemas"]["ImageField"]; + /** + * type + * @default cv_inpaint + * @constant + * @enum {string} + */ + type: "cv_inpaint"; + }; + /** + * DW Openpose Image Processor + * @description Generates an openpose pose from an image using DWPose + */ + DWOpenposeImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Draw Body + * @default true + */ + draw_body?: boolean; + /** + * Draw Face + * @default false + */ + draw_face?: boolean; + /** + * Draw Hands + * @default false + */ + draw_hands?: boolean; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default dw_openpose_image_processor + * @constant + * @enum {string} + */ + type: "dw_openpose_image_processor"; + }; + /** DeleteBoardResult */ + DeleteBoardResult: { + /** + * Board Id + * @description The id of the board that was deleted. + */ + board_id: string; + /** + * Deleted Board Images + * @description The image names of the board-images relationships that were deleted. + */ + deleted_board_images: string[]; + /** + * Deleted Images + * @description The names of the images that were deleted. + */ + deleted_images: string[]; + }; + /** DeleteImagesFromListResult */ + DeleteImagesFromListResult: { + /** Deleted Images */ + deleted_images: string[]; + }; + /** + * Denoise Latents + * @description Denoises noisy latents to decodable images + */ + DenoiseLatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Positive Conditioning + * @description Positive conditioning tensor + * @default null + */ + positive_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; + /** + * Negative Conditioning + * @description Negative conditioning tensor + * @default null + */ + negative_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; + /** + * @description Noise tensor + * @default null + */ + noise?: components["schemas"]["LatentsField"] | null; + /** + * Steps + * @description Number of steps to run + * @default 10 + */ + steps?: number; + /** + * CFG Scale + * @description Classifier-Free Guidance scale + * @default 7.5 + */ + cfg_scale?: number | number[]; + /** + * Denoising Start + * @description When to start denoising, expressed a percentage of total steps + * @default 0 + */ + denoising_start?: number; + /** + * Denoising End + * @description When to stop denoising, expressed a percentage of total steps + * @default 1 + */ + denoising_end?: number; + /** + * Scheduler + * @description Scheduler to use during inference + * @default euler + * @enum {string} + */ + scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; + /** + * UNet + * @description UNet (scheduler, 
LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"]; + /** + * Control + * @default null + */ + control?: components["schemas"]["ControlField"] | components["schemas"]["ControlField"][] | null; + /** + * IP-Adapter + * @description IP-Adapter to apply + * @default null + */ + ip_adapter?: components["schemas"]["IPAdapterField"] | components["schemas"]["IPAdapterField"][] | null; + /** + * T2I-Adapter + * @description T2I-Adapter(s) to apply + * @default null + */ + t2i_adapter?: components["schemas"]["T2IAdapterField"] | components["schemas"]["T2IAdapterField"][] | null; + /** + * CFG Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + * @default 0 + */ + cfg_rescale_multiplier?: number; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * @description The mask to use for the operation + * @default null + */ + denoise_mask?: components["schemas"]["DenoiseMaskField"] | null; + /** + * type + * @default denoise_latents + * @constant + * @enum {string} + */ + type: "denoise_latents"; + }; + /** + * DenoiseMaskField + * @description An inpaint mask field + */ + DenoiseMaskField: { + /** + * Mask Name + * @description The name of the mask image + */ + mask_name: string; + /** + * Masked Latents Name + * @description The name of the masked image latents + * @default null + */ + masked_latents_name?: string | null; + /** + * Gradient + * @description Used for gradient inpainting + * @default false + */ + gradient?: boolean; + }; + /** + * DenoiseMaskOutput + * @description Base class for nodes that output a single image + */ + DenoiseMaskOutput: { + /** @description Mask for denoise model run */ + denoise_mask: components["schemas"]["DenoiseMaskField"]; + /** + * type + * @default denoise_mask_output + * @constant + * @enum {string} + */ + type: "denoise_mask_output"; + }; + /** + * Depth Anything Processor + * @description Generates a depth map based on the Depth Anything algorithm + */ + DepthAnythingImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Model Size + * @description The size of the depth model to use + * @default small + * @enum {string} + */ + model_size?: "large" | "base" | "small"; + /** + * Resolution + * @description Pixel resolution for output image + * @default 512 + */ + resolution?: number; + /** + * type + * @default depth_anything_image_processor + * @constant + * @enum {string} + */ + type: "depth_anything_image_processor"; + }; + /** + * Divide Integers + * @description Divides two numbers + */ + DivideInvocation: { + /** + * Id + * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * A + * @description The first number + * @default 0 + */ + a?: number; + /** + * B + * @description The second number + * @default 0 + */ + b?: number; + /** + * type + * @default div + * @constant + * @enum {string} + */ + type: "div"; + }; + /** + * DownloadCancelledEvent + * @description Event model for download_cancelled + */ + DownloadCancelledEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Source + * @description The source of the download + */ + source: string; + }; + /** + * DownloadCompleteEvent + * @description Event model for download_complete + */ + DownloadCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Source + * @description The source of the download + */ + source: string; + /** + * Download Path + * @description The local path where the download is saved + */ + download_path: string; + /** + * Total Bytes + * @description The total number of bytes downloaded + */ + total_bytes: number; + }; + /** + * DownloadErrorEvent + * @description Event model for download_error + */ + DownloadErrorEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Source + * @description The source of the download + */ + source: string; + /** + * Error Type + * @description The type of error + */ + error_type: string; + /** + * Error + * @description The error message + */ + error: string; + }; + /** + * DownloadJob + * @description Class to monitor and control a model download request. + */ + DownloadJob: { + /** + * Id + * @description Numeric ID of this job + * @default -1 + */ + id?: number; + /** + * Dest + * Format: path + * @description Initial destination of downloaded model on local disk; a directory or file path + */ + dest: string; + /** + * Download Path + * @description Final location of downloaded file or directory + */ + download_path?: string | null; + /** + * @description Status of the download + * @default waiting + */ + status?: components["schemas"]["DownloadJobStatus"]; + /** + * Bytes + * @description Bytes downloaded so far + * @default 0 + */ + bytes?: number; + /** + * Total Bytes + * @description Total file size (bytes) + * @default 0 + */ + total_bytes?: number; + /** + * Error Type + * @description Name of exception that caused an error + */ + error_type?: string | null; + /** + * Error + * @description Traceback of the exception that caused an error + */ + error?: string | null; + /** + * Source + * Format: uri + * @description Where to download from. Specific types specified in child classes. 
+ */ + source: string; + /** + * Access Token + * @description authorization token for protected resources + */ + access_token?: string | null; + /** + * Priority + * @description Queue priority; lower values are higher priority + * @default 10 + */ + priority?: number; + /** + * Job Started + * @description Timestamp for when the download job started + */ + job_started?: string | null; + /** + * Job Ended + * @description Timestamp for when the download job ended (completed or errored) + */ + job_ended?: string | null; + /** + * Content Type + * @description Content type of downloaded file + */ + content_type?: string | null; + }; + /** + * DownloadJobStatus + * @description State of a download job. + * @enum {string} + */ + DownloadJobStatus: "waiting" | "running" | "completed" | "cancelled" | "error"; + /** + * DownloadProgressEvent + * @description Event model for download_progress + */ + DownloadProgressEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Source + * @description The source of the download + */ + source: string; + /** + * Download Path + * @description The local path where the download is saved + */ + download_path: string; + /** + * Current Bytes + * @description The number of bytes downloaded so far + */ + current_bytes: number; + /** + * Total Bytes + * @description The total number of bytes to be downloaded + */ + total_bytes: number; + }; + /** + * DownloadStartedEvent + * @description Event model for download_started + */ + DownloadStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Source + * @description The source of the download + */ + source: string; + /** + * Download Path + * @description The local path where the download is saved + */ + download_path: string; + }; + /** + * Dynamic Prompt + * @description Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator + */ + DynamicPromptInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default false + */ + use_cache?: boolean; + /** + * Prompt + * @description The prompt to parse with dynamicprompts + * @default null + */ + prompt?: string; + /** + * Max Prompts + * @description The number of prompts to generate + * @default 1 + */ + max_prompts?: number; + /** + * Combinatorial + * @description Whether to use the combinatorial generator + * @default false + */ + combinatorial?: boolean; + /** + * type + * @default dynamic_prompt + * @constant + * @enum {string} + */ + type: "dynamic_prompt"; + }; + /** DynamicPromptsResponse */ + DynamicPromptsResponse: { + /** Prompts */ + prompts: string[]; + /** Error */ + error?: string | null; + }; + /** + * Upscale (RealESRGAN) + * @description Upscales an image using RealESRGAN. + */ + ESRGANInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The input image + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Model Name + * @description The Real-ESRGAN model to use + * @default RealESRGAN_x4plus.pth + * @enum {string} + */ + model_name?: "RealESRGAN_x4plus.pth" | "RealESRGAN_x4plus_anime_6B.pth" | "ESRGAN_SRx4_DF2KOST_official-ff704c30.pth" | "RealESRGAN_x2plus.pth"; + /** + * Tile Size + * @description Tile size for tiled ESRGAN upscaling (0=tiling disabled) + * @default 400 + */ + tile_size?: number; + /** + * type + * @default esrgan + * @constant + * @enum {string} + */ + type: "esrgan"; + }; + /** Edge */ + Edge: { + /** @description The connection for the edge's from node and field */ + source: components["schemas"]["EdgeConnection"]; + /** @description The connection for the edge's to node and field */ + destination: components["schemas"]["EdgeConnection"]; + }; + /** EdgeConnection */ + EdgeConnection: { + /** + * Node Id + * @description The id of the node for this edge connection + */ + node_id: string; + /** + * Field + * @description The field for this connection + */ + field: string; + }; + /** EnqueueBatchResult */ + EnqueueBatchResult: { + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Enqueued + * @description The total number of queue items enqueued + */ + enqueued: number; + /** + * Requested + * @description The total number of queue items requested to be enqueued + */ + requested: number; + /** @description The batch that was enqueued */ + batch: components["schemas"]["Batch"]; + /** + * Priority + * @description The priority of the enqueued batch + */ + priority: number; + }; + /** ExposedField */ + ExposedField: { + /** Nodeid */ + nodeId: string; + /** Fieldname */ + fieldName: string; + }; + /** + * FaceIdentifier + * @description Outputs an image with detected face IDs printed on each face. For use with other FaceTools. + */ + FaceIdentifierInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Image to face detect + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Minimum Confidence + * @description Minimum confidence for face detection (lower if detection is failing) + * @default 0.5 + */ + minimum_confidence?: number; + /** + * Chunk + * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. 
+ * @default false + */ + chunk?: boolean; + /** + * type + * @default face_identifier + * @constant + * @enum {string} + */ + type: "face_identifier"; + }; + /** + * FaceMask + * @description Face mask creation using mediapipe face detection + */ + FaceMaskInvocation: { + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Image to face detect + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Face Ids + * @description Comma-separated list of face ids to mask eg '0,2,7'. Numbered from 0. Leave empty to mask all. Find face IDs with FaceIdentifier node. + * @default + */ + face_ids?: string; + /** + * Minimum Confidence + * @description Minimum confidence for face detection (lower if detection is failing) + * @default 0.5 + */ + minimum_confidence?: number; + /** + * X Offset + * @description Offset for the X-axis of the face mask + * @default 0 + */ + x_offset?: number; + /** + * Y Offset + * @description Offset for the Y-axis of the face mask + * @default 0 + */ + y_offset?: number; + /** + * Chunk + * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. + * @default false + */ + chunk?: boolean; + /** + * Invert Mask + * @description Toggle to invert the mask + * @default false + */ + invert_mask?: boolean; + /** + * type + * @default face_mask_detection + * @constant + * @enum {string} + */ + type: "face_mask_detection"; + }; + /** + * FaceMaskOutput + * @description Base class for FaceMask output + */ + FaceMaskOutput: { + /** @description The output image */ + image: components["schemas"]["ImageField"]; + /** + * Width + * @description The width of the image in pixels + */ + width: number; + /** + * Height + * @description The height of the image in pixels + */ + height: number; + /** + * type + * @default face_mask_output + * @constant + * @enum {string} + */ + type: "face_mask_output"; + /** @description The output mask */ + mask: components["schemas"]["ImageField"]; + }; + /** + * FaceOff + * @description Bound, extract, and mask a face from an image using MediaPipe detection + */ + FaceOffInvocation: { + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Image for face detection + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Face Id + * @description The face ID to process, numbered from 0. Multiple faces not supported. Find a face's ID with FaceIdentifier node. 
+ * @default 0 + */ + face_id?: number; + /** + * Minimum Confidence + * @description Minimum confidence for face detection (lower if detection is failing) + * @default 0.5 + */ + minimum_confidence?: number; + /** + * X Offset + * @description X-axis offset of the mask + * @default 0 + */ + x_offset?: number; + /** + * Y Offset + * @description Y-axis offset of the mask + * @default 0 + */ + y_offset?: number; + /** + * Padding + * @description All-axis padding around the mask in pixels + * @default 0 + */ + padding?: number; + /** + * Chunk + * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. + * @default false + */ + chunk?: boolean; + /** + * type + * @default face_off + * @constant + * @enum {string} + */ + type: "face_off"; + }; + /** + * FaceOffOutput + * @description Base class for FaceOff Output + */ + FaceOffOutput: { + /** @description The output image */ + image: components["schemas"]["ImageField"]; + /** + * Width + * @description The width of the image in pixels + */ + width: number; + /** + * Height + * @description The height of the image in pixels + */ + height: number; + /** + * type + * @default face_off_output + * @constant + * @enum {string} + */ + type: "face_off_output"; + /** @description The output mask */ + mask: components["schemas"]["ImageField"]; + /** + * X + * @description The x coordinate of the bounding box's left side + */ + x: number; + /** + * Y + * @description The y coordinate of the bounding box's top side + */ + y: number; + }; + /** + * FieldKind + * @description The kind of field. + * - `Input`: An input field on a node. + * - `Output`: An output field on a node. + * - `Internal`: A field which is treated as an input, but cannot be used in node definitions. Metadata is + * one example. It is provided to nodes via the WithMetadata class, and we want to reserve the field name + * "metadata" for this on all nodes. `FieldKind` is used to short-circuit the field name validation logic, + * allowing "metadata" for that field. + * - `NodeAttribute`: The field is a node attribute. These are fields which are not inputs or outputs, + * but which are used to store information about the node. For example, the `id` and `type` fields are node + * attributes. + * + * The presence of this in `json_schema_extra["field_kind"]` is used when initializing node schemas on app + * startup, and when generating the OpenAPI schema for the workflow editor. + * @enum {string} + */ + FieldKind: "input" | "output" | "internal" | "node_attribute"; + /** + * Float Collection Primitive + * @description A collection of float primitive values + */ + FloatCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of float values + * @default [] + */ + collection?: number[]; + /** + * type + * @default float_collection + * @constant + * @enum {string} + */ + type: "float_collection"; + }; + /** + * FloatCollectionOutput + * @description Base class for nodes that output a collection of floats + */ + FloatCollectionOutput: { + /** + * Collection + * @description The float collection + */ + collection: number[]; + /** + * type + * @default float_collection_output + * @constant + * @enum {string} + */ + type: "float_collection_output"; + }; + /** + * Float Primitive + * @description A float primitive value + */ + FloatInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The float value + * @default 0 + */ + value?: number; + /** + * type + * @default float + * @constant + * @enum {string} + */ + type: "float"; + }; + /** + * Float Range + * @description Creates a range + */ + FloatLinearRangeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Start + * @description The first value of the range + * @default 5 + */ + start?: number; + /** + * Stop + * @description The last value of the range + * @default 10 + */ + stop?: number; + /** + * Steps + * @description number of values to interpolate over (including start and stop) + * @default 30 + */ + steps?: number; + /** + * type + * @default float_range + * @constant + * @enum {string} + */ + type: "float_range"; + }; + /** + * Float Math + * @description Performs floating point math. + */ + FloatMathInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Operation + * @description The operation to perform + * @default ADD + * @enum {string} + */ + operation?: "ADD" | "SUB" | "MUL" | "DIV" | "EXP" | "ABS" | "SQRT" | "MIN" | "MAX"; + /** + * A + * @description The first number + * @default 1 + */ + a?: number; + /** + * B + * @description The second number + * @default 1 + */ + b?: number; + /** + * type + * @default float_math + * @constant + * @enum {string} + */ + type: "float_math"; + }; + /** + * FloatOutput + * @description Base class for nodes that output a single float + */ + FloatOutput: { + /** + * Value + * @description The output float + */ + value: number; + /** + * type + * @default float_output + * @constant + * @enum {string} + */ + type: "float_output"; + }; + /** + * Float To Integer + * @description Rounds a float number to (a multiple of) an integer. + */ + FloatToIntegerInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The value to round + * @default 0 + */ + value?: number; + /** + * Multiple of + * @description The multiple to round to + * @default 1 + */ + multiple?: number; + /** + * Method + * @description The method to use for rounding + * @default Nearest + * @enum {string} + */ + method?: "Nearest" | "Floor" | "Ceiling" | "Truncate"; + /** + * type + * @default float_to_int + * @constant + * @enum {string} + */ + type: "float_to_int"; + }; + /** + * Flux Main Model + * @description Loads a flux base model, outputting its submodels. + */ + FluxModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /** @description Flux model (Transformer, VAE, CLIP) to load */
+ model: components["schemas"]["ModelIdentifierField"];
+ /** @description T5 tokenizer and text encoder */
+ t5_encoder: components["schemas"]["ModelIdentifierField"];
+ /**
+ * type
+ * @default flux_model_loader
+ * @constant
+ * @enum {string}
+ */
+ type: "flux_model_loader";
+ };
+ /**
+ * FluxModelLoaderOutput
+ * @description Flux base model loader output
+ */
+ FluxModelLoaderOutput: {
+ /**
+ * Transformer
+ * @description Transformer
+ */
+ transformer: components["schemas"]["TransformerField"];
+ /**
+ * CLIP
+ * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count
+ */
+ clip: components["schemas"]["CLIPField"];
+ /**
+ * T5 Encoder
+ * @description T5 tokenizer and text encoder
+ */
+ t5_encoder: components["schemas"]["T5EncoderField"];
+ /**
+ * VAE
+ * @description VAE
+ */
+ vae: components["schemas"]["VAEField"];
+ /**
+ * Max Seq Length
+ * @description Max sequence length for the flux model
+ * @enum {integer}
+ */
+ max_seq_len: 256 | 512;
+ /**
+ * type
+ * @default flux_model_loader_output
+ * @constant
+ * @enum {string}
+ */
+ type: "flux_model_loader_output";
+ };
+ /**
+ * FLUX Text Encoding
+ * @description Encodes and preps a prompt for a flux image.
+ */
+ FluxTextEncoderInvocation: {
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /**
+ * CLIP
+ * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count
+ * @default null
+ */
+ clip?: components["schemas"]["CLIPField"];
+ /**
+ * T5Encoder
+ * @description T5 tokenizer and text encoder
+ * @default null
+ */
+ t5_encoder?: components["schemas"]["T5EncoderField"];
+ /**
+ * Max Seq Len
+ * @description Max sequence length for the desired flux model
+ * @default null
+ * @enum {integer}
+ */
+ max_seq_len?: 256 | 512;
+ /**
+ * Positive Prompt
+ * @description Positive prompt for text-to-image generation.
+ * @default null
+ */
+ positive_prompt?: string;
+ /**
+ * type
+ * @default flux_text_encoder
+ * @constant
+ * @enum {string}
+ */
+ type: "flux_text_encoder";
+ };
+ /**
+ * FLUX Text to Image
+ * @description Text-to-image generation using a FLUX model.
+ */
+ FluxTextToImageInvocation: {
+ /**
+ * @description The board to save the image to
+ * @default null
+ */
+ board?: components["schemas"]["BoardField"] | null;
+ /**
+ * @description Optional metadata to be saved with the image
+ * @default null
+ */
+ metadata?: components["schemas"]["MetadataField"] | null;
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /**
+ * Transformer
+ * @description Transformer
+ * @default null
+ */
+ transformer?: components["schemas"]["TransformerField"];
+ /**
+ * @description VAE
+ * @default null
+ */
+ vae?: components["schemas"]["VAEField"];
+ /**
+ * @description Positive conditioning tensor
+ * @default null
+ */
+ positive_text_conditioning?: components["schemas"]["ConditioningField"];
+ /**
+ * Width
+ * @description Width of the generated image.
+ * @default 1024
+ */
+ width?: number;
+ /**
+ * Height
+ * @description Height of the generated image.
+ * @default 1024
+ */
+ height?: number;
+ /**
+ * Num Steps
+ * @description Number of diffusion steps.
+ * @default 4
+ */
+ num_steps?: number;
+ /**
+ * Guidance
+ * @description The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images.
+ * @default 4
+ */
+ guidance?: number;
+ /**
+ * Seed
+ * @description Randomness seed for reproducibility.
+ * @default 0
+ */
+ seed?: number;
+ /**
+ * type
+ * @default flux_text_to_image
+ * @constant
+ * @enum {string}
+ */
+ type: "flux_text_to_image";
+ };
+ /** FoundModel */
+ FoundModel: {
+ /**
+ * Path
+ * @description Path to the model
+ */
+ path: string;
+ /**
+ * Is Installed
+ * @description Whether or not the model is already installed
+ */
+ is_installed: boolean;
+ };
+ /**
+ * FreeUConfig
+ * @description Configuration for the FreeU hyperparameters.
+ * - https://huggingface.co/docs/diffusers/main/en/using-diffusers/freeu
+ * - https://github.com/ChenyangSi/FreeU
+ */
+ FreeUConfig: {
+ /**
+ * S1
+ * @description Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process.
+ */
+ s1: number;
+ /**
+ * S2
+ * @description Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process.
+ */
+ s2: number;
+ /**
+ * B1
+ * @description Scaling factor for stage 1 to amplify the contributions of backbone features.
+ */
+ b1: number;
+ /**
+ * B2
+ * @description Scaling factor for stage 2 to amplify the contributions of backbone features.
+ */
+ b2: number;
+ };
+ /**
+ * FreeU
+ * @description Applies FreeU to the UNet. Suggested values (b1/b2/s1/s2):
+ *
+ * SD1.5: 1.2/1.4/0.9/0.2,
+ * SD2: 1.1/1.2/0.9/0.2,
+ * SDXL: 1.1/1.2/0.6/0.4,
+ */
+ FreeUInvocation: {
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /**
+ * UNet
+ * @description UNet (scheduler, LoRAs)
+ * @default null
+ */
+ unet?: components["schemas"]["UNetField"];
+ /**
+ * B1
+ * @description Scaling factor for stage 1 to amplify the contributions of backbone features.
+ * @default 1.2
+ */
+ b1?: number;
+ /**
+ * B2
+ * @description Scaling factor for stage 2 to amplify the contributions of backbone features.
+ * @default 1.4 + */ + b2?: number; + /** + * S1 + * @description Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. + * @default 0.9 + */ + s1?: number; + /** + * S2 + * @description Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. + * @default 0.2 + */ + s2?: number; + /** + * type + * @default freeu + * @constant + * @enum {string} + */ + type: "freeu"; + }; + /** + * GradientMaskOutput + * @description Outputs a denoise mask and an image representing the total gradient of the mask. + */ + GradientMaskOutput: { + /** @description Mask for denoise model run */ + denoise_mask: components["schemas"]["DenoiseMaskField"]; + /** @description Image representing the total gradient area of the mask. For paste-back purposes. */ + expanded_mask_area: components["schemas"]["ImageField"]; + /** + * type + * @default gradient_mask_output + * @constant + * @enum {string} + */ + type: "gradient_mask_output"; + }; + /** Graph */ + Graph: { + /** + * Id + * @description The id of this graph + */ + id?: string; + /** + * Nodes + * @description The nodes in this graph + */ + nodes?: { + [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | 
components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | 
components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + }; + /** + * Edges + * @description The connections between nodes and their fields in this graph + */ + edges?: components["schemas"]["Edge"][]; + }; + /** + * GraphExecutionState + * @description Tracks the state of a graph execution + */ + GraphExecutionState: { + /** + * Id + * @description The id of the execution state + */ + id?: string; + /** @description The graph being executed */ + graph: components["schemas"]["Graph"]; + /** @description The expanded graph of activated and executed nodes */ + execution_graph?: components["schemas"]["Graph"]; + /** + * Executed + * @description The set of node ids that have been executed + */ + executed?: string[]; + /** + * Executed History + * @description The list of node ids that have been executed, in order of execution + */ + executed_history?: string[]; + /** + * Results + * @description The results of node executions + */ + results?: { + [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | 
components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; + }; + /** + * Errors + * @description Errors raised when executing nodes + */ + errors?: { + [key: string]: string; + }; + /** + * Prepared Source Mapping + * @description The map of prepared nodes to original graph nodes + */ + prepared_source_mapping?: { + [key: string]: string; + }; + /** + * Source Prepared Mapping + * @description The map of original graph nodes to prepared nodes + */ + source_prepared_mapping?: { + [key: string]: string[]; + }; + }; + /** + * Grounding DINO (Text Prompt Object Detection) + * @description Runs a Grounding DINO model. Performs zero-shot bounding-box object detection from a text prompt. + */ + GroundingDinoInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Model + * @description The Grounding DINO model to use. + * @default null + * @enum {string} + */ + model?: "grounding-dino-tiny" | "grounding-dino-base"; + /** + * Prompt + * @description The prompt describing the object to segment. + * @default null + */ + prompt?: string; + /** + * @description The image to segment. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detection Threshold + * @description The detection threshold for the Grounding DINO model. All detected bounding boxes with scores above this threshold will be returned. + * @default 0.3 + */ + detection_threshold?: number; + /** + * type + * @default grounding_dino + * @constant + * @enum {string} + */ + type: "grounding_dino"; + }; + /** + * HFModelSource + * @description A HuggingFace repo_id with optional variant, sub-folder and access token. 
+ * Note that the variant option, if not provided to the constructor, will default to fp16, which is + * what people (almost) always want. + */ + HFModelSource: { + /** Repo Id */ + repo_id: string; + /** @default fp16 */ + variant?: components["schemas"]["ModelRepoVariant"] | null; + /** Subfolder */ + subfolder?: string | null; + /** Access Token */ + access_token?: string | null; + /** + * Type + * @default hf + * @constant + * @enum {string} + */ + type?: "hf"; + }; + /** HTTPValidationError */ + HTTPValidationError: { + /** Detail */ + detail?: components["schemas"]["ValidationError"][]; + }; + /** + * HED (softedge) Processor + * @description Applies HED edge detection to image + */ + HedImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * Scribble + * @description Whether or not to use scribble mode + * @default false + */ + scribble?: boolean; + /** + * type + * @default hed_image_processor + * @constant + * @enum {string} + */ + type: "hed_image_processor"; + }; + /** + * Heuristic Resize + * @description Resize an image using a heuristic method. Preserves edge maps. + */ + HeuristicResizeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to resize + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Width + * @description The width to resize to (px) + * @default 512 + */ + width?: number; + /** + * Height + * @description The height to resize to (px) + * @default 512 + */ + height?: number; + /** + * type + * @default heuristic_resize + * @constant + * @enum {string} + */ + type: "heuristic_resize"; + }; + /** + * HuggingFaceMetadata + * @description Extended metadata fields provided by HuggingFace. 
+ */ + HuggingFaceMetadata: { + /** + * Name + * @description model's name + */ + name: string; + /** + * Files + * @description model files and their sizes + */ + files?: components["schemas"]["RemoteModelFile"][]; + /** + * Type + * @default huggingface + * @constant + * @enum {string} + */ + type?: "huggingface"; + /** + * Id + * @description The HF model id + */ + id: string; + /** + * Api Response + * @description Response from the HF API as stringified JSON + */ + api_response?: string | null; + /** + * Is Diffusers + * @description Whether the metadata is for a Diffusers format model + * @default false + */ + is_diffusers?: boolean; + /** + * Ckpt Urls + * @description URLs for all checkpoint format models in the metadata + */ + ckpt_urls?: string[] | null; + }; + /** HuggingFaceModels */ + HuggingFaceModels: { + /** + * Urls + * @description URLs for all checkpoint format models in the metadata + */ + urls: string[] | null; + /** + * Is Diffusers + * @description Whether the metadata is for a Diffusers format model + */ + is_diffusers: boolean; + }; + /** + * IPAdapterCheckpointConfig + * @description Model config for IP Adapter checkpoint format models. + */ + IPAdapterCheckpointConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default ip_adapter + * @constant + * @enum {string} + */ + type: "ip_adapter"; + /** + * Format + * @constant + * @enum {string} + */ + format: "checkpoint"; + }; + /** IPAdapterField */ + IPAdapterField: { + /** + * Image + * @description The IP-Adapter image prompt(s). + */ + image: components["schemas"]["ImageField"] | components["schemas"]["ImageField"][]; + /** @description The IP-Adapter model to use. */ + ip_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** @description The name of the CLIP image encoder model. */ + image_encoder_model: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight given to the IP-Adapter. + * @default 1 + */ + weight?: number | number[]; + /** + * Target Blocks + * @description The IP Adapter blocks to apply + * @default [] + */ + target_blocks?: string[]; + /** + * Begin Step Percent + * @description When the IP-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the IP-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * @description The bool mask associated with this IP-Adapter. 
Excluded regions should be set to False, included regions should be set to True. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + }; + /** + * IP-Adapter + * @description Collects IP-Adapter info to pass to other nodes. + */ + IPAdapterInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Image + * @description The IP-Adapter image prompt(s). + * @default null + */ + image?: components["schemas"]["ImageField"] | components["schemas"]["ImageField"][]; + /** + * IP-Adapter Model + * @description The IP-Adapter model. + * @default null + */ + ip_adapter_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Clip Vision Model + * @description CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models. + * @default ViT-H + * @enum {string} + */ + clip_vision_model?: "ViT-H" | "ViT-G"; + /** + * Weight + * @description The weight given to the IP-Adapter + * @default 1 + */ + weight?: number | number[]; + /** + * Method + * @description The method to apply the IP-Adapter + * @default full + * @enum {string} + */ + method?: "full" | "style" | "composition"; + /** + * Begin Step Percent + * @description When the IP-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the IP-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * @description A mask defining the region that this IP-Adapter applies to. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + /** + * type + * @default ip_adapter + * @constant + * @enum {string} + */ + type: "ip_adapter"; + }; + /** + * IPAdapterInvokeAIConfig + * @description Model config for IP Adapter diffusers format models. + */ + IPAdapterInvokeAIConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default ip_adapter + * @constant + * @enum {string} + */ + type: "ip_adapter"; + /** Image Encoder Model Id */ + image_encoder_model_id: string; + /** + * Format + * @constant + * @enum {string} + */ + format: "invokeai"; + }; + /** + * IPAdapterMetadataField + * @description IP Adapter Field, minus the CLIP Vision Encoder model + */ + IPAdapterMetadataField: { + /** @description The IP-Adapter image prompt. */ + image: components["schemas"]["ImageField"]; + /** @description The IP-Adapter model. */ + ip_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** + * Clip Vision Model + * @description The CLIP Vision model + * @enum {string} + */ + clip_vision_model: "ViT-H" | "ViT-G"; + /** + * Method + * @description Method to apply IP Weights with + * @enum {string} + */ + method: "full" | "style" | "composition"; + /** + * Weight + * @description The weight given to the IP-Adapter + */ + weight: number | number[]; + /** + * Begin Step Percent + * @description When the IP-Adapter is first applied (% of total steps) + */ + begin_step_percent: number; + /** + * End Step Percent + * @description When the IP-Adapter is last applied (% of total steps) + */ + end_step_percent: number; + }; + /** IPAdapterOutput */ + IPAdapterOutput: { + /** + * IP-Adapter + * @description IP-Adapter to apply + */ + ip_adapter: components["schemas"]["IPAdapterField"]; + /** + * type + * @default ip_adapter_output + * @constant + * @enum {string} + */ + type: "ip_adapter_output"; + }; + /** + * Ideal Size + * @description Calculates the ideal size for generation to avoid duplication + */ + IdealSizeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /**
+ * Width
+ * @description Final image width
+ * @default 1024
+ */
+ width?: number;
+ /**
+ * Height
+ * @description Final image height
+ * @default 576
+ */
+ height?: number;
+ /**
+ * @description UNet (scheduler, LoRAs)
+ * @default null
+ */
+ unet?: components["schemas"]["UNetField"];
+ /**
+ * Multiplier
+ * @description Amount to multiply the model's dimensions by when calculating the ideal size (may result in initial generation artifacts if too large)
+ * @default 1
+ */
+ multiplier?: number;
+ /**
+ * type
+ * @default ideal_size
+ * @constant
+ * @enum {string}
+ */
+ type: "ideal_size";
+ };
+ /**
+ * IdealSizeOutput
+ * @description Base class for invocations that output an ideal size
+ */
+ IdealSizeOutput: {
+ /**
+ * Width
+ * @description The ideal width of the image (in pixels)
+ */
+ width: number;
+ /**
+ * Height
+ * @description The ideal height of the image (in pixels)
+ */
+ height: number;
+ /**
+ * type
+ * @default ideal_size_output
+ * @constant
+ * @enum {string}
+ */
+ type: "ideal_size_output";
+ };
+ /**
+ * Blur Image
+ * @description Blurs an image
+ */
+ ImageBlurInvocation: {
+ /**
+ * @description The board to save the image to
+ * @default null
+ */
+ board?: components["schemas"]["BoardField"] | null;
+ /**
+ * @description Optional metadata to be saved with the image
+ * @default null
+ */
+ metadata?: components["schemas"]["MetadataField"] | null;
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /**
+ * @description The image to blur
+ * @default null
+ */
+ image?: components["schemas"]["ImageField"];
+ /**
+ * Radius
+ * @description The blur radius
+ * @default 8
+ */
+ radius?: number;
+ /**
+ * Blur Type
+ * @description The type of blur
+ * @default gaussian
+ * @enum {string}
+ */
+ blur_type?: "gaussian" | "box";
+ /**
+ * type
+ * @default img_blur
+ * @constant
+ * @enum {string}
+ */
+ type: "img_blur";
+ };
+ /**
+ * ImageCategory
+ * @description The category of an image.
+ *
+ * - GENERAL: The image is an output, init image, or otherwise an image without a specialized purpose.
+ * - MASK: The image is a mask image.
+ * - CONTROL: The image is a ControlNet control image.
+ * - USER: The image is a user-provided image.
+ * - OTHER: The image is some other type of image with a specialized purpose. To be used by external nodes.
+ * @enum {string}
+ */
+ ImageCategory: "general" | "mask" | "control" | "user" | "other";
+ /**
+ * Extract Image Channel
+ * @description Gets a channel from an image.
+ */
+ ImageChannelInvocation: {
+ /**
+ * @description The board to save the image to
+ * @default null
+ */
+ board?: components["schemas"]["BoardField"] | null;
+ /**
+ * @description Optional metadata to be saved with the image
+ * @default null
+ */
+ metadata?: components["schemas"]["MetadataField"] | null;
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to get the channel from + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Channel + * @description The channel to get + * @default A + * @enum {string} + */ + channel?: "A" | "R" | "G" | "B"; + /** + * type + * @default img_chan + * @constant + * @enum {string} + */ + type: "img_chan"; + }; + /** + * Multiply Image Channel + * @description Scale a specific color channel of an image. + */ + ImageChannelMultiplyInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to adjust + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Channel + * @description Which channel to adjust + * @default null + * @enum {string} + */ + channel?: "Red (RGBA)" | "Green (RGBA)" | "Blue (RGBA)" | "Alpha (RGBA)" | "Cyan (CMYK)" | "Magenta (CMYK)" | "Yellow (CMYK)" | "Black (CMYK)" | "Hue (HSV)" | "Saturation (HSV)" | "Value (HSV)" | "Luminosity (LAB)" | "A (LAB)" | "B (LAB)" | "Y (YCbCr)" | "Cb (YCbCr)" | "Cr (YCbCr)"; + /** + * Scale + * @description The amount to scale the channel by. + * @default 1 + */ + scale?: number; + /** + * Invert Channel + * @description Invert the channel after scaling + * @default false + */ + invert_channel?: boolean; + /** + * type + * @default img_channel_multiply + * @constant + * @enum {string} + */ + type: "img_channel_multiply"; + }; + /** + * Offset Image Channel + * @description Add or subtract a value from a specific color channel of an image. + */ + ImageChannelOffsetInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to adjust + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Channel + * @description Which channel to adjust + * @default null + * @enum {string} + */ + channel?: "Red (RGBA)" | "Green (RGBA)" | "Blue (RGBA)" | "Alpha (RGBA)" | "Cyan (CMYK)" | "Magenta (CMYK)" | "Yellow (CMYK)" | "Black (CMYK)" | "Hue (HSV)" | "Saturation (HSV)" | "Value (HSV)" | "Luminosity (LAB)" | "A (LAB)" | "B (LAB)" | "Y (YCbCr)" | "Cb (YCbCr)" | "Cr (YCbCr)"; + /** + * Offset + * @description The amount to adjust the channel by + * @default 0 + */ + offset?: number; + /** + * type + * @default img_channel_offset + * @constant + * @enum {string} + */ + type: "img_channel_offset"; + }; + /** + * Image Collection Primitive + * @description A collection of image primitive values + */ + ImageCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of image values + * @default null + */ + collection?: components["schemas"]["ImageField"][]; + /** + * type + * @default image_collection + * @constant + * @enum {string} + */ + type: "image_collection"; + }; + /** + * ImageCollectionOutput + * @description Base class for nodes that output a collection of images + */ + ImageCollectionOutput: { + /** + * Collection + * @description The output images + */ + collection: components["schemas"]["ImageField"][]; + /** + * type + * @default image_collection_output + * @constant + * @enum {string} + */ + type: "image_collection_output"; + }; + /** + * Convert Image Mode + * @description Converts an image to a different mode. + */ + ImageConvertInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to convert + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Mode + * @description The mode to convert to + * @default L + * @enum {string} + */ + mode?: "L" | "RGB" | "RGBA" | "CMYK" | "YCbCr" | "LAB" | "HSV" | "I" | "F"; + /** + * type + * @default img_conv + * @constant + * @enum {string} + */ + type: "img_conv"; + }; + /** + * Crop Image + * @description Crops an image to a specified box. The box can be outside of the image. 
+ */ + ImageCropInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to crop + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * X + * @description The left x coordinate of the crop rectangle + * @default 0 + */ + x?: number; + /** + * Y + * @description The top y coordinate of the crop rectangle + * @default 0 + */ + y?: number; + /** + * Width + * @description The width of the crop rectangle + * @default 512 + */ + width?: number; + /** + * Height + * @description The height of the crop rectangle + * @default 512 + */ + height?: number; + /** + * type + * @default img_crop + * @constant + * @enum {string} + */ + type: "img_crop"; + }; + /** + * ImageDTO + * @description Deserialized image record, enriched for the frontend. + */ + ImageDTO: { + /** + * Image Name + * @description The unique name of the image. + */ + image_name: string; + /** + * Image Url + * @description The URL of the image. + */ + image_url: string; + /** + * Thumbnail Url + * @description The URL of the image's thumbnail. + */ + thumbnail_url: string; + /** @description The type of the image. */ + image_origin: components["schemas"]["ResourceOrigin"]; + /** @description The category of the image. */ + image_category: components["schemas"]["ImageCategory"]; + /** + * Width + * @description The width of the image in px. + */ + width: number; + /** + * Height + * @description The height of the image in px. + */ + height: number; + /** + * Created At + * @description The created timestamp of the image. + */ + created_at: string; + /** + * Updated At + * @description The updated timestamp of the image. + */ + updated_at: string; + /** + * Deleted At + * @description The deleted timestamp of the image. + */ + deleted_at?: string | null; + /** + * Is Intermediate + * @description Whether this is an intermediate image. + */ + is_intermediate: boolean; + /** + * Session Id + * @description The session ID that generated this image, if it is a generated image. + */ + session_id?: string | null; + /** + * Node Id + * @description The node ID that generated this image, if it is a generated image. + */ + node_id?: string | null; + /** + * Starred + * @description Whether this image is starred. + */ + starred: boolean; + /** + * Has Workflow + * @description Whether this image has a workflow. + */ + has_workflow: boolean; + /** + * Board Id + * @description The id of the board the image belongs to, if one exists. + */ + board_id?: string | null; + }; + /** + * ImageField + * @description An image primitive field + */ + ImageField: { + /** + * Image Name + * @description The name of the image + */ + image_name: string; + }; + /** + * Adjust Image Hue + * @description Adjusts the Hue of an image. 
+ */ + ImageHueAdjustmentInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to adjust + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Hue + * @description The degrees by which to rotate the hue, 0-360 + * @default 0 + */ + hue?: number; + /** + * type + * @default img_hue_adjust + * @constant + * @enum {string} + */ + type: "img_hue_adjust"; + }; + /** + * Inverse Lerp Image + * @description Inverse linear interpolation of all pixels of an image + */ + ImageInverseLerpInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to lerp + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Min + * @description The minimum input value + * @default 0 + */ + min?: number; + /** + * Max + * @description The maximum input value + * @default 255 + */ + max?: number; + /** + * type + * @default img_ilerp + * @constant + * @enum {string} + */ + type: "img_ilerp"; + }; + /** + * Image Primitive + * @description An image primitive value + */ + ImageInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to load + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default image + * @constant + * @enum {string} + */ + type: "image"; + }; + /** + * Lerp Image + * @description Linear interpolation of all pixels of an image + */ + ImageLerpInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to lerp + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Min + * @description The minimum output value + * @default 0 + */ + min?: number; + /** + * Max + * @description The maximum output value + * @default 255 + */ + max?: number; + /** + * type + * @default img_lerp + * @constant + * @enum {string} + */ + type: "img_lerp"; + }; + /** + * Image Mask to Tensor + * @description Convert a mask image to a tensor. Converts the image to grayscale and uses thresholding at the specified value. + */ + ImageMaskToTensorInvocation: { + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The mask image to convert. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Cutoff + * @description Cutoff (<) + * @default 128 + */ + cutoff?: number; + /** + * Invert + * @description Whether to invert the mask. + * @default false + */ + invert?: boolean; + /** + * type + * @default image_mask_to_tensor + * @constant + * @enum {string} + */ + type: "image_mask_to_tensor"; + }; + /** + * Multiply Images + * @description Multiplies two images together using `PIL.ImageChops.multiply()`. + */ + ImageMultiplyInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The first image to multiply + * @default null + */ + image1?: components["schemas"]["ImageField"]; + /** + * @description The second image to multiply + * @default null + */ + image2?: components["schemas"]["ImageField"]; + /** + * type + * @default img_mul + * @constant + * @enum {string} + */ + type: "img_mul"; + }; + /** + * Blur NSFW Image + * @description Add blur to NSFW-flagged images + */ + ImageNSFWBlurInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to check + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default img_nsfw + * @constant + * @enum {string} + */ + type: "img_nsfw"; + }; + /** + * ImageOutput + * @description Base class for nodes that output a single image + */ + ImageOutput: { + /** @description The output image */ + image: components["schemas"]["ImageField"]; + /** + * Width + * @description The width of the image in pixels + */ + width: number; + /** + * Height + * @description The height of the image in pixels + */ + height: number; + /** + * type + * @default image_output + * @constant + * @enum {string} + */ + type: "image_output"; + }; + /** + * Paste Image + * @description Pastes an image into another image. + */ + ImagePasteInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The base image + * @default null + */ + base_image?: components["schemas"]["ImageField"]; + /** + * @description The image to paste + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description The mask to use when pasting + * @default null + */ + mask?: components["schemas"]["ImageField"] | null; + /** + * X + * @description The left x coordinate at which to paste the image + * @default 0 + */ + x?: number; + /** + * Y + * @description The top y coordinate at which to paste the image + * @default 0 + */ + y?: number; + /** + * Crop + * @description Crop to base image dimensions + * @default false + */ + crop?: boolean; + /** + * type + * @default img_paste + * @constant + * @enum {string} + */ + type: "img_paste"; + }; + /** + * ImageRecordChanges + * @description A set of changes to apply to an image record. + * + * Only limited changes are valid: + * - `image_category`: change the category of an image + * - `session_id`: change the session associated with an image + * - `is_intermediate`: change the image's `is_intermediate` flag + * - `starred`: change whether the image is starred + */ + ImageRecordChanges: { + /** @description The image's new category. */ + image_category?: components["schemas"]["ImageCategory"] | null; + /** + * Session Id + * @description The image's new session ID. + */ + session_id?: string | null; + /** + * Is Intermediate + * @description The image's new `is_intermediate` flag. 
+ */ + is_intermediate?: boolean | null; + /** + * Starred + * @description The image's new `starred` state + */ + starred?: boolean | null; + [key: string]: unknown; + }; + /** + * Resize Image + * @description Resizes an image to specific dimensions + */ + ImageResizeInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to resize + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Width + * @description The width to resize to (px) + * @default 512 + */ + width?: number; + /** + * Height + * @description The height to resize to (px) + * @default 512 + */ + height?: number; + /** + * Resample Mode + * @description The resampling mode + * @default bicubic + * @enum {string} + */ + resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; + /** + * type + * @default img_resize + * @constant + * @enum {string} + */ + type: "img_resize"; + }; + /** + * Scale Image + * @description Scales an image by a factor + */ + ImageScaleInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to scale + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Scale Factor + * @description The factor by which to scale the image + * @default 2 + */ + scale_factor?: number; + /** + * Resample Mode + * @description The resampling mode + * @default bicubic + * @enum {string} + */ + resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; + /** + * type + * @default img_scale + * @constant + * @enum {string} + */ + type: "img_scale"; + }; + /** + * Image to Latents + * @description Encodes an image into latents. + */ + ImageToLatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to encode + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"]; + /** + * Tiled + * @description Processing using overlapping tiles (reduce memory consumption) + * @default false + */ + tiled?: boolean; + /** + * Tile Size + * @description The tile size for VAE tiling in pixels (image space). If set to 0, the default tile size for the model will be used. Larger tile sizes generally produce better results at the cost of higher memory usage. + * @default 0 + */ + tile_size?: number; + /** + * Fp32 + * @description Whether or not to use full float32 precision + * @default false + */ + fp32?: boolean; + /** + * type + * @default i2l + * @constant + * @enum {string} + */ + type: "i2l"; + }; + /** + * ImageUrlsDTO + * @description The URLs for an image and its thumbnail. + */ + ImageUrlsDTO: { + /** + * Image Name + * @description The unique name of the image. + */ + image_name: string; + /** + * Image Url + * @description The URL of the image. + */ + image_url: string; + /** + * Thumbnail Url + * @description The URL of the image's thumbnail. + */ + thumbnail_url: string; + }; + /** + * Add Invisible Watermark + * @description Add an invisible watermark to an image + */ + ImageWatermarkInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to check + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Text + * @description Watermark text + * @default InvokeAI + */ + text?: string; + /** + * type + * @default img_watermark + * @constant + * @enum {string} + */ + type: "img_watermark"; + }; + /** ImagesDownloaded */ + ImagesDownloaded: { + /** + * Response + * @description The message to display to the user when images begin downloading + */ + response?: string | null; + /** + * Bulk Download Item Name + * @description The name of the bulk download item for which events will be emitted + */ + bulk_download_item_name?: string | null; + }; + /** ImagesUpdatedFromListResult */ + ImagesUpdatedFromListResult: { + /** + * Updated Image Names + * @description The image names that were updated + */ + updated_image_names: string[]; + }; + /** + * Solid Color Infill + * @description Infills transparent areas of an image with a solid color + */ + InfillColorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description The color to use to infill + * @default { + * "r": 127, + * "g": 127, + * "b": 127, + * "a": 255 + * } + */ + color?: components["schemas"]["ColorField"]; + /** + * type + * @default infill_rgba + * @constant + * @enum {string} + */ + type: "infill_rgba"; + }; + /** + * PatchMatch Infill + * @description Infills transparent areas of an image using the PatchMatch algorithm + */ + InfillPatchMatchInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Downscale + * @description Run patchmatch on downscaled image to speedup infill + * @default 2 + */ + downscale?: number; + /** + * Resample Mode + * @description The resampling mode + * @default bicubic + * @enum {string} + */ + resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; + /** + * type + * @default infill_patchmatch + * @constant + * @enum {string} + */ + type: "infill_patchmatch"; + }; + /** + * Tile Infill + * @description Infills transparent areas of an image with tiles of the image + */ + InfillTileInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Tile Size + * @description The tile size (px) + * @default 32 + */ + tile_size?: number; + /** + * Seed + * @description The seed to use for tile generation (omit for random) + * @default 0 + */ + seed?: number; + /** + * type + * @default infill_tile + * @constant + * @enum {string} + */ + type: "infill_tile"; + }; + /** + * Input + * @description The type of input a field accepts. + * - `Input.Direct`: The field must have its value provided directly, when the invocation and field are instantiated. + * - `Input.Connection`: The field must have its value provided by a connection. + * - `Input.Any`: The field may have its value provided either directly or by a connection. + * @enum {string} + */ + Input: "connection" | "direct" | "any"; + /** + * InputFieldJSONSchemaExtra + * @description Extra attributes to be added to input fields and their OpenAPI schema. Used during graph execution, + * and by the workflow editor during schema parsing and UI rendering. + */ + InputFieldJSONSchemaExtra: { + input: components["schemas"]["Input"]; + /** Orig Required */ + orig_required: boolean; + field_kind: components["schemas"]["FieldKind"]; + /** + * Default + * @default null + */ + default: unknown; + /** + * Orig Default + * @default null + */ + orig_default: unknown; + /** + * Ui Hidden + * @default false + */ + ui_hidden: boolean; + /** @default null */ + ui_type: components["schemas"]["UIType"] | null; + /** @default null */ + ui_component: components["schemas"]["UIComponent"] | null; + /** + * Ui Order + * @default null + */ + ui_order: number | null; + /** + * Ui Choice Labels + * @default null + */ + ui_choice_labels: { + [key: string]: string; + } | null; + }; + /** + * InstallStatus + * @description State of an install job running in the background. 
+ * @enum {string} + */ + InstallStatus: "waiting" | "downloading" | "downloads_done" | "running" | "completed" | "error" | "cancelled"; + /** + * Integer Collection Primitive + * @description A collection of integer primitive values + */ + IntegerCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of integer values + * @default [] + */ + collection?: number[]; + /** + * type + * @default integer_collection + * @constant + * @enum {string} + */ + type: "integer_collection"; + }; + /** + * IntegerCollectionOutput + * @description Base class for nodes that output a collection of integers + */ + IntegerCollectionOutput: { + /** + * Collection + * @description The int collection + */ + collection: number[]; + /** + * type + * @default integer_collection_output + * @constant + * @enum {string} + */ + type: "integer_collection_output"; + }; + /** + * Integer Primitive + * @description An integer primitive value + */ + IntegerInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The integer value + * @default 0 + */ + value?: number; + /** + * type + * @default integer + * @constant + * @enum {string} + */ + type: "integer"; + }; + /** + * Integer Math + * @description Performs integer math. + */ + IntegerMathInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Operation + * @description The operation to perform + * @default ADD + * @enum {string} + */ + operation?: "ADD" | "SUB" | "MUL" | "DIV" | "EXP" | "MOD" | "ABS" | "MIN" | "MAX"; + /** + * A + * @description The first number + * @default 1 + */ + a?: number; + /** + * B + * @description The second number + * @default 1 + */ + b?: number; + /** + * type + * @default integer_math + * @constant + * @enum {string} + */ + type: "integer_math"; + }; + /** + * IntegerOutput + * @description Base class for nodes that output a single integer + */ + IntegerOutput: { + /** + * Value + * @description The output integer + */ + value: number; + /** + * type + * @default integer_output + * @constant + * @enum {string} + */ + type: "integer_output"; + }; + /** + * Invert Tensor Mask + * @description Inverts a tensor mask. + */ + InvertTensorMaskInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The tensor mask to convert. + * @default null + */ + mask?: components["schemas"]["TensorField"]; + /** + * type + * @default invert_tensor_mask + * @constant + * @enum {string} + */ + type: "invert_tensor_mask"; + }; + /** InvocationCacheStatus */ + InvocationCacheStatus: { + /** + * Size + * @description The current size of the invocation cache + */ + size: number; + /** + * Hits + * @description The number of cache hits + */ + hits: number; + /** + * Misses + * @description The number of cache misses + */ + misses: number; + /** + * Enabled + * @description Whether the invocation cache is enabled + */ + enabled: boolean; + /** + * Max Size + * @description The maximum size of the invocation cache + */ + max_size: number; + }; + /** + * InvocationCompleteEvent + * @description Event model for invocation_complete + */ + InvocationCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Item Id + * @description The ID of the queue item + */ + item_id: number; + /** + * Batch Id + * @description The ID of the queue batch + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session (aka graph execution state) + */ + session_id: string; + /** + * Invocation + * @description The ID of the invocation + */ + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | 
components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | 
components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + /** + * Invocation Source Id + * @description The ID of the prepared invocation's source node + */ + invocation_source_id: string; + /** + * Result + * @description The result of the invocation + */ + result: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | 
components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; + }; + /** + * InvocationDenoiseProgressEvent + * @description Event model for invocation_denoise_progress + */ + InvocationDenoiseProgressEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Item Id + * @description The ID of the queue item + */ + item_id: number; + /** + * Batch Id + * @description The ID of the queue batch + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session (aka graph execution state) + */ + session_id: string; + /** + * Invocation + * @description The ID of the invocation + */ + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | 
components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | 
components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + /** + * Invocation Source Id + * @description The ID of the prepared invocation's source node + */ + invocation_source_id: string; + /** @description The progress image sent at each step during processing */ + progress_image: components["schemas"]["ProgressImage"]; + /** + * Step + * @description The current step of the invocation + */ + step: number; + /** + * Total Steps + * @description The total number of steps in the invocation + */ + total_steps: number; + /** + * Order + * @description The order of the invocation in the session + */ + order: number; + /** + * Percentage + * @description The percentage of completion of the invocation + */ + percentage: number; + }; + /** + * InvocationErrorEvent + * @description Event model for invocation_error + */ + InvocationErrorEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + 
queue_id: string; + /** + * Item Id + * @description The ID of the queue item + */ + item_id: number; + /** + * Batch Id + * @description The ID of the queue batch + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session (aka graph execution state) + */ + session_id: string; + /** + * Invocation + * @description The ID of the invocation + */ + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | 
components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | 
components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + /** + * Invocation Source Id + * @description The ID of the prepared invocation's source node + */ + invocation_source_id: string; + /** + * Error Type + * @description The error type + */ + error_type: string; + /** + * Error Message + * @description The error message + */ + error_message: string; + /** + * Error Traceback + * @description The error traceback + */ + error_traceback: string; + /** + * User Id + * @description The ID of the user who created the invocation + * @default null + */ + user_id: string | null; + /** + * Project Id + * @description The ID of the user who created the invocation + * @default null + */ + project_id: string | null; + }; + InvocationOutputMap: { + img_conv: components["schemas"]["ImageOutput"]; + mlsd_image_processor: components["schemas"]["ImageOutput"]; + canny_image_processor: components["schemas"]["ImageOutput"]; + alpha_mask_to_tensor: components["schemas"]["MaskOutput"]; + img_watermark: components["schemas"]["ImageOutput"]; + img_mul: components["schemas"]["ImageOutput"]; + float_to_int: components["schemas"]["IntegerOutput"]; + normalbae_image_processor: components["schemas"]["ImageOutput"]; + img_channel_multiply: components["schemas"]["ImageOutput"]; + string_split: components["schemas"]["String2Output"]; + grounding_dino: components["schemas"]["BoundingBoxCollectionOutput"]; + scheduler: components["schemas"]["SchedulerOutput"]; + float_range: components["schemas"]["FloatCollectionOutput"]; + metadata_item: components["schemas"]["MetadataItemOutput"]; + rand_float: components["schemas"]["FloatOutput"]; + infill_lama: components["schemas"]["ImageOutput"]; + img_resize: components["schemas"]["ImageOutput"]; + string_replace: components["schemas"]["StringOutput"]; + compel: components["schemas"]["ConditioningOutput"]; + segment_anything: components["schemas"]["MaskOutput"]; + flux_text_encoder: components["schemas"]["ConditioningOutput"]; + cv_inpaint: components["schemas"]["ImageOutput"]; + image_collection: components["schemas"]["ImageCollectionOutput"]; + pidi_image_processor: components["schemas"]["ImageOutput"]; + infill_tile: components["schemas"]["ImageOutput"]; + unsharp_mask: components["schemas"]["ImageOutput"]; + string_join: components["schemas"]["StringOutput"]; + blank_image: components["schemas"]["ImageOutput"]; + save_image: components["schemas"]["ImageOutput"]; + show_image: components["schemas"]["ImageOutput"]; + string_split_neg: components["schemas"]["StringPosNegOutput"]; + face_mask_detection: components["schemas"]["FaceMaskOutput"]; + conditioning: 
components["schemas"]["ConditioningOutput"]; + metadata: components["schemas"]["MetadataOutput"]; + collect: components["schemas"]["CollectInvocationOutput"]; + pair_tile_image: components["schemas"]["PairTileImageOutput"]; + spandrel_image_to_image: components["schemas"]["ImageOutput"]; + lora_selector: components["schemas"]["LoRASelectorOutput"]; + face_identifier: components["schemas"]["ImageOutput"]; + depth_anything_image_processor: components["schemas"]["ImageOutput"]; + boolean_collection: components["schemas"]["BooleanCollectionOutput"]; + tensor_mask_to_image: components["schemas"]["ImageOutput"]; + esrgan: components["schemas"]["ImageOutput"]; + img_nsfw: components["schemas"]["ImageOutput"]; + calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"]; + img_scale: components["schemas"]["ImageOutput"]; + lora_collection_loader: components["schemas"]["LoRALoaderOutput"]; + img_pad_crop: components["schemas"]["ImageOutput"]; + create_gradient_mask: components["schemas"]["GradientMaskOutput"]; + leres_image_processor: components["schemas"]["ImageOutput"]; + img_blur: components["schemas"]["ImageOutput"]; + main_model_loader: components["schemas"]["ModelLoaderOutput"]; + dynamic_prompt: components["schemas"]["StringCollectionOutput"]; + lblend: components["schemas"]["LatentsOutput"]; + rectangle_mask: components["schemas"]["MaskOutput"]; + add: components["schemas"]["IntegerOutput"]; + img_lerp: components["schemas"]["ImageOutput"]; + sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"]; + flux_text_to_image: components["schemas"]["ImageOutput"]; + lresize: components["schemas"]["LatentsOutput"]; + dw_openpose_image_processor: components["schemas"]["ImageOutput"]; + noise: components["schemas"]["NoiseOutput"]; + canvas_paste_back: components["schemas"]["ImageOutput"]; + range: components["schemas"]["IntegerCollectionOutput"]; + color: components["schemas"]["ColorOutput"]; + sub: components["schemas"]["IntegerOutput"]; + ip_adapter: components["schemas"]["IPAdapterOutput"]; + crop_latents: components["schemas"]["LatentsOutput"]; + latents: components["schemas"]["LatentsOutput"]; + color_map_image_processor: components["schemas"]["ImageOutput"]; + float: components["schemas"]["FloatOutput"]; + infill_cv2: components["schemas"]["ImageOutput"]; + lscale: components["schemas"]["LatentsOutput"]; + denoise_latents: components["schemas"]["LatentsOutput"]; + string_collection: components["schemas"]["StringCollectionOutput"]; + zoe_depth_image_processor: components["schemas"]["ImageOutput"]; + mul: components["schemas"]["IntegerOutput"]; + clip_skip: components["schemas"]["CLIPSkipInvocationOutput"]; + sdxl_lora_loader: components["schemas"]["SDXLLoRALoaderOutput"]; + sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"]; + step_param_easing: components["schemas"]["FloatCollectionOutput"]; + tile_image_processor: components["schemas"]["ImageOutput"]; + merge_metadata: components["schemas"]["MetadataOutput"]; + midas_depth_image_processor: components["schemas"]["ImageOutput"]; + invert_tensor_mask: components["schemas"]["MaskOutput"]; + tile_to_properties: components["schemas"]["TileToPropertiesOutput"]; + sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"]; + freeu: components["schemas"]["UNetOutput"]; + bounding_box: components["schemas"]["BoundingBoxOutput"]; + infill_rgba: components["schemas"]["ImageOutput"]; + image_mask_to_tensor: components["schemas"]["MaskOutput"]; + seamless: 
components["schemas"]["SeamlessModeOutput"]; + model_identifier: components["schemas"]["ModelIdentifierOutput"]; + conditioning_collection: components["schemas"]["ConditioningCollectionOutput"]; + heuristic_resize: components["schemas"]["ImageOutput"]; + content_shuffle_image_processor: components["schemas"]["ImageOutput"]; + boolean: components["schemas"]["BooleanOutput"]; + string_join_three: components["schemas"]["StringOutput"]; + t2i_adapter: components["schemas"]["T2IAdapterOutput"]; + mask_from_id: components["schemas"]["ImageOutput"]; + tiled_multi_diffusion_denoise_latents: components["schemas"]["LatentsOutput"]; + prompt_from_file: components["schemas"]["StringCollectionOutput"]; + range_of_size: components["schemas"]["IntegerCollectionOutput"]; + float_math: components["schemas"]["FloatOutput"]; + controlnet: components["schemas"]["ControlOutput"]; + mask_combine: components["schemas"]["ImageOutput"]; + img_ilerp: components["schemas"]["ImageOutput"]; + img_paste: components["schemas"]["ImageOutput"]; + latents_collection: components["schemas"]["LatentsCollectionOutput"]; + round_float: components["schemas"]["FloatOutput"]; + lineart_anime_image_processor: components["schemas"]["ImageOutput"]; + mask_edge: components["schemas"]["ImageOutput"]; + img_chan: components["schemas"]["ImageOutput"]; + vae_loader: components["schemas"]["VAEOutput"]; + l2i: components["schemas"]["ImageOutput"]; + img_hue_adjust: components["schemas"]["ImageOutput"]; + img_crop: components["schemas"]["ImageOutput"]; + image: components["schemas"]["ImageOutput"]; + core_metadata: components["schemas"]["MetadataOutput"]; + float_collection: components["schemas"]["FloatCollectionOutput"]; + infill_patchmatch: components["schemas"]["ImageOutput"]; + iterate: components["schemas"]["IterateInvocationOutput"]; + color_correct: components["schemas"]["ImageOutput"]; + calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"]; + hed_image_processor: components["schemas"]["ImageOutput"]; + sdxl_compel_prompt: components["schemas"]["ConditioningOutput"]; + sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"]; + face_off: components["schemas"]["FaceOffOutput"]; + random_range: components["schemas"]["IntegerCollectionOutput"]; + string: components["schemas"]["StringOutput"]; + rand_int: components["schemas"]["IntegerOutput"]; + merge_tiles_to_image: components["schemas"]["ImageOutput"]; + calculate_image_tiles_min_overlap: components["schemas"]["CalculateImageTilesOutput"]; + lora_loader: components["schemas"]["LoRALoaderOutput"]; + segment_anything_processor: components["schemas"]["ImageOutput"]; + integer_math: components["schemas"]["IntegerOutput"]; + tomask: components["schemas"]["ImageOutput"]; + spandrel_image_to_image_autoscale: components["schemas"]["ImageOutput"]; + flux_model_loader: components["schemas"]["FluxModelLoaderOutput"]; + i2l: components["schemas"]["LatentsOutput"]; + mediapipe_face_processor: components["schemas"]["ImageOutput"]; + integer: components["schemas"]["IntegerOutput"]; + lineart_image_processor: components["schemas"]["ImageOutput"]; + div: components["schemas"]["IntegerOutput"]; + img_channel_offset: components["schemas"]["ImageOutput"]; + ideal_size: components["schemas"]["IdealSizeOutput"]; + create_denoise_mask: components["schemas"]["DenoiseMaskOutput"]; + integer_collection: components["schemas"]["IntegerCollectionOutput"]; + }; + /** + * InvocationStartedEvent + * @description Event model for invocation_started + */ + 
InvocationStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Item Id + * @description The ID of the queue item + */ + item_id: number; + /** + * Batch Id + * @description The ID of the queue batch + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session (aka graph execution state) + */ + session_id: string; + /** + * Invocation + * @description The ID of the invocation + */ + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | 
components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | 
components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + /** + * Invocation Source Id + * @description The ID of the prepared invocation's source node + */ + invocation_source_id: string; + }; + /** + * IterateInvocation + * @description Iterates over a list of items + */ + IterateInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The list of items to iterate over + * @default [] + */ + collection?: unknown[]; + /** + * Index + * @description The index, will be provided on executed iterators + * @default 0 + */ + index?: number; + /** + * type + * @default iterate + * @constant + * @enum {string} + */ + type: "iterate"; + }; + /** + * IterateInvocationOutput + * @description Used to connect iteration outputs. Will be expanded to a specific output. + */ + IterateInvocationOutput: { + /** + * Collection Item + * @description The item being iterated over + */ + item: unknown; + /** + * Index + * @description The index of the item + */ + index: number; + /** + * Total + * @description The total number of items + */ + total: number; + /** + * type + * @default iterate_output + * @constant + * @enum {string} + */ + type: "iterate_output"; + }; + JsonValue: unknown; + /** + * LaMa Infill + * @description Infills transparent areas of an image using the LaMa model + */ + LaMaInfillInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default infill_lama + * @constant + * @enum {string} + */ + type: "infill_lama"; + }; + /** + * Latents Collection Primitive + * @description A collection of latents tensor primitive values + */ + LatentsCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of latents tensors + * @default null + */ + collection?: components["schemas"]["LatentsField"][]; + /** + * type + * @default latents_collection + * @constant + * @enum {string} + */ + type: "latents_collection"; + }; + /** + * LatentsCollectionOutput + * @description Base class for nodes that output a collection of latents tensors + */ + LatentsCollectionOutput: { + /** + * Collection + * @description Latents tensor + */ + collection: components["schemas"]["LatentsField"][]; + /** + * type + * @default latents_collection_output + * @constant + * @enum {string} + */ + type: "latents_collection_output"; + }; + /** + * LatentsField + * @description A latents tensor primitive field + */ + LatentsField: { + /** + * Latents Name + * @description The name of the latents + */ + latents_name: string; + /** + * Seed + * @description Seed used to generate this latents + * @default null + */ + seed?: number | null; + }; + /** + * Latents Primitive + * @description A latents tensor primitive value + */ + LatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"]; + /** + * type + * @default latents + * @constant + * @enum {string} + */ + type: "latents"; + }; + /** + * LatentsOutput + * @description Base class for nodes that output a single latents tensor + */ + LatentsOutput: { + /** @description Latents tensor */ + latents: components["schemas"]["LatentsField"]; + /** + * Width + * @description Width of output (px) + */ + width: number; + /** + * Height + * @description Height of output (px) + */ + height: number; + /** + * type + * @default latents_output + * @constant + * @enum {string} + */ + type: "latents_output"; + }; + /** + * Latents to Image + * @description Generates an image from latents. 
+ */ + LatentsToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"]; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"]; + /** + * Tiled + * @description Processing using overlapping tiles (reduce memory consumption) + * @default false + */ + tiled?: boolean; + /** + * Tile Size + * @description The tile size for VAE tiling in pixels (image space). If set to 0, the default tile size for the model will be used. Larger tile sizes generally produce better results at the cost of higher memory usage. + * @default 0 + */ + tile_size?: number; + /** + * Fp32 + * @description Whether or not to use full float32 precision + * @default false + */ + fp32?: boolean; + /** + * type + * @default l2i + * @constant + * @enum {string} + */ + type: "l2i"; + }; + /** + * Leres (Depth) Processor + * @description Applies leres processing to image + */ + LeresImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Thr A + * @description Leres parameter `thr_a` + * @default 0 + */ + thr_a?: number; + /** + * Thr B + * @description Leres parameter `thr_b` + * @default 0 + */ + thr_b?: number; + /** + * Boost + * @description Whether to use boost mode + * @default false + */ + boost?: boolean; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default leres_image_processor + * @constant + * @enum {string} + */ + type: "leres_image_processor"; + }; + /** + * Lineart Anime Processor + * @description Applies line art anime processing to image + */ + LineartAnimeImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default lineart_anime_image_processor + * @constant + * @enum {string} + */ + type: "lineart_anime_image_processor"; + }; + /** + * Lineart Processor + * @description Applies line art processing to image + */ + LineartImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * Coarse + * @description Whether to use coarse mode + * @default false + */ + coarse?: boolean; + /** + * type + * @default lineart_image_processor + * @constant + * @enum {string} + */ + type: "lineart_image_processor"; + }; + /** + * LoRA Collection Loader + * @description Applies a collection of LoRAs to the provided UNet and CLIP models. + */ + LoRACollectionLoader: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRAs + * @description LoRA models and weights. May be a single LoRA or collection. + * @default null + */ + loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][]; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"] | null; + /** + * type + * @default lora_collection_loader + * @constant + * @enum {string} + */ + type: "lora_collection_loader"; + }; + /** + * LoRADiffusersConfig + * @description Model config for LoRA/Diffusers models. + */ + LoRADiffusersConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default lora + * @constant + * @enum {string} + */ + type: "lora"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + }; + /** LoRAField */ + LoRAField: { + /** @description Info to load lora model */ + lora: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description Weight to apply to lora model + */ + weight: number; + }; + /** + * LoRA + * @description Apply selected lora to unet and text_encoder. + */ + LoRALoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRA + * @description LoRA model to load + * @default null + */ + lora?: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + * @default 0.75 + */ + weight?: number; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"] | null; + /** + * type + * @default lora_loader + * @constant + * @enum {string} + */ + type: "lora_loader"; + }; + /** + * LoRALoaderOutput + * @description Model loader output + */ + LoRALoaderOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet: components["schemas"]["UNetField"] | null; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip: components["schemas"]["CLIPField"] | null; + /** + * type + * @default lora_loader_output + * @constant + * @enum {string} + */ + type: "lora_loader_output"; + }; + /** + * LoRALyCORISConfig + * @description Model config for LoRA/Lycoris models. + */ + LoRALyCORISConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default lora + * @constant + * @enum {string} + */ + type: "lora"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** + * Format + * @default lycoris + * @constant + * @enum {string} + */ + format: "lycoris"; + }; + /** + * LoRAMetadataField + * @description LoRA Metadata Field + */ + LoRAMetadataField: { + /** @description LoRA model to load */ + model: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + */ + weight: number; + }; + /** + * LoRA Selector + * @description Selects a LoRA model and weight. + */ + LoRASelectorInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRA + * @description LoRA model to load + * @default null + */ + lora?: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + * @default 0.75 + */ + weight?: number; + /** + * type + * @default lora_selector + * @constant + * @enum {string} + */ + type: "lora_selector"; + }; + /** + * LoRASelectorOutput + * @description Model loader output + */ + LoRASelectorOutput: { + /** + * LoRA + * @description LoRA model and weight + */ + lora: components["schemas"]["LoRAField"]; + /** + * type + * @default lora_selector_output + * @constant + * @enum {string} + */ + type: "lora_selector_output"; + }; + /** + * LocalModelSource + * @description A local file or directory path. + */ + LocalModelSource: { + /** Path */ + path: string; + /** + * Inplace + * @default false + */ + inplace?: boolean | null; + /** + * Type + * @default local + * @constant + * @enum {string} + */ + type?: "local"; + }; + /** + * LogLevel + * @enum {integer} + */ + LogLevel: 0 | 10 | 20 | 30 | 40 | 50; + /** + * MainBnbQuantized4bCheckpointConfig + * @description Model config for main checkpoint models. + */ + MainBnbQuantized4bCheckpointConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default main + * @constant + * @enum {string} + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** @description Default settings for this model */ + default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; + /** @default normal */ + variant?: components["schemas"]["ModelVariantType"]; + /** + * Format + * @description Format of the provided checkpoint model + * @default checkpoint + * @enum {string} + */ + format: "checkpoint" | "bnb_quantized_nf4b"; + /** + * Config Path + * @description path to the checkpoint model config file + */ + config_path: string; + /** + * Converted At + * @description When this model was last converted to diffusers + */ + converted_at?: number | null; + /** @default epsilon */ + prediction_type?: components["schemas"]["SchedulerPredictionType"]; + /** + * Upcast Attention + * @default false + */ + upcast_attention?: boolean; + }; + /** + * MainCheckpointConfig + * @description Model config for main checkpoint models. + */ + MainCheckpointConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default main + * @constant + * @enum {string} + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** @description Default settings for this model */ + default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; + /** @default normal */ + variant?: components["schemas"]["ModelVariantType"]; + /** + * Format + * @description Format of the provided checkpoint model + * @default checkpoint + * @enum {string} + */ + format: "checkpoint" | "bnb_quantized_nf4b"; + /** + * Config Path + * @description path to the checkpoint model config file + */ + config_path: string; + /** + * Converted At + * @description When this model was last converted to diffusers + */ + converted_at?: number | null; + /** @default epsilon */ + prediction_type?: components["schemas"]["SchedulerPredictionType"]; + /** + * Upcast Attention + * @default false + */ + upcast_attention?: boolean; + }; + /** + * MainDiffusersConfig + * @description Model config for main diffusers models. + */ + MainDiffusersConfig: { + /** + * Key + * @description A unique key for this model. 
+ */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default main + * @constant + * @enum {string} + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** @description Default settings for this model */ + default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; + /** @default normal */ + variant?: components["schemas"]["ModelVariantType"]; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + }; + /** MainModelDefaultSettings */ + MainModelDefaultSettings: { + /** + * Vae + * @description Default VAE for this model (model key) + */ + vae?: string | null; + /** + * Vae Precision + * @description Default VAE precision for this model + */ + vae_precision?: ("fp16" | "fp32") | null; + /** + * Scheduler + * @description Default scheduler for this model + */ + scheduler?: ("ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd") | null; + /** + * Steps + * @description Default number of steps for this model + */ + steps?: number | null; + /** + * Cfg Scale + * @description Default CFG Scale for this model + */ + cfg_scale?: number | null; + /** + * Cfg Rescale Multiplier + * @description Default CFG Rescale Multiplier for this model + */ + cfg_rescale_multiplier?: number | null; + /** + * Width + * @description Default width for this model + */ + width?: number | null; + /** + * Height + * @description Default height for this model + */ + height?: number | null; + }; + /** + * Main Model + * @description Loads a main model, outputting its submodels. + */ + MainModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Main model (UNet, VAE, CLIP) to load + * @default null + */ + model?: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default main_model_loader + * @constant + * @enum {string} + */ + type: "main_model_loader"; + }; + /** + * Combine Masks + * @description Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`. + */ + MaskCombineInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The first mask to combine + * @default null + */ + mask1?: components["schemas"]["ImageField"]; + /** + * @description The second image to combine + * @default null + */ + mask2?: components["schemas"]["ImageField"]; + /** + * type + * @default mask_combine + * @constant + * @enum {string} + */ + type: "mask_combine"; + }; + /** + * Mask Edge + * @description Applies an edge mask to an image + */ + MaskEdgeInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to apply the mask to + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Edge Size + * @description The size of the edge + * @default null + */ + edge_size?: number; + /** + * Edge Blur + * @description The amount of blur on the edge + * @default null + */ + edge_blur?: number; + /** + * Low Threshold + * @description First threshold for the hysteresis procedure in Canny edge detection + * @default null + */ + low_threshold?: number; + /** + * High Threshold + * @description Second threshold for the hysteresis procedure in Canny edge detection + * @default null + */ + high_threshold?: number; + /** + * type + * @default mask_edge + * @constant + * @enum {string} + */ + type: "mask_edge"; + }; + /** + * Mask from Alpha + * @description Extracts the alpha channel of an image as a mask. 
+ */ + MaskFromAlphaInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to create the mask from + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Invert + * @description Whether or not to invert the mask + * @default false + */ + invert?: boolean; + /** + * type + * @default tomask + * @constant + * @enum {string} + */ + type: "tomask"; + }; + /** + * Mask from ID + * @description Generate a mask for a particular color in an ID Map + */ + MaskFromIDInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to create the mask from + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description ID color to mask + * @default null + */ + color?: components["schemas"]["ColorField"]; + /** + * Threshold + * @description Threshold for color detection + * @default 100 + */ + threshold?: number; + /** + * Invert + * @description Whether or not to invert the mask + * @default false + */ + invert?: boolean; + /** + * type + * @default mask_from_id + * @constant + * @enum {string} + */ + type: "mask_from_id"; + }; + /** + * MaskOutput + * @description A torch mask tensor. + */ + MaskOutput: { + /** @description The mask. */ + mask: components["schemas"]["TensorField"]; + /** + * Width + * @description The width of the mask in pixels. + */ + width: number; + /** + * Height + * @description The height of the mask in pixels. + */ + height: number; + /** + * type + * @default mask_output + * @constant + * @enum {string} + */ + type: "mask_output"; + }; + /** + * Tensor Mask to Image + * @description Convert a mask tensor to an image. + */ + MaskTensorToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The mask tensor to convert. + * @default null + */ + mask?: components["schemas"]["TensorField"]; + /** + * type + * @default tensor_mask_to_image + * @constant + * @enum {string} + */ + type: "tensor_mask_to_image"; + }; + /** + * Mediapipe Face Processor + * @description Applies mediapipe face processing to image + */ + MediapipeFaceProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Max Faces + * @description Maximum number of faces to detect + * @default 1 + */ + max_faces?: number; + /** + * Min Confidence + * @description Minimum confidence for face detection + * @default 0.5 + */ + min_confidence?: number; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default mediapipe_face_processor + * @constant + * @enum {string} + */ + type: "mediapipe_face_processor"; + }; + /** + * Metadata Merge + * @description Merged a collection of MetadataDict into a single MetadataDict. + */ + MergeMetadataInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description Collection of Metadata + * @default null + */ + collection?: components["schemas"]["MetadataField"][]; + /** + * type + * @default merge_metadata + * @constant + * @enum {string} + */ + type: "merge_metadata"; + }; + /** + * Merge Tiles to Image + * @description Merge multiple tile images into a single image. + */ + MergeTilesToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Tiles With Images + * @description A list of tile images with tile properties. + * @default null + */ + tiles_with_images?: components["schemas"]["TileWithImage"][]; + /** + * Blend Mode + * @description blending type Linear or Seam + * @default Seam + * @enum {string} + */ + blend_mode?: "Linear" | "Seam"; + /** + * Blend Amount + * @description The amount to blend adjacent tiles in pixels. Must be <= the amount of overlap between adjacent tiles. + * @default 32 + */ + blend_amount?: number; + /** + * type + * @default merge_tiles_to_image + * @constant + * @enum {string} + */ + type: "merge_tiles_to_image"; + }; + /** + * MetadataField + * @description Pydantic model for metadata with custom root of type dict[str, Any]. + * Metadata is stored without a strict schema. + */ + MetadataField: Record; + /** + * Metadata + * @description Takes a MetadataItem or collection of MetadataItems and outputs a MetadataDict. + */ + MetadataInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Items + * @description A single metadata item or collection of metadata items + * @default null + */ + items?: components["schemas"]["MetadataItemField"][] | components["schemas"]["MetadataItemField"]; + /** + * type + * @default metadata + * @constant + * @enum {string} + */ + type: "metadata"; + }; + /** MetadataItemField */ + MetadataItemField: { + /** + * Label + * @description Label for this metadata item + */ + label: string; + /** + * Value + * @description The value for this metadata item (may be any type) + */ + value: unknown; + }; + /** + * Metadata Item + * @description Used to create an arbitrary metadata item. Provide "label" and make a connection to "value" to store that data as the value. + */ + MetadataItemInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Label + * @description Label for this metadata item + * @default null + */ + label?: string; + /** + * Value + * @description The value for this metadata item (may be any type) + * @default null + */ + value?: unknown; + /** + * type + * @default metadata_item + * @constant + * @enum {string} + */ + type: "metadata_item"; + }; + /** + * MetadataItemOutput + * @description Metadata Item Output + */ + MetadataItemOutput: { + /** @description Metadata Item */ + item: components["schemas"]["MetadataItemField"]; + /** + * type + * @default metadata_item_output + * @constant + * @enum {string} + */ + type: "metadata_item_output"; + }; + /** MetadataOutput */ + MetadataOutput: { + /** @description Metadata Dict */ + metadata: components["schemas"]["MetadataField"]; + /** + * type + * @default metadata_output + * @constant + * @enum {string} + */ + type: "metadata_output"; + }; + /** + * Midas Depth Processor + * @description Applies Midas depth processing to image + */ + MidasDepthImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * A Mult + * @description Midas parameter `a_mult` (a = a_mult * PI) + * @default 2 + */ + a_mult?: number; + /** + * Bg Th + * @description Midas parameter `bg_th` + * @default 0.1 + */ + bg_th?: number; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default midas_depth_image_processor + * @constant + * @enum {string} + */ + type: "midas_depth_image_processor"; + }; + /** + * MLSD Processor + * @description Applies MLSD processing to image + */ + MlsdImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * Thr V + * @description MLSD parameter `thr_v` + * @default 0.1 + */ + thr_v?: number; + /** + * Thr D + * @description MLSD parameter `thr_d` + * @default 0.1 + */ + thr_d?: number; + /** + * type + * @default mlsd_image_processor + * @constant + * @enum {string} + */ + type: "mlsd_image_processor"; + }; + /** + * ModelFormat + * @description Storage format of model. + * @enum {string} + */ + ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b" | "bnb_quantized_nf4b"; + /** ModelIdentifierField */ + ModelIdentifierField: { + /** + * Key + * @description The model's unique key + */ + key: string; + /** + * Hash + * @description The model's BLAKE3 hash + */ + hash: string; + /** + * Name + * @description The model's name + */ + name: string; + /** @description The model's base model type */ + base: components["schemas"]["BaseModelType"]; + /** @description The model's type */ + type: components["schemas"]["ModelType"]; + /** + * @description The submodel to load, if this is a main model + * @default null + */ + submodel_type?: components["schemas"]["SubModelType"] | null; + }; + /** + * Model identifier + * @description Selects any model, outputting it its identifier. Be careful with this one! The identifier will be accepted as + * input for any model, even if the model types don't match. If you connect this to a mismatched input, you'll get an + * error. + */ + ModelIdentifierInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Model + * @description The model to select + * @default null + */ + model?: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default model_identifier + * @constant + * @enum {string} + */ + type: "model_identifier"; + }; + /** + * ModelIdentifierOutput + * @description Model identifier output + */ + ModelIdentifierOutput: { + /** + * Model + * @description Model identifier + */ + model: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default model_identifier_output + * @constant + * @enum {string} + */ + type: "model_identifier_output"; + }; + /** + * ModelInstallCancelledEvent + * @description Event model for model_install_cancelled + */ + ModelInstallCancelledEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + }; + /** + * ModelInstallCompleteEvent + * @description Event model for model_install_complete + */ + ModelInstallCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + /** + * Key + * @description Model config record key + */ + key: string; + /** + * Total Bytes + * @description Size of the model (may be None for installation of a local path) + */ + total_bytes: number | null; + }; + /** + * ModelInstallDownloadProgressEvent + * @description Event model for model_install_download_progress + */ + ModelInstallDownloadProgressEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + /** + * Local Path + * @description Where model is downloading to + */ + local_path: string; + /** + * Bytes + * @description Number of bytes downloaded so far + */ + bytes: number; + /** + * Total Bytes + * @description Total size of download, including all files + */ + total_bytes: number; + /** + * Parts + * @description Progress of downloading URLs that comprise the model, if any + */ + parts: ({ + [key: string]: number | string; + })[]; + }; + /** + * ModelInstallDownloadStartedEvent + * @description Event model for model_install_download_started + */ + ModelInstallDownloadStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + /** + * Local Path + * @description Where model is downloading to + */ + local_path: string; + /** + * Bytes + * @description Number of bytes downloaded so far + */ + bytes: number; + /** + * Total Bytes + * @description Total size of download, including all files + */ + total_bytes: number; + /** + * Parts + * @description Progress of downloading URLs that comprise the model, if any + */ + parts: ({ + [key: string]: number | string; + })[]; + }; + /** + * 
ModelInstallDownloadsCompleteEvent + * @description Emitted once when an install job becomes active. + */ + ModelInstallDownloadsCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + }; + /** + * ModelInstallErrorEvent + * @description Event model for model_install_error + */ + ModelInstallErrorEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + /** + * Error Type + * @description The name of the exception + */ + error_type: string; + /** + * Error + * @description A text description of the exception + */ + error: string; + }; + /** + * ModelInstallJob + * @description Object that tracks the current status of an install request. + */ + ModelInstallJob: { + /** + * Id + * @description Unique ID for this job + */ + id: number; + /** + * @description Current status of install process + * @default waiting + */ + status?: components["schemas"]["InstallStatus"]; + /** + * Error Reason + * @description Information about why the job failed + */ + error_reason?: string | null; + /** @description Configuration information (e.g. 'description') to apply to model. */ + config_in?: components["schemas"]["ModelRecordChanges"]; + /** + * Config Out + * @description After successful installation, this will hold the configuration object. + */ + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; + /** + * Inplace + * @description Leave model in its current location; otherwise install under models directory + * @default false + */ + inplace?: boolean; + /** + * Source + * @description Source (URL, repo_id, or local path) of model + */ + source: components["schemas"]["LocalModelSource"] | components["schemas"]["HFModelSource"] | components["schemas"]["URLModelSource"]; + /** + * Local Path + * Format: path + * @description Path to locally-downloaded model; may be the same as the source + */ + local_path: string; + /** + * Bytes + * @description For a remote model, the number of bytes downloaded so far (may not be available) + * @default 0 + */ + bytes?: number; + /** + * Total Bytes + * @description Total size of the model to be installed + * @default 0 + */ + total_bytes?: number; + /** + * Source Metadata + * @description Metadata provided by the 
model source + */ + source_metadata?: (components["schemas"]["BaseMetadata"] | components["schemas"]["HuggingFaceMetadata"]) | null; + /** + * Download Parts + * @description Download jobs contributing to this install + */ + download_parts?: components["schemas"]["DownloadJob"][]; + /** + * Error + * @description On an error condition, this field will contain the text of the exception + */ + error?: string | null; + /** + * Error Traceback + * @description On an error condition, this field will contain the exception traceback + */ + error_traceback?: string | null; + }; + /** + * ModelInstallStartedEvent + * @description Event model for model_install_started + */ + ModelInstallStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Id + * @description The ID of the install job + */ + id: number; + /** + * Source + * @description Source of the model; local path, repo_id or url + */ + source: string; + }; + /** + * ModelLoadCompleteEvent + * @description Event model for model_load_complete + */ + ModelLoadCompleteEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Config + * @description The model's config + */ + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + /** + * @description The submodel type, if any + * @default null + */ + submodel_type: components["schemas"]["SubModelType"] | null; + }; + /** + * ModelLoadStartedEvent + * @description Event model for model_load_started + */ + ModelLoadStartedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Config + * @description The model's config + */ + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | 
components["schemas"]["CLIPEmbedDiffusersConfig"]; + /** + * @description The submodel type, if any + * @default null + */ + submodel_type: components["schemas"]["SubModelType"] | null; + }; + /** + * ModelLoaderOutput + * @description Model loader output + */ + ModelLoaderOutput: { + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * type + * @default model_loader_output + * @constant + * @enum {string} + */ + type: "model_loader_output"; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + */ + clip: components["schemas"]["CLIPField"]; + /** + * UNet + * @description UNet (scheduler, LoRAs) + */ + unet: components["schemas"]["UNetField"]; + }; + /** + * ModelRecordChanges + * @description A set of changes to apply to a model. + */ + ModelRecordChanges: { + /** + * Source + * @description original source of the model + */ + source?: string | null; + /** @description type of model source */ + source_type?: components["schemas"]["ModelSourceType"] | null; + /** + * Source Api Response + * @description metadata from remote source + */ + source_api_response?: string | null; + /** + * Name + * @description Name of the model. + */ + name?: string | null; + /** + * Path + * @description Path to the model. + */ + path?: string | null; + /** + * Description + * @description Model description + */ + description?: string | null; + /** @description The base model. */ + base?: components["schemas"]["BaseModelType"] | null; + /** @description Type of model */ + type?: components["schemas"]["ModelType"] | null; + /** + * Key + * @description Database ID for this model + */ + key?: string | null; + /** + * Hash + * @description hash of model file + */ + hash?: string | null; + /** + * Format + * @description format of model file + */ + format?: string | null; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases?: string[] | null; + /** + * Default Settings + * @description Default settings for this model + */ + default_settings?: components["schemas"]["MainModelDefaultSettings"] | components["schemas"]["ControlAdapterDefaultSettings"] | null; + /** @description The variant of the model. */ + variant?: components["schemas"]["ModelVariantType"] | null; + /** @description The prediction type of the model. */ + prediction_type?: components["schemas"]["SchedulerPredictionType"] | null; + /** + * Upcast Attention + * @description Whether to upcast attention. + */ + upcast_attention?: boolean | null; + /** + * Config Path + * @description Path to config file for model + */ + config_path?: string | null; + }; + /** + * ModelRepoVariant + * @description Various hugging face variants on the diffusers format. + * @enum {string} + */ + ModelRepoVariant: "" | "fp16" | "fp32" | "onnx" | "openvino" | "flax"; + /** + * ModelSourceType + * @description Model source type. + * @enum {string} + */ + ModelSourceType: "path" | "url" | "hf_repo_id"; + /** + * ModelType + * @description Model type. + * @enum {string} + */ + ModelType: "onnx" | "main" | "vae" | "lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "clip_embed" | "t2i_adapter" | "t5_encoder" | "spandrel_image_to_image"; + /** + * ModelVariantType + * @description Variant type. + * @enum {string} + */ + ModelVariantType: "normal" | "inpaint" | "depth"; + /** + * ModelsList + * @description Return list of configs. 
+ */ + ModelsList: { + /** Models */ + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; + }; + /** + * Multiply Integers + * @description Multiplies two numbers + */ + MultiplyInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * A + * @description The first number + * @default 0 + */ + a?: number; + /** + * B + * @description The second number + * @default 0 + */ + b?: number; + /** + * type + * @default mul + * @constant + * @enum {string} + */ + type: "mul"; + }; + /** NodeFieldValue */ + NodeFieldValue: { + /** + * Node Path + * @description The node into which this batch data item will be substituted. + */ + node_path: string; + /** + * Field Name + * @description The field into which this batch data item will be substituted. + */ + field_name: string; + /** + * Value + * @description The value to substitute into the node/field. + */ + value: string | number; + }; + /** + * Noise + * @description Generates latent noise. + */ + NoiseInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Seed + * @description Seed for random number generation + * @default 0 + */ + seed?: number; + /** + * Width + * @description Width of output (px) + * @default 512 + */ + width?: number; + /** + * Height + * @description Height of output (px) + * @default 512 + */ + height?: number; + /** + * Use Cpu + * @description Use CPU for noise generation (for reproducible results across platforms) + * @default true + */ + use_cpu?: boolean; + /** + * type + * @default noise + * @constant + * @enum {string} + */ + type: "noise"; + }; + /** + * NoiseOutput + * @description Invocation noise output + */ + NoiseOutput: { + /** @description Noise tensor */ + noise: components["schemas"]["LatentsField"]; + /** + * Width + * @description Width of output (px) + */ + width: number; + /** + * Height + * @description Height of output (px) + */ + height: number; + /** + * type + * @default noise_output + * @constant + * @enum {string} + */ + type: "noise_output"; + }; + /** + * Normal BAE Processor + * @description Applies NormalBae processing to image + */ + NormalbaeImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default normalbae_image_processor + * @constant + * @enum {string} + */ + type: "normalbae_image_processor"; + }; + /** OffsetPaginatedResults[BoardDTO] */ + OffsetPaginatedResults_BoardDTO_: { + /** + * Limit + * @description Limit of items to get + */ + limit: number; + /** + * Offset + * @description Offset from which to retrieve items + */ + offset: number; + /** + * Total + * @description Total number of items in result + */ + total: number; + /** + * Items + * @description Items + */ + items: components["schemas"]["BoardDTO"][]; + }; + /** OffsetPaginatedResults[ImageDTO] */ + OffsetPaginatedResults_ImageDTO_: { + /** + * Limit + * @description Limit of items to get + */ + limit: number; + /** + * Offset + * @description Offset from which to retrieve items + */ + offset: number; + /** + * Total + * @description Total number of items in result + */ + total: number; + /** + * Items + * @description Items + */ + items: components["schemas"]["ImageDTO"][]; + }; + /** + * OutputFieldJSONSchemaExtra + * @description Extra attributes to be added to output fields and their OpenAPI schema. Used by the workflow editor + * during schema parsing and UI rendering. 
+ */ + OutputFieldJSONSchemaExtra: { + field_kind: components["schemas"]["FieldKind"]; + /** Ui Hidden */ + ui_hidden: boolean; + ui_type: components["schemas"]["UIType"] | null; + /** Ui Order */ + ui_order: number | null; + }; + /** PaginatedResults[WorkflowRecordListItemDTO] */ + PaginatedResults_WorkflowRecordListItemDTO_: { + /** + * Page + * @description Current Page + */ + page: number; + /** + * Pages + * @description Total number of pages + */ + pages: number; + /** + * Per Page + * @description Number of items per page + */ + per_page: number; + /** + * Total + * @description Total number of items in result + */ + total: number; + /** + * Items + * @description Items + */ + items: components["schemas"]["WorkflowRecordListItemDTO"][]; + }; + /** + * Pair Tile with Image + * @description Pair an image with its tile properties. + */ + PairTileImageInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The tile image. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * @description The tile properties. + * @default null + */ + tile?: components["schemas"]["Tile"]; + /** + * type + * @default pair_tile_image + * @constant + * @enum {string} + */ + type: "pair_tile_image"; + }; + /** PairTileImageOutput */ + PairTileImageOutput: { + /** @description A tile description with its corresponding image. */ + tile_with_image: components["schemas"]["TileWithImage"]; + /** + * type + * @default pair_tile_image_output + * @constant + * @enum {string} + */ + type: "pair_tile_image_output"; + }; + /** + * PIDI Processor + * @description Applies PIDI processing to image + */ + PidiImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * Safe + * @description Whether or not to use safe mode + * @default false + */ + safe?: boolean; + /** + * Scribble + * @description Whether or not to use scribble mode + * @default false + */ + scribble?: boolean; + /** + * type + * @default pidi_image_processor + * @constant + * @enum {string} + */ + type: "pidi_image_processor"; + }; + /** + * ProgressImage + * @description The progress image sent intermittently during processing + */ + ProgressImage: { + /** + * Width + * @description The effective width of the image in pixels + */ + width: number; + /** + * Height + * @description The effective height of the image in pixels + */ + height: number; + /** + * Dataurl + * @description The image data as a b64 data URL + */ + dataURL: string; + }; + /** + * Prompts from File + * @description Loads prompts from a text file + */ + PromptsFromFileInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * File Path + * @description Path to prompt text file + * @default null + */ + file_path?: string; + /** + * Pre Prompt + * @description String to prepend to each prompt + * @default null + */ + pre_prompt?: string | null; + /** + * Post Prompt + * @description String to append to each prompt + * @default null + */ + post_prompt?: string | null; + /** + * Start Line + * @description Line in the file to start from + * @default 1 + */ + start_line?: number; + /** + * Max Prompts + * @description Max lines to read from file (0=all) + * @default 1 + */ + max_prompts?: number; + /** + * type + * @default prompt_from_file + * @constant + * @enum {string} + */ + type: "prompt_from_file"; + }; + /** + * PruneResult + * @description Result of pruning the session queue + */ + PruneResult: { + /** + * Deleted + * @description Number of queue items deleted + */ + deleted: number; + }; + /** + * QueueClearedEvent + * @description Event model for queue_cleared + */ + QueueClearedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + }; + /** + * QueueItemStatusChangedEvent + * @description Event model for queue_item_status_changed + */ + QueueItemStatusChangedEvent: { + /** + * Timestamp + * @description The timestamp of the event + */ + timestamp: number; + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Item Id + * @description The ID of the queue item + */ + item_id: number; + /** + * Batch Id + * @description The ID of the queue batch + */ + batch_id: string; + /** + * Status + * @description The new status of the queue item + * @enum 
{string} + */ + status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; + /** + * Error Type + * @description The error type, if any + * @default null + */ + error_type: string | null; + /** + * Error Message + * @description The error message, if any + * @default null + */ + error_message: string | null; + /** + * Error Traceback + * @description The error traceback, if any + * @default null + */ + error_traceback: string | null; + /** + * Created At + * @description The timestamp when the queue item was created + * @default null + */ + created_at: string | null; + /** + * Updated At + * @description The timestamp when the queue item was last updated + * @default null + */ + updated_at: string | null; + /** + * Started At + * @description The timestamp when the queue item was started + * @default null + */ + started_at: string | null; + /** + * Completed At + * @description The timestamp when the queue item was completed + * @default null + */ + completed_at: string | null; + /** @description The status of the batch */ + batch_status: components["schemas"]["BatchStatus"]; + /** @description The status of the queue */ + queue_status: components["schemas"]["SessionQueueStatus"]; + /** + * Session Id + * @description The ID of the session (aka graph execution state) + */ + session_id: string; + }; + /** + * Random Float + * @description Outputs a single random float + */ + RandomFloatInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default false + */ + use_cache?: boolean; + /** + * Low + * @description The inclusive low value + * @default 0 + */ + low?: number; + /** + * High + * @description The exclusive high value + * @default 1 + */ + high?: number; + /** + * Decimals + * @description The number of decimal places to round to + * @default 2 + */ + decimals?: number; + /** + * type + * @default rand_float + * @constant + * @enum {string} + */ + type: "rand_float"; + }; + /** + * Random Integer + * @description Outputs a single random integer. + */ + RandomIntInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default false + */ + use_cache?: boolean; + /** + * Low + * @description The inclusive low value + * @default 0 + */ + low?: number; + /** + * High + * @description The exclusive high value + * @default 2147483647 + */ + high?: number; + /** + * type + * @default rand_int + * @constant + * @enum {string} + */ + type: "rand_int"; + }; + /** + * Random Range + * @description Creates a collection of random numbers + */ + RandomRangeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default false + */ + use_cache?: boolean; + /** + * Low + * @description The inclusive low value + * @default 0 + */ + low?: number; + /** + * High + * @description The exclusive high value + * @default 2147483647 + */ + high?: number; + /** + * Size + * @description The number of values to generate + * @default 1 + */ + size?: number; + /** + * Seed + * @description The seed for the RNG (omit for random) + * @default 0 + */ + seed?: number; + /** + * type + * @default random_range + * @constant + * @enum {string} + */ + type: "random_range"; + }; + /** + * Integer Range + * @description Creates a range of numbers from start to stop with step + */ + RangeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Start + * @description The start of the range + * @default 0 + */ + start?: number; + /** + * Stop + * @description The stop of the range + * @default 10 + */ + stop?: number; + /** + * Step + * @description The step of the range + * @default 1 + */ + step?: number; + /** + * type + * @default range + * @constant + * @enum {string} + */ + type: "range"; + }; + /** + * Integer Range of Size + * @description Creates a range from start to start + (size * step) incremented by step + */ + RangeOfSizeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Start + * @description The start of the range + * @default 0 + */ + start?: number; + /** + * Size + * @description The number of values + * @default 1 + */ + size?: number; + /** + * Step + * @description The step of the range + * @default 1 + */ + step?: number; + /** + * type + * @default range_of_size + * @constant + * @enum {string} + */ + type: "range_of_size"; + }; + /** + * Create Rectangle Mask + * @description Create a rectangular mask. + */ + RectangleMaskInvocation: { + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Width + * @description The width of the entire mask. + * @default null + */ + width?: number; + /** + * Height + * @description The height of the entire mask. + * @default null + */ + height?: number; + /** + * X Left + * @description The left x-coordinate of the rectangular masked region (inclusive). 
+ * @default null + */ + x_left?: number; + /** + * Y Top + * @description The top y-coordinate of the rectangular masked region (inclusive). + * @default null + */ + y_top?: number; + /** + * Rectangle Width + * @description The width of the rectangular masked region. + * @default null + */ + rectangle_width?: number; + /** + * Rectangle Height + * @description The height of the rectangular masked region. + * @default null + */ + rectangle_height?: number; + /** + * type + * @default rectangle_mask + * @constant + * @enum {string} + */ + type: "rectangle_mask"; + }; + /** + * RemoteModelFile + * @description Information about a downloadable file that forms part of a model. + */ + RemoteModelFile: { + /** + * Url + * Format: uri + * @description The url to download this model file + */ + url: string; + /** + * Path + * Format: path + * @description The path to the file, relative to the model root + */ + path: string; + /** + * Size + * @description The size of this file, in bytes + * @default 0 + */ + size?: number | null; + /** + * Sha256 + * @description SHA256 hash of this model (not always available) + */ + sha256?: string | null; + }; + /** RemoveImagesFromBoardResult */ + RemoveImagesFromBoardResult: { + /** + * Removed Image Names + * @description The image names that were removed from their board + */ + removed_image_names: string[]; + }; + /** + * Resize Latents + * @description Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8. + */ + ResizeLatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"]; + /** + * Width + * @description Width of output (px) + * @default null + */ + width?: number; + /** + * Height + * @description Height of output (px) + * @default null + */ + height?: number; + /** + * Mode + * @description Interpolation mode + * @default bilinear + * @enum {string} + */ + mode?: "nearest" | "linear" | "bilinear" | "bicubic" | "trilinear" | "area" | "nearest-exact"; + /** + * Antialias + * @description Whether or not to apply antialiasing (bilinear or bicubic only) + * @default false + */ + antialias?: boolean; + /** + * type + * @default lresize + * @constant + * @enum {string} + */ + type: "lresize"; + }; + /** + * ResourceOrigin + * @description The origin of a resource (eg image). + * + * - INTERNAL: The resource was created by the application. + * - EXTERNAL: The resource was not created by the application. + * This may be a user-initiated upload, or an internal application upload (eg Canvas init image). + * @enum {string} + */ + ResourceOrigin: "internal" | "external"; + /** + * Round Float + * @description Rounds a float to a specified number of decimal places. + */ + RoundInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The float value + * @default 0 + */ + value?: number; + /** + * Decimals + * @description The number of decimal places + * @default 0 + */ + decimals?: number; + /** + * type + * @default round_float + * @constant + * @enum {string} + */ + type: "round_float"; + }; + /** + * SDXL Prompt + * @description Parse prompt using compel package to conditioning. + */ + SDXLCompelPromptInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Prompt + * @description Prompt to be parsed by Compel to create a conditioning tensor + * @default + */ + prompt?: string; + /** + * Style + * @description Prompt to be parsed by Compel to create a conditioning tensor + * @default + */ + style?: string; + /** + * Original Width + * @default 1024 + */ + original_width?: number; + /** + * Original Height + * @default 1024 + */ + original_height?: number; + /** + * Crop Top + * @default 0 + */ + crop_top?: number; + /** + * Crop Left + * @default 0 + */ + crop_left?: number; + /** + * Target Width + * @default 1024 + */ + target_width?: number; + /** + * Target Height + * @default 1024 + */ + target_height?: number; + /** + * CLIP 1 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"]; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip2?: components["schemas"]["CLIPField"]; + /** + * @description A mask defining the region that this conditioning prompt applies to. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + /** + * type + * @default sdxl_compel_prompt + * @constant + * @enum {string} + */ + type: "sdxl_compel_prompt"; + }; + /** + * SDXL LoRA Collection Loader + * @description Applies a collection of SDXL LoRAs to the provided UNet and CLIP models. + */ + SDXLLoRACollectionLoader: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRAs + * @description LoRA models and weights. May be a single LoRA or collection. 
+ * @default null + */ + loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][]; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * CLIP + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"] | null; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip2?: components["schemas"]["CLIPField"] | null; + /** + * type + * @default sdxl_lora_collection_loader + * @constant + * @enum {string} + */ + type: "sdxl_lora_collection_loader"; + }; + /** + * SDXL LoRA + * @description Apply selected lora to unet and text_encoder. + */ + SDXLLoRALoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRA + * @description LoRA model to load + * @default null + */ + lora?: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + * @default 0.75 + */ + weight?: number; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * CLIP 1 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip?: components["schemas"]["CLIPField"] | null; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip2?: components["schemas"]["CLIPField"] | null; + /** + * type + * @default sdxl_lora_loader + * @constant + * @enum {string} + */ + type: "sdxl_lora_loader"; + }; + /** + * SDXLLoRALoaderOutput + * @description SDXL LoRA Loader Output + */ + SDXLLoRALoaderOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet: components["schemas"]["UNetField"] | null; + /** + * CLIP 1 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip: components["schemas"]["CLIPField"] | null; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip2: components["schemas"]["CLIPField"] | null; + /** + * type + * @default sdxl_lora_loader_output + * @constant + * @enum {string} + */ + type: "sdxl_lora_loader_output"; + }; + /** + * SDXL Main Model + * @description Loads an sdxl base model, outputting its submodels. + */ + SDXLModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load + * @default null + */ + model?: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default sdxl_model_loader + * @constant + * @enum {string} + */ + type: "sdxl_model_loader"; + }; + /** + * SDXLModelLoaderOutput + * @description SDXL base model loader output + */ + SDXLModelLoaderOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + */ + unet: components["schemas"]["UNetField"]; + /** + * CLIP 1 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + */ + clip: components["schemas"]["CLIPField"]; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + */ + clip2: components["schemas"]["CLIPField"]; + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * type + * @default sdxl_model_loader_output + * @constant + * @enum {string} + */ + type: "sdxl_model_loader_output"; + }; + /** + * SDXL Refiner Prompt + * @description Parse prompt using compel package to conditioning. + */ + SDXLRefinerCompelPromptInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Style + * @description Prompt to be parsed by Compel to create a conditioning tensor + * @default + */ + style?: string; + /** + * Original Width + * @default 1024 + */ + original_width?: number; + /** + * Original Height + * @default 1024 + */ + original_height?: number; + /** + * Crop Top + * @default 0 + */ + crop_top?: number; + /** + * Crop Left + * @default 0 + */ + crop_left?: number; + /** + * Aesthetic Score + * @description The aesthetic score to apply to the conditioning tensor + * @default 6 + */ + aesthetic_score?: number; + /** + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + * @default null + */ + clip2?: components["schemas"]["CLIPField"]; + /** + * type + * @default sdxl_refiner_compel_prompt + * @constant + * @enum {string} + */ + type: "sdxl_refiner_compel_prompt"; + }; + /** + * SDXL Refiner Model + * @description Loads an sdxl refiner model, outputting its submodels. + */ + SDXLRefinerModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description SDXL Refiner Main Model (UNet, VAE, CLIP2) to load + * @default null + */ + model?: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default sdxl_refiner_model_loader + * @constant + * @enum {string} + */ + type: "sdxl_refiner_model_loader"; + }; + /** + * SDXLRefinerModelLoaderOutput + * @description SDXL refiner model loader output + */ + SDXLRefinerModelLoaderOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + */ + unet: components["schemas"]["UNetField"]; + /** + * CLIP 2 + * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count + */ + clip2: components["schemas"]["CLIPField"]; + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * type + * @default sdxl_refiner_model_loader_output + * @constant + * @enum {string} + */ + type: "sdxl_refiner_model_loader_output"; + }; + /** + * SQLiteDirection + * @enum {string} + */ + SQLiteDirection: "ASC" | "DESC"; + /** + * Save Image + * @description Saves an image. Unlike an image primitive, this invocation stores a copy of the image. + */ + SaveImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default false + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default save_image + * @constant + * @enum {string} + */ + type: "save_image"; + }; + /** + * Scale Latents + * @description Scales latents by a given factor. + */ + ScaleLatentsInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"]; + /** + * Scale Factor + * @description The factor by which to scale + * @default null + */ + scale_factor?: number; + /** + * Mode + * @description Interpolation mode + * @default bilinear + * @enum {string} + */ + mode?: "nearest" | "linear" | "bilinear" | "bicubic" | "trilinear" | "area" | "nearest-exact"; + /** + * Antialias + * @description Whether or not to apply antialiasing (bilinear or bicubic only) + * @default false + */ + antialias?: boolean; + /** + * type + * @default lscale + * @constant + * @enum {string} + */ + type: "lscale"; + }; + /** + * Scheduler + * @description Selects a scheduler. + */ + SchedulerInvocation: { + /** + * Id + * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Scheduler + * @description Scheduler to use during inference + * @default euler + * @enum {string} + */ + scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; + /** + * type + * @default scheduler + * @constant + * @enum {string} + */ + type: "scheduler"; + }; + /** SchedulerOutput */ + SchedulerOutput: { + /** + * Scheduler + * @description Scheduler to use during inference + * @enum {string} + */ + scheduler: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; + /** + * type + * @default scheduler_output + * @constant + * @enum {string} + */ + type: "scheduler_output"; + }; + /** + * SchedulerPredictionType + * @description Scheduler prediction type. + * @enum {string} + */ + SchedulerPredictionType: "epsilon" | "v_prediction" | "sample"; + /** + * Seamless + * @description Applies the seamless transformation to the Model UNet and VAE. + */ + SeamlessModeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"] | null; + /** + * VAE + * @description VAE model to load + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * Seamless Y + * @description Specify whether Y axis is seamless + * @default true + */ + seamless_y?: boolean; + /** + * Seamless X + * @description Specify whether X axis is seamless + * @default true + */ + seamless_x?: boolean; + /** + * type + * @default seamless + * @constant + * @enum {string} + */ + type: "seamless"; + }; + /** + * SeamlessModeOutput + * @description Modified Seamless Model output + */ + SeamlessModeOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet: components["schemas"]["UNetField"] | null; + /** + * VAE + * @description VAE + * @default null + */ + vae: components["schemas"]["VAEField"] | null; + /** + * type + * @default seamless_output + * @constant + * @enum {string} + */ + type: "seamless_output"; + }; + /** + * Segment Anything + * @description Runs a Segment Anything Model. + */ + SegmentAnythingInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Model + * @description The Segment Anything model to use. + * @default null + * @enum {string} + */ + model?: "segment-anything-base" | "segment-anything-large" | "segment-anything-huge"; + /** + * @description The image to segment. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Bounding Boxes + * @description The bounding boxes to prompt the SAM model with. + * @default null + */ + bounding_boxes?: components["schemas"]["BoundingBoxField"][]; + /** + * Apply Polygon Refinement + * @description Whether to apply polygon refinement to the masks. This will smooth the edges of the masks slightly and ensure that each mask consists of a single closed polygon (before merging). + * @default true + */ + apply_polygon_refinement?: boolean; + /** + * Mask Filter + * @description The filtering to apply to the detected masks before merging them into a final output. + * @default all + * @enum {string} + */ + mask_filter?: "all" | "largest" | "highest_box_score"; + /** + * type + * @default segment_anything + * @constant + * @enum {string} + */ + type: "segment_anything"; + }; + /** + * Segment Anything Processor + * @description Applies segment anything processing to image + */ + SegmentAnythingProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Detect Resolution + * @description Pixel resolution for detection + * @default 512 + */ + detect_resolution?: number; + /** + * Image Resolution + * @description Pixel resolution for output image + * @default 512 + */ + image_resolution?: number; + /** + * type + * @default segment_anything_processor + * @constant + * @enum {string} + */ + type: "segment_anything_processor"; + }; + /** SessionProcessorStatus */ + SessionProcessorStatus: { + /** + * Is Started + * @description Whether the session processor is started + */ + is_started: boolean; + /** + * Is Processing + * @description Whether a session is being processed + */ + is_processing: boolean; + }; + /** + * SessionQueueAndProcessorStatus + * @description The overall status of session queue and processor + */ + SessionQueueAndProcessorStatus: { + queue: components["schemas"]["SessionQueueStatus"]; + processor: components["schemas"]["SessionProcessorStatus"]; + }; + /** SessionQueueItem */ + SessionQueueItem: { + /** + * Item Id + * @description The identifier of the session queue item + */ + item_id: number; + /** + * Status + * @description The status of this queue item + * @default pending + * @enum {string} + */ + status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; + /** + * Priority + * @description The priority of this queue item + * @default 0 + */ + priority: number; + /** + * Batch Id + * @description The ID of the batch associated with this queue item + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed. 
+ */ + session_id: string; + /** + * Error Type + * @description The error type if this queue item errored + */ + error_type?: string | null; + /** + * Error Message + * @description The error message if this queue item errored + */ + error_message?: string | null; + /** + * Error Traceback + * @description The error traceback if this queue item errored + */ + error_traceback?: string | null; + /** + * Created At + * @description When this queue item was created + */ + created_at: string; + /** + * Updated At + * @description When this queue item was updated + */ + updated_at: string; + /** + * Started At + * @description When this queue item was started + */ + started_at?: string | null; + /** + * Completed At + * @description When this queue item was completed + */ + completed_at?: string | null; + /** + * Queue Id + * @description The id of the queue with which this item is associated + */ + queue_id: string; + /** + * Field Values + * @description The field values that were used for this queue item + */ + field_values?: components["schemas"]["NodeFieldValue"][] | null; + /** @description The fully-populated session to be executed */ + session: components["schemas"]["GraphExecutionState"]; + /** @description The workflow associated with this queue item */ + workflow?: components["schemas"]["WorkflowWithoutID"] | null; + }; + /** SessionQueueItemDTO */ + SessionQueueItemDTO: { + /** + * Item Id + * @description The identifier of the session queue item + */ + item_id: number; + /** + * Status + * @description The status of this queue item + * @default pending + * @enum {string} + */ + status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; + /** + * Priority + * @description The priority of this queue item + * @default 0 + */ + priority: number; + /** + * Batch Id + * @description The ID of the batch associated with this queue item + */ + batch_id: string; + /** + * Session Id + * @description The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed. 
+ */ + session_id: string; + /** + * Error Type + * @description The error type if this queue item errored + */ + error_type?: string | null; + /** + * Error Message + * @description The error message if this queue item errored + */ + error_message?: string | null; + /** + * Error Traceback + * @description The error traceback if this queue item errored + */ + error_traceback?: string | null; + /** + * Created At + * @description When this queue item was created + */ + created_at: string; + /** + * Updated At + * @description When this queue item was updated + */ + updated_at: string; + /** + * Started At + * @description When this queue item was started + */ + started_at?: string | null; + /** + * Completed At + * @description When this queue item was completed + */ + completed_at?: string | null; + /** + * Queue Id + * @description The id of the queue with which this item is associated + */ + queue_id: string; + /** + * Field Values + * @description The field values that were used for this queue item + */ + field_values?: components["schemas"]["NodeFieldValue"][] | null; + }; + /** SessionQueueStatus */ + SessionQueueStatus: { + /** + * Queue Id + * @description The ID of the queue + */ + queue_id: string; + /** + * Item Id + * @description The current queue item id + */ + item_id: number | null; + /** + * Batch Id + * @description The current queue item's batch id + */ + batch_id: string | null; + /** + * Session Id + * @description The current queue item's session id + */ + session_id: string | null; + /** + * Pending + * @description Number of queue items with status 'pending' + */ + pending: number; + /** + * In Progress + * @description Number of queue items with status 'in_progress' + */ + in_progress: number; + /** + * Completed + * @description Number of queue items with status 'complete' + */ + completed: number; + /** + * Failed + * @description Number of queue items with status 'error' + */ + failed: number; + /** + * Canceled + * @description Number of queue items with status 'canceled' + */ + canceled: number; + /** + * Total + * @description Total number of queue items + */ + total: number; + }; + /** + * Show Image + * @description Displays a provided image using the OS image viewer, and passes it forward in the pipeline. + */ + ShowImageInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to show + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default show_image + * @constant + * @enum {string} + */ + type: "show_image"; + }; + /** + * Image-to-Image (Autoscale) + * @description Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached. + */ + SpandrelImageToImageAutoscaleInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The input image + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Image-to-Image Model + * @description Image-to-Image model + * @default null + */ + image_to_image_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Tile Size + * @description The tile size for tiled image-to-image. Set to 0 to disable tiling. + * @default 512 + */ + tile_size?: number; + /** + * type + * @default spandrel_image_to_image_autoscale + * @constant + * @enum {string} + */ + type: "spandrel_image_to_image_autoscale"; + /** + * Scale + * @description The final scale of the output image. If the model does not upscale the image, this will be ignored. + * @default 4 + */ + scale?: number; + /** + * Fit To Multiple Of 8 + * @description If true, the output image will be resized to the nearest multiple of 8 in both dimensions. + * @default false + */ + fit_to_multiple_of_8?: boolean; + }; + /** + * SpandrelImageToImageConfig + * @description Model config for Spandrel Image to Image models. + */ + SpandrelImageToImageConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default spandrel_image_to_image + * @constant + * @enum {string} + */ + type: "spandrel_image_to_image"; + /** + * Format + * @default checkpoint + * @constant + * @enum {string} + */ + format: "checkpoint"; + }; + /** + * Image-to-Image + * @description Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel). + */ + SpandrelImageToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The input image + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Image-to-Image Model + * @description Image-to-Image model + * @default null + */ + image_to_image_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Tile Size + * @description The tile size for tiled image-to-image. Set to 0 to disable tiling. + * @default 512 + */ + tile_size?: number; + /** + * type + * @default spandrel_image_to_image + * @constant + * @enum {string} + */ + type: "spandrel_image_to_image"; + }; + /** StarterModel */ + StarterModel: { + /** Description */ + description: string; + /** Source */ + source: string; + /** Name */ + name: string; + base: components["schemas"]["BaseModelType"]; + type: components["schemas"]["ModelType"]; + /** + * Is Installed + * @default false + */ + is_installed?: boolean; + /** Dependencies */ + dependencies?: components["schemas"]["StarterModelWithoutDependencies"][] | null; + }; + /** StarterModelWithoutDependencies */ + StarterModelWithoutDependencies: { + /** Description */ + description: string; + /** Source */ + source: string; + /** Name */ + name: string; + base: components["schemas"]["BaseModelType"]; + type: components["schemas"]["ModelType"]; + /** + * Is Installed + * @default false + */ + is_installed?: boolean; + }; + /** + * Step Param Easing + * @description Experimental per-step parameter easing for denoising steps + */ + StepParamEasingInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Easing + * @description The easing function to use + * @default Linear + * @enum {string} + */ + easing?: "Linear" | "QuadIn" | "QuadOut" | "QuadInOut" | "CubicIn" | "CubicOut" | "CubicInOut" | "QuarticIn" | "QuarticOut" | "QuarticInOut" | "QuinticIn" | "QuinticOut" | "QuinticInOut" | "SineIn" | "SineOut" | "SineInOut" | "CircularIn" | "CircularOut" | "CircularInOut" | "ExponentialIn" | "ExponentialOut" | "ExponentialInOut" | "ElasticIn" | "ElasticOut" | "ElasticInOut" | "BackIn" | "BackOut" | "BackInOut" | "BounceIn" | "BounceOut" | "BounceInOut"; + /** + * Num Steps + * @description number of denoising steps + * @default 20 + */ + num_steps?: number; + /** + * Start Value + * @description easing starting value + * @default 0 + */ + start_value?: number; + /** + * End Value + * @description easing ending value + * @default 1 + */ + end_value?: number; + /** + * Start Step Percent + * @description fraction of steps at which to start easing + * @default 0 + */ + start_step_percent?: number; + /** + * End Step Percent + * @description fraction of steps after which to end easing + * @default 1 + */ + end_step_percent?: number; + /** + * Pre Start Value + * @description value before easing start + * @default null + */ + pre_start_value?: number | null; + /** + * Post End Value + * @description value after easing end + * @default null + */ + post_end_value?: number | null; + /** + * Mirror + * @description include mirror of easing function + * @default false + */ + mirror?: boolean; + /** + * Show Easing Plot + * @description show easing plot + * @default false + */ + show_easing_plot?: boolean; + /** + * type + * @default step_param_easing + * @constant + * @enum {string} + */ + type: "step_param_easing"; + }; + /** + * String2Output + * @description Base class for invocations that output two strings + */ + String2Output: { + /** + * String 1 + * @description string 1 + */ + string_1: string; + /** + * String 2 + * @description string 2 + */ + string_2: string; + /** + * type + * @default string_2_output + * @constant + * @enum {string} + */ + type: "string_2_output"; + }; + /** + * String Collection Primitive + * @description A collection of string primitive values + */ + StringCollectionInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Collection + * @description The collection of string values + * @default [] + */ + collection?: string[]; + /** + * type + * @default string_collection + * @constant + * @enum {string} + */ + type: "string_collection"; + }; + /** + * StringCollectionOutput + * @description Base class for nodes that output a collection of strings + */ + StringCollectionOutput: { + /** + * Collection + * @description The output strings + */ + collection: string[]; + /** + * type + * @default string_collection_output + * @constant + * @enum {string} + */ + type: "string_collection_output"; + }; + /** + * String Primitive + * @description A string primitive value + */ + StringInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Value + * @description The string value + * @default + */ + value?: string; + /** + * type + * @default string + * @constant + * @enum {string} + */ + type: "string"; + }; + /** + * String Join + * @description Joins string left to string right + */ + StringJoinInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * String Left + * @description String Left + * @default + */ + string_left?: string; + /** + * String Right + * @description String Right + * @default + */ + string_right?: string; + /** + * type + * @default string_join + * @constant + * @enum {string} + */ + type: "string_join"; + }; + /** + * String Join Three + * @description Joins string left to string middle to string right + */ + StringJoinThreeInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * String Left + * @description String Left + * @default + */ + string_left?: string; + /** + * String Middle + * @description String Middle + * @default + */ + string_middle?: string; + /** + * String Right + * @description String Right + * @default + */ + string_right?: string; + /** + * type + * @default string_join_three + * @constant + * @enum {string} + */ + type: "string_join_three"; + }; + /** + * StringOutput + * @description Base class for nodes that output a single string + */ + StringOutput: { + /** + * Value + * @description The output string + */ + value: string; + /** + * type + * @default string_output + * @constant + * @enum {string} + */ + type: "string_output"; + }; + /** + * StringPosNegOutput + * @description Base class for invocations that output a positive and negative string + */ + StringPosNegOutput: { + /** + * Positive String + * @description Positive string + */ + positive_string: string; + /** + * Negative String + * @description Negative string + */ + negative_string: string; + /** + * type + * @default string_pos_neg_output + * @constant + * @enum {string} + */ + type: "string_pos_neg_output"; + }; + /** + * String Replace + * @description Replaces the search string with the replace string + */ + StringReplaceInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * String + * @description String to work on + * @default + */ + string?: string; + /** + * Search String + * @description String to search for + * @default + */ + search_string?: string; + /** + * Replace String + * @description String to replace the search + * @default + */ + replace_string?: string; + /** + * Use Regex + * @description Use search string as a regex expression (non-regex is case-insensitive) + * @default false + */ + use_regex?: boolean; + /** + * type + * @default string_replace + * @constant + * @enum {string} + */ + type: "string_replace"; + }; + /** + * String Split + * @description Splits string into two strings, based on the first occurrence of the delimiter. The delimiter will be removed from the string + */ + StringSplitInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * String + * @description String to split + * @default + */ + string?: string; + /** + * Delimiter + * @description Delimiter to split with. Blank will split on the first whitespace + * @default + */ + delimiter?: string; + /** + * type + * @default string_split + * @constant + * @enum {string} + */ + type: "string_split"; + }; + /** + * String Split Negative + * @description Splits string into two strings, inside [] goes into negative string, everything else goes into positive string. 
Each [ and ] character is replaced with a space + */ + StringSplitNegInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * String + * @description String to split + * @default + */ + string?: string; + /** + * type + * @default string_split_neg + * @constant + * @enum {string} + */ + type: "string_split_neg"; + }; + /** + * SubModelType + * @description Submodel type. + * @enum {string} + */ + SubModelType: "unet" | "transformer" | "text_encoder" | "text_encoder_2" | "tokenizer" | "tokenizer_2" | "vae" | "vae_decoder" | "vae_encoder" | "scheduler" | "safety_checker"; + /** + * Subtract Integers + * @description Subtracts two numbers + */ + SubtractInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * A + * @description The first number + * @default 0 + */ + a?: number; + /** + * B + * @description The second number + * @default 0 + */ + b?: number; + /** + * type + * @default sub + * @constant + * @enum {string} + */ + type: "sub"; + }; + /** + * T2IAdapterConfig + * @description Model config for T2I. + */ + T2IAdapterConfig: { + /** @description Default settings for this model */ + default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + /** @default */ + repo_variant?: components["schemas"]["ModelRepoVariant"] | null; + /** + * Type + * @default t2i_adapter + * @constant + * @enum {string} + */ + type: "t2i_adapter"; + }; + /** T2IAdapterField */ + T2IAdapterField: { + /** @description The T2I-Adapter image prompt. */ + image: components["schemas"]["ImageField"]; + /** @description The T2I-Adapter model to use. 
*/ + t2i_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight given to the T2I-Adapter + * @default 1 + */ + weight?: number | number[]; + /** + * Begin Step Percent + * @description When the T2I-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the T2I-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Resize Mode + * @description The resize mode to use + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; + }; + /** + * T2I-Adapter + * @description Collects T2I-Adapter info to pass to other nodes. + */ + T2IAdapterInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The T2I-Adapter image prompt. + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * T2I-Adapter Model + * @description The T2I-Adapter model. + * @default null + */ + t2i_adapter_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight given to the T2I-Adapter + * @default 1 + */ + weight?: number | number[]; + /** + * Begin Step Percent + * @description When the T2I-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the T2I-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Resize Mode + * @description The resize mode applied to the T2I-Adapter input image so that it matches the target output size. + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; + /** + * type + * @default t2i_adapter + * @constant + * @enum {string} + */ + type: "t2i_adapter"; + }; + /** T2IAdapterMetadataField */ + T2IAdapterMetadataField: { + /** @description The control image. */ + image: components["schemas"]["ImageField"]; + /** + * @description The control image, after processing. + * @default null + */ + processed_image?: components["schemas"]["ImageField"] | null; + /** @description The T2I-Adapter model to use. 
*/ + t2i_adapter_model: components["schemas"]["ModelIdentifierField"]; + /** + * Weight + * @description The weight given to the T2I-Adapter + * @default 1 + */ + weight?: number | number[]; + /** + * Begin Step Percent + * @description When the T2I-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the T2I-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * Resize Mode + * @description The resize mode to use + * @default just_resize + * @enum {string} + */ + resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; + }; + /** T2IAdapterOutput */ + T2IAdapterOutput: { + /** + * T2I Adapter + * @description T2I-Adapter(s) to apply + */ + t2i_adapter: components["schemas"]["T2IAdapterField"]; + /** + * type + * @default t2i_adapter_output + * @constant + * @enum {string} + */ + type: "t2i_adapter_output"; + }; + /** T5Encoder8bConfig */ + T5Encoder8bConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default t5_encoder + * @constant + * @enum {string} + */ + type: "t5_encoder"; + /** + * Format + * @default t5_encoder_8b + * @constant + * @enum {string} + */ + format: "t5_encoder_8b"; + }; + /** T5EncoderConfig */ + T5EncoderConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default t5_encoder + * @constant + * @enum {string} + */ + type: "t5_encoder"; + /** + * Format + * @default t5_encoder + * @constant + * @enum {string} + */ + format: "t5_encoder"; + }; + /** T5EncoderField */ + T5EncoderField: { + /** @description Info to load tokenizer submodel */ + tokenizer: components["schemas"]["ModelIdentifierField"]; + /** @description Info to load text_encoder submodel */ + text_encoder: components["schemas"]["ModelIdentifierField"]; + }; + /** TBLR */ + TBLR: { + /** Top */ + top: number; + /** Bottom */ + bottom: number; + /** Left */ + left: number; + /** Right */ + right: number; + }; + /** + * TensorField + * @description A tensor primitive field. + */ + TensorField: { + /** + * Tensor Name + * @description The name of a tensor. + */ + tensor_name: string; + }; + /** + * TextualInversionFileConfig + * @description Model config for textual inversion embeddings. + */ + TextualInversionFileConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default embedding + * @constant + * @enum {string} + */ + type: "embedding"; + /** + * Format + * @default embedding_file + * @constant + * @enum {string} + */ + format: "embedding_file"; + }; + /** + * TextualInversionFolderConfig + * @description Model config for textual inversion embeddings. + */ + TextualInversionFolderConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. 
+ */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default embedding + * @constant + * @enum {string} + */ + type: "embedding"; + /** + * Format + * @default embedding_folder + * @constant + * @enum {string} + */ + format: "embedding_folder"; + }; + /** Tile */ + Tile: { + /** @description The coordinates of this tile relative to its parent image. */ + coords: components["schemas"]["TBLR"]; + /** @description The amount of overlap with adjacent tiles on each side of this tile. */ + overlap: components["schemas"]["TBLR"]; + }; + /** + * Tile Resample Processor + * @description Tile resampler processor + */ + TileResamplerProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Down Sampling Rate + * @description Down sampling rate + * @default 1 + */ + down_sampling_rate?: number; + /** + * type + * @default tile_image_processor + * @constant + * @enum {string} + */ + type: "tile_image_processor"; + }; + /** + * Tile to Properties + * @description Split a Tile into its individual properties. + */ + TileToPropertiesInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The tile to split into properties. + * @default null + */ + tile?: components["schemas"]["Tile"]; + /** + * type + * @default tile_to_properties + * @constant + * @enum {string} + */ + type: "tile_to_properties"; + }; + /** TileToPropertiesOutput */ + TileToPropertiesOutput: { + /** + * Coords Left + * @description Left coordinate of the tile relative to its parent image. + */ + coords_left: number; + /** + * Coords Right + * @description Right coordinate of the tile relative to its parent image. + */ + coords_right: number; + /** + * Coords Top + * @description Top coordinate of the tile relative to its parent image. + */ + coords_top: number; + /** + * Coords Bottom + * @description Bottom coordinate of the tile relative to its parent image. + */ + coords_bottom: number; + /** + * Width + * @description The width of the tile. Equal to coords_right - coords_left. + */ + width: number; + /** + * Height + * @description The height of the tile. Equal to coords_bottom - coords_top. + */ + height: number; + /** + * Overlap Top + * @description Overlap between this tile and its top neighbor. 
+ */ + overlap_top: number; + /** + * Overlap Bottom + * @description Overlap between this tile and its bottom neighbor. + */ + overlap_bottom: number; + /** + * Overlap Left + * @description Overlap between this tile and its left neighbor. + */ + overlap_left: number; + /** + * Overlap Right + * @description Overlap between this tile and its right neighbor. + */ + overlap_right: number; + /** + * type + * @default tile_to_properties_output + * @constant + * @enum {string} + */ + type: "tile_to_properties_output"; + }; + /** TileWithImage */ + TileWithImage: { + tile: components["schemas"]["Tile"]; + image: components["schemas"]["ImageField"]; + }; + /** + * Tiled Multi-Diffusion Denoise Latents + * @description Tiled Multi-Diffusion denoising. + * + * This node handles automatically tiling the input image, and is primarily intended for global refinement of images + * in tiled upscaling workflows. Future Multi-Diffusion nodes should allow the user to specify custom regions with + * different parameters for each region to harness the full power of Multi-Diffusion. + * + * This node has a similar interface to the `DenoiseLatents` node, but it has a reduced feature set (no IP-Adapter, + * T2I-Adapter, masking, etc.). + */ + TiledMultiDiffusionDenoiseLatents: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Positive conditioning tensor + * @default null + */ + positive_conditioning?: components["schemas"]["ConditioningField"]; + /** + * @description Negative conditioning tensor + * @default null + */ + negative_conditioning?: components["schemas"]["ConditioningField"]; + /** + * @description Noise tensor + * @default null + */ + noise?: components["schemas"]["LatentsField"] | null; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * Tile Height + * @description Height of the tiles in image space. + * @default 1024 + */ + tile_height?: number; + /** + * Tile Width + * @description Width of the tiles in image space. + * @default 1024 + */ + tile_width?: number; + /** + * Tile Overlap + * @description The overlap between adjacent tiles in pixel space. (Of course, tile merging is applied in latent space.) Tiles will be cropped during merging (if necessary) to ensure that they overlap by exactly this amount. 
+ * @default 32 + */ + tile_overlap?: number; + /** + * Steps + * @description Number of steps to run + * @default 18 + */ + steps?: number; + /** + * CFG Scale + * @description Classifier-Free Guidance scale + * @default 6 + */ + cfg_scale?: number | number[]; + /** + * Denoising Start + * @description When to start denoising, expressed a percentage of total steps + * @default 0 + */ + denoising_start?: number; + /** + * Denoising End + * @description When to stop denoising, expressed a percentage of total steps + * @default 1 + */ + denoising_end?: number; + /** + * Scheduler + * @description Scheduler to use during inference + * @default euler + * @enum {string} + */ + scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; + /** + * UNet + * @description UNet (scheduler, LoRAs) + * @default null + */ + unet?: components["schemas"]["UNetField"]; + /** + * CFG Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + * @default 0 + */ + cfg_rescale_multiplier?: number; + /** + * Control + * @default null + */ + control?: components["schemas"]["ControlField"] | components["schemas"]["ControlField"][] | null; + /** + * type + * @default tiled_multi_diffusion_denoise_latents + * @constant + * @enum {string} + */ + type: "tiled_multi_diffusion_denoise_latents"; + }; + /** TransformerField */ + TransformerField: { + /** @description Info to load Transformer submodel */ + transformer: components["schemas"]["ModelIdentifierField"]; + }; + /** + * UIComponent + * @description The type of UI component to use for a field, used to override the default components, which are + * inferred from the field type. + * @enum {string} + */ + UIComponent: "none" | "textarea" | "slider"; + /** + * UIConfigBase + * @description Provides additional node configuration to the UI. + * This is used internally by the @invocation decorator logic. Do not use this directly. + */ + UIConfigBase: { + /** + * Tags + * @description The node's tags + */ + tags: string[] | null; + /** + * Title + * @description The node's display name + * @default null + */ + title: string | null; + /** + * Category + * @description The node's category + * @default null + */ + category: string | null; + /** + * Version + * @description The node's version. Should be a valid semver string e.g. "1.0.0" or "3.8.13". + */ + version: string; + /** + * Node Pack + * @description Whether or not this is a custom node + * @default null + */ + node_pack: string | null; + /** + * @description The node's classification + * @default stable + */ + classification: components["schemas"]["Classification"]; + }; + /** + * UIType + * @description Type hints for the UI for situations in which the field type is not enough to infer the correct UI type. + * + * - Model Fields + * The most common node-author-facing use will be for model fields. Internally, there is no difference + * between SD-1, SD-2 and SDXL model fields - they all use the class `MainModelField`. To ensure the + * base-model-specific UI is rendered, use e.g. `ui_type=UIType.SDXLMainModelField` to indicate that + * the field is an SDXL main model field. 
+ * + * - Any Field + * We cannot infer the usage of `typing.Any` via schema parsing, so you *must* use `ui_type=UIType.Any` to + * indicate that the field accepts any type. Use with caution. This cannot be used on outputs. + * + * - Scheduler Field + * Special handling in the UI is needed for this field, which otherwise would be parsed as a plain enum field. + * + * - Internal Fields + * Similar to the Any Field, the `collect` and `iterate` nodes make use of `typing.Any`. To facilitate + * handling these types in the client, we use `UIType._Collection` and `UIType._CollectionItem`. These + * should not be used by node authors. + * + * - DEPRECATED Fields + * These types are deprecated and should not be used by node authors. A warning will be logged if one is + * used, and the type will be ignored. They are included here for backwards compatibility. + * @enum {string} + */ + UIType: "MainModelField" | "FluxMainModelField" | "SDXLMainModelField" | "SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "T2IAdapterModelField" | "T5EncoderModelField" | "SpandrelImageToImageModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; + /** UNetField */ + UNetField: { + /** @description Info to load unet submodel */ + unet: components["schemas"]["ModelIdentifierField"]; + /** @description Info to load scheduler submodel */ + scheduler: components["schemas"]["ModelIdentifierField"]; + /** + * Loras + * @description LoRAs to apply on model loading + */ + loras: components["schemas"]["LoRAField"][]; + /** + * Seamless Axes + * @description Axes("x" and "y") to which apply seamless + */ + seamless_axes?: string[]; + /** + * @description FreeU configuration + * @default null + */ + freeu_config?: components["schemas"]["FreeUConfig"] | null; + }; + /** + * UNetOutput + * @description Base class for invocations that output a UNet field. + */ + UNetOutput: { + /** + * UNet + * @description UNet (scheduler, LoRAs) + */ + unet: components["schemas"]["UNetField"]; + /** + * type + * @default unet_output + * @constant + * @enum {string} + */ + type: "unet_output"; + }; + /** + * URLModelSource + * @description A generic URL point to a checkpoint file. 
+ */ + URLModelSource: { + /** + * Url + * Format: uri + */ + url: string; + /** Access Token */ + access_token?: string | null; + /** + * Type + * @default url + * @constant + * @enum {string} + */ + type?: "url"; + }; + /** + * Unsharp Mask + * @description Applies an unsharp mask filter to an image + */ + UnsharpMaskInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to use + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * Radius + * @description Unsharp mask radius + * @default 2 + */ + radius?: number; + /** + * Strength + * @description Unsharp mask strength + * @default 50 + */ + strength?: number; + /** + * type + * @default unsharp_mask + * @constant + * @enum {string} + */ + type: "unsharp_mask"; + }; + /** Upscaler */ + Upscaler: { + /** + * Upscaling Method + * @description Name of upscaling method + */ + upscaling_method: string; + /** + * Upscaling Models + * @description List of upscaling models for this method + */ + upscaling_models: string[]; + }; + /** + * VAECheckpointConfig + * @description Model config for standalone VAE models. + */ + VAECheckpointConfig: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Format + * @description Format of the provided checkpoint model + * @default checkpoint + * @enum {string} + */ + format: "checkpoint" | "bnb_quantized_nf4b"; + /** + * Config Path + * @description path to the checkpoint model config file + */ + config_path: string; + /** + * Converted At + * @description When this model was last converted to diffusers + */ + converted_at?: number | null; + /** + * Type + * @default vae + * @constant + * @enum {string} + */ + type: "vae"; + }; + /** + * VAEDiffusersConfig + * @description Model config for standalone VAE models (diffusers version). + */ + VAEDiffusersConfig: { + /** + * Key + * @description A unique key for this model. 
+ */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * Name + * @description Name of the model. + */ + name: string; + /** @description The base model. */ + base: components["schemas"]["BaseModelType"]; + /** + * Description + * @description Model description + */ + description?: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response?: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image?: string | null; + /** + * Type + * @default vae + * @constant + * @enum {string} + */ + type: "vae"; + /** + * Format + * @default diffusers + * @constant + * @enum {string} + */ + format: "diffusers"; + }; + /** VAEField */ + VAEField: { + /** @description Info to load vae submodel */ + vae: components["schemas"]["ModelIdentifierField"]; + /** + * Seamless Axes + * @description Axes("x" and "y") to which apply seamless + */ + seamless_axes?: string[]; + }; + /** + * VAE + * @description Loads a VAE model, outputting a VaeLoaderOutput + */ + VAELoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * VAE + * @description VAE model to load + * @default null + */ + vae_model?: components["schemas"]["ModelIdentifierField"]; + /** + * type + * @default vae_loader + * @constant + * @enum {string} + */ + type: "vae_loader"; + }; + /** + * VAEOutput + * @description Base class for invocations that output a VAE field + */ + VAEOutput: { + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * type + * @default vae_output + * @constant + * @enum {string} + */ + type: "vae_output"; + }; + /** ValidationError */ + ValidationError: { + /** Location */ + loc: (string | number)[]; + /** Message */ + msg: string; + /** Error Type */ + type: string; + }; + /** Workflow */ + Workflow: { + /** + * Name + * @description The name of the workflow. + */ + name: string; + /** + * Author + * @description The author of the workflow. + */ + author: string; + /** + * Description + * @description The description of the workflow. + */ + description: string; + /** + * Version + * @description The version of the workflow. + */ + version: string; + /** + * Contact + * @description The contact of the workflow. + */ + contact: string; + /** + * Tags + * @description The tags of the workflow. + */ + tags: string; + /** + * Notes + * @description The notes of the workflow. + */ + notes: string; + /** + * Exposedfields + * @description The exposed fields of the workflow. + */ + exposedFields: components["schemas"]["ExposedField"][]; + /** @description The meta of the workflow. 
*/ + meta: components["schemas"]["WorkflowMeta"]; + /** + * Nodes + * @description The nodes of the workflow. + */ + nodes: { + [key: string]: components["schemas"]["JsonValue"]; + }[]; + /** + * Edges + * @description The edges of the workflow. + */ + edges: { + [key: string]: components["schemas"]["JsonValue"]; + }[]; + /** + * Id + * @description The id of the workflow. + */ + id: string; + }; + /** WorkflowAndGraphResponse */ + WorkflowAndGraphResponse: { + /** + * Workflow + * @description The workflow used to generate the image, as stringified JSON + */ + workflow: string | null; + /** + * Graph + * @description The graph used to generate the image, as stringified JSON + */ + graph: string | null; + }; + /** + * WorkflowCategory + * @enum {string} + */ + WorkflowCategory: "user" | "default" | "project"; + /** WorkflowMeta */ + WorkflowMeta: { + /** + * Version + * @description The version of the workflow schema. + */ + version: string; + /** + * @description The category of the workflow (user or default). + * @default user + */ + category?: components["schemas"]["WorkflowCategory"]; + }; + /** WorkflowRecordDTO */ + WorkflowRecordDTO: { + /** + * Workflow Id + * @description The id of the workflow. + */ + workflow_id: string; + /** + * Name + * @description The name of the workflow. + */ + name: string; + /** + * Created At + * @description The created timestamp of the workflow. + */ + created_at: string; + /** + * Updated At + * @description The updated timestamp of the workflow. + */ + updated_at: string; + /** + * Opened At + * @description The opened timestamp of the workflow. + */ + opened_at: string; + /** @description The workflow. */ + workflow: components["schemas"]["Workflow"]; + }; + /** WorkflowRecordListItemDTO */ + WorkflowRecordListItemDTO: { + /** + * Workflow Id + * @description The id of the workflow. + */ + workflow_id: string; + /** + * Name + * @description The name of the workflow. + */ + name: string; + /** + * Created At + * @description The created timestamp of the workflow. + */ + created_at: string; + /** + * Updated At + * @description The updated timestamp of the workflow. + */ + updated_at: string; + /** + * Opened At + * @description The opened timestamp of the workflow. + */ + opened_at: string; + /** + * Description + * @description The description of the workflow. + */ + description: string; + /** @description The description of the workflow. */ + category: components["schemas"]["WorkflowCategory"]; + }; + /** + * WorkflowRecordOrderBy + * @description The order by options for workflow records + * @enum {string} + */ + WorkflowRecordOrderBy: "created_at" | "updated_at" | "opened_at" | "name"; + /** WorkflowWithoutID */ + WorkflowWithoutID: { + /** + * Name + * @description The name of the workflow. + */ + name: string; + /** + * Author + * @description The author of the workflow. + */ + author: string; + /** + * Description + * @description The description of the workflow. + */ + description: string; + /** + * Version + * @description The version of the workflow. + */ + version: string; + /** + * Contact + * @description The contact of the workflow. + */ + contact: string; + /** + * Tags + * @description The tags of the workflow. + */ + tags: string; + /** + * Notes + * @description The notes of the workflow. + */ + notes: string; + /** + * Exposedfields + * @description The exposed fields of the workflow. + */ + exposedFields: components["schemas"]["ExposedField"][]; + /** @description The meta of the workflow. 
*/ + meta: components["schemas"]["WorkflowMeta"]; + /** + * Nodes + * @description The nodes of the workflow. + */ + nodes: { + [key: string]: components["schemas"]["JsonValue"]; + }[]; + /** + * Edges + * @description The edges of the workflow. + */ + edges: { + [key: string]: components["schemas"]["JsonValue"]; + }[]; + }; + /** + * Zoe (Depth) Processor + * @description Applies Zoe depth processing to image + */ + ZoeDepthImageProcessorInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to process + * @default null + */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default zoe_depth_image_processor + * @constant + * @enum {string} + */ + type: "zoe_depth_image_processor"; + }; + }; + responses: never; + parameters: never; + requestBodies: never; + headers: never; + pathItems: never; +>>>>>>> bfbb72a8b (tsc and lint fix) }; export type $defs = Record; export interface operations { diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 045e1508192..9c75b85ab90 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -51,7 +51,7 @@ export type VAEModelConfig = S['VAECheckpointConfig'] | S['VAEDiffusersConfig']; export type ControlNetModelConfig = S['ControlNetDiffusersConfig'] | S['ControlNetCheckpointConfig']; export type IPAdapterModelConfig = S['IPAdapterInvokeAIConfig'] | S['IPAdapterCheckpointConfig']; export type T2IAdapterModelConfig = S['T2IAdapterConfig']; -export type ClipEmbedModelConfig = S['CLIPEmbedDiffusersConfig']; +type ClipEmbedModelConfig = S['CLIPEmbedDiffusersConfig']; export type T5EncoderModelConfig = S['T5EncoderConfig']; export type T5Encoder8bModelConfig = S['T5Encoder8bConfig']; export type SpandrelImageToImageModelConfig = S['SpandrelImageToImageConfig']; From f67c4daa0a84125ef56dff64a9081c8f5f7bb6bf Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Wed, 21 Aug 2024 14:41:58 -0400 Subject: [PATCH 060/113] fix schema --- .../frontend/web/src/services/api/schema.ts | 14240 ---------------- 1 file changed, 14240 deletions(-) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 56fa7c03d2f..8c3849593ab 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -1381,7 +1381,6 @@ export type paths = { }; export type webhooks = Record; export type components = { -<<<<<<< HEAD schemas: { /** AddImagesToBoardResult */ AddImagesToBoardResult: { @@ -15686,14245 +15685,6 @@ export type components = { requestBodies: never; headers: never; pathItems: never; -======= - schemas: { - /** AddImagesToBoardResult */ - AddImagesToBoardResult: { - /** - * Board Id - * @description The id of the board the images were added to - */ - board_id: string; - 
/** - * Added Image Names - * @description The image names that were added to the board - */ - added_image_names: string[]; - }; - /** - * Add Integers - * @description Adds two numbers - */ - AddInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * A - * @description The first number - * @default 0 - */ - a?: number; - /** - * B - * @description The second number - * @default 0 - */ - b?: number; - /** - * type - * @default add - * @constant - * @enum {string} - */ - type: "add"; - }; - /** - * Alpha Mask to Tensor - * @description Convert a mask image to a tensor. Opaque regions are 1 and transparent regions are 0. - */ - AlphaMaskToTensorInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The mask image to convert. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Invert - * @description Whether to invert the mask. - * @default false - */ - invert?: boolean; - /** - * type - * @default alpha_mask_to_tensor - * @constant - * @enum {string} - */ - type: "alpha_mask_to_tensor"; - }; - /** - * AppConfig - * @description App Config Response - */ - AppConfig: { - /** - * Infill Methods - * @description List of available infill methods - */ - infill_methods: string[]; - /** - * Upscaling Methods - * @description List of upscaling methods - */ - upscaling_methods: components["schemas"]["Upscaler"][]; - /** - * Nsfw Methods - * @description List of NSFW checking methods - */ - nsfw_methods: string[]; - /** - * Watermarking Methods - * @description List of invisible watermark methods - */ - watermarking_methods: string[]; - }; - /** - * AppDependencyVersions - * @description App depencency Versions Response - */ - AppDependencyVersions: { - /** - * Accelerate - * @description accelerate version - */ - accelerate: string; - /** - * Compel - * @description compel version - */ - compel: string; - /** - * Cuda - * @description CUDA version - */ - cuda: string | null; - /** - * Diffusers - * @description diffusers version - */ - diffusers: string; - /** - * Numpy - * @description Numpy version - */ - numpy: string; - /** - * Opencv - * @description OpenCV version - */ - opencv: string; - /** - * Onnx - * @description ONNX version - */ - onnx: string; - /** - * Pillow - * @description Pillow (PIL) version - */ - pillow: string; - /** - * Python - * @description Python version - */ - python: string; - /** - * Torch - * @description PyTorch version - */ - torch: string; - /** - * Torchvision - * @description PyTorch Vision version - */ - torchvision: string; - /** - * Transformers - * @description transformers version - */ - transformers: string; - /** - * Xformers - * @description xformers version - */ - xformers: string | null; - }; - /** - * AppVersion - * @description App Version Response - */ - AppVersion: { - /** - * 
Version - * @description App version - */ - version: string; - }; - /** - * BaseMetadata - * @description Adds typing data for discriminated union. - */ - BaseMetadata: { - /** - * Name - * @description model's name - */ - name: string; - /** - * Type - * @default basemetadata - * @constant - * @enum {string} - */ - type?: "basemetadata"; - }; - /** - * BaseModelType - * @description Base model type. - * @enum {string} - */ - BaseModelType: "any" | "sd-1" | "sd-2" | "sdxl" | "sdxl-refiner" | "flux"; - /** Batch */ - Batch: { - /** - * Batch Id - * @description The ID of the batch - */ - batch_id?: string; - /** - * Data - * @description The batch data collection. - */ - data?: components["schemas"]["BatchDatum"][][] | null; - /** @description The graph to initialize the session with */ - graph: components["schemas"]["Graph"]; - /** @description The workflow to initialize the session with */ - workflow?: components["schemas"]["WorkflowWithoutID"] | null; - /** - * Runs - * @description Int stating how many times to iterate through all possible batch indices - * @default 1 - */ - runs: number; - }; - /** BatchDatum */ - BatchDatum: { - /** - * Node Path - * @description The node into which this batch data collection will be substituted. - */ - node_path: string; - /** - * Field Name - * @description The field into which this batch data collection will be substituted. - */ - field_name: string; - /** - * Items - * @description The list of items to substitute into the node/field. - */ - items?: (string | number)[]; - }; - /** - * BatchEnqueuedEvent - * @description Event model for batch_enqueued - */ - BatchEnqueuedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Batch Id - * @description The ID of the batch - */ - batch_id: string; - /** - * Enqueued - * @description The number of invocations enqueued - */ - enqueued: number; - /** - * Requested - * @description The number of invocations initially requested to be enqueued (may be less than enqueued if queue was full) - */ - requested: number; - /** - * Priority - * @description The priority of the batch - */ - priority: number; - }; - /** BatchStatus */ - BatchStatus: { - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Batch Id - * @description The ID of the batch - */ - batch_id: string; - /** - * Pending - * @description Number of queue items with status 'pending' - */ - pending: number; - /** - * In Progress - * @description Number of queue items with status 'in_progress' - */ - in_progress: number; - /** - * Completed - * @description Number of queue items with status 'complete' - */ - completed: number; - /** - * Failed - * @description Number of queue items with status 'error' - */ - failed: number; - /** - * Canceled - * @description Number of queue items with status 'canceled' - */ - canceled: number; - /** - * Total - * @description Total number of queue items - */ - total: number; - }; - /** - * Blank Image - * @description Creates a blank image and forwards it to the pipeline - */ - BlankImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Width - * @description The width of the image - * @default 512 - */ - width?: number; - /** - * Height - * @description The height of the image - * @default 512 - */ - height?: number; - /** - * Mode - * @description The mode of the image - * @default RGB - * @enum {string} - */ - mode?: "RGB" | "RGBA"; - /** - * @description The color of the image - * @default { - * "r": 0, - * "g": 0, - * "b": 0, - * "a": 255 - * } - */ - color?: components["schemas"]["ColorField"]; - /** - * type - * @default blank_image - * @constant - * @enum {string} - */ - type: "blank_image"; - }; - /** - * Blend Latents - * @description Blend two latents using a given alpha. Latents must have same size. - */ - BlendLatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Latents tensor - * @default null - */ - latents_a?: components["schemas"]["LatentsField"]; - /** - * @description Latents tensor - * @default null - */ - latents_b?: components["schemas"]["LatentsField"]; - /** - * Alpha - * @description Blending factor. 0.0 = use input A only, 1.0 = use input B only, 0.5 = 50% mix of input A and input B. - * @default 0.5 - */ - alpha?: number; - /** - * type - * @default lblend - * @constant - * @enum {string} - */ - type: "lblend"; - }; - /** BoardChanges */ - BoardChanges: { - /** - * Board Name - * @description The board's new name. - */ - board_name?: string | null; - /** - * Cover Image Name - * @description The name of the board's new cover image. - */ - cover_image_name?: string | null; - /** - * Archived - * @description Whether or not the board is archived - */ - archived?: boolean | null; - }; - /** - * BoardDTO - * @description Deserialized board record with cover image URL and image count. - */ - BoardDTO: { - /** - * Board Id - * @description The unique ID of the board. - */ - board_id: string; - /** - * Board Name - * @description The name of the board. - */ - board_name: string; - /** - * Created At - * @description The created timestamp of the board. - */ - created_at: string; - /** - * Updated At - * @description The updated timestamp of the board. - */ - updated_at: string; - /** - * Deleted At - * @description The deleted timestamp of the board. - */ - deleted_at?: string | null; - /** - * Cover Image Name - * @description The name of the board's cover image. - */ - cover_image_name: string | null; - /** - * Archived - * @description Whether or not the board is archived. - */ - archived: boolean; - /** - * Is Private - * @description Whether the board is private. - */ - is_private?: boolean | null; - /** - * Image Count - * @description The number of images in the board. 
- */ - image_count: number; - }; - /** - * BoardField - * @description A board primitive field - */ - BoardField: { - /** - * Board Id - * @description The id of the board - */ - board_id: string; - }; - /** Body_add_image_to_board */ - Body_add_image_to_board: { - /** - * Board Id - * @description The id of the board to add to - */ - board_id: string; - /** - * Image Name - * @description The name of the image to add - */ - image_name: string; - }; - /** Body_add_images_to_board */ - Body_add_images_to_board: { - /** - * Board Id - * @description The id of the board to add to - */ - board_id: string; - /** - * Image Names - * @description The names of the images to add - */ - image_names: string[]; - }; - /** Body_cancel_by_batch_ids */ - Body_cancel_by_batch_ids: { - /** - * Batch Ids - * @description The list of batch_ids to cancel all queue items for - */ - batch_ids: string[]; - }; - /** Body_create_workflow */ - Body_create_workflow: { - /** @description The workflow to create */ - workflow: components["schemas"]["WorkflowWithoutID"]; - }; - /** Body_delete_images_from_list */ - Body_delete_images_from_list: { - /** - * Image Names - * @description The list of names of images to delete - */ - image_names: string[]; - }; - /** Body_download */ - Body_download: { - /** - * Source - * Format: uri - * @description download source - */ - source: string; - /** - * Dest - * @description download destination - */ - dest: string; - /** - * Priority - * @description queue priority - * @default 10 - */ - priority?: number; - /** - * Access Token - * @description token for authorization to download - */ - access_token?: string | null; - }; - /** Body_download_images_from_list */ - Body_download_images_from_list: { - /** - * Image Names - * @description The list of names of images to download - */ - image_names?: string[] | null; - /** - * Board Id - * @description The board from which image should be downloaded - */ - board_id?: string | null; - }; - /** Body_enqueue_batch */ - Body_enqueue_batch: { - /** @description Batch to process */ - batch: components["schemas"]["Batch"]; - /** - * Prepend - * @description Whether or not to prepend this batch in the queue - * @default false - */ - prepend?: boolean; - }; - /** Body_parse_dynamicprompts */ - Body_parse_dynamicprompts: { - /** - * Prompt - * @description The prompt to parse with dynamicprompts - */ - prompt: string; - /** - * Max Prompts - * @description The max number of prompts to generate - * @default 1000 - */ - max_prompts?: number; - /** - * Combinatorial - * @description Whether to use the combinatorial generator - * @default true - */ - combinatorial?: boolean; - }; - /** Body_remove_image_from_board */ - Body_remove_image_from_board: { - /** - * Image Name - * @description The name of the image to remove - */ - image_name: string; - }; - /** Body_remove_images_from_board */ - Body_remove_images_from_board: { - /** - * Image Names - * @description The names of the images to remove - */ - image_names: string[]; - }; - /** Body_star_images_in_list */ - Body_star_images_in_list: { - /** - * Image Names - * @description The list of names of images to star - */ - image_names: string[]; - }; - /** Body_unstar_images_in_list */ - Body_unstar_images_in_list: { - /** - * Image Names - * @description The list of names of images to unstar - */ - image_names: string[]; - }; - /** Body_update_model_image */ - Body_update_model_image: { - /** - * Image - * Format: binary - */ - image: Blob; - }; - /** Body_update_workflow */ - 
Body_update_workflow: { - /** @description The updated workflow */ - workflow: components["schemas"]["Workflow"]; - }; - /** Body_upload_image */ - Body_upload_image: { - /** - * File - * Format: binary - */ - file: Blob; - /** @description The metadata to associate with the image */ - metadata?: components["schemas"]["JsonValue"] | null; - }; - /** - * Boolean Collection Primitive - * @description A collection of boolean primitive values - */ - BooleanCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of boolean values - * @default [] - */ - collection?: boolean[]; - /** - * type - * @default boolean_collection - * @constant - * @enum {string} - */ - type: "boolean_collection"; - }; - /** - * BooleanCollectionOutput - * @description Base class for nodes that output a collection of booleans - */ - BooleanCollectionOutput: { - /** - * Collection - * @description The output boolean collection - */ - collection: boolean[]; - /** - * type - * @default boolean_collection_output - * @constant - * @enum {string} - */ - type: "boolean_collection_output"; - }; - /** - * Boolean Primitive - * @description A boolean primitive value - */ - BooleanInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The boolean value - * @default false - */ - value?: boolean; - /** - * type - * @default boolean - * @constant - * @enum {string} - */ - type: "boolean"; - }; - /** - * BooleanOutput - * @description Base class for nodes that output a single boolean - */ - BooleanOutput: { - /** - * Value - * @description The output boolean - */ - value: boolean; - /** - * type - * @default boolean_output - * @constant - * @enum {string} - */ - type: "boolean_output"; - }; - /** - * BoundingBoxCollectionOutput - * @description Base class for nodes that output a collection of bounding boxes - */ - BoundingBoxCollectionOutput: { - /** - * Bounding Boxes - * @description The output bounding boxes. - */ - collection: components["schemas"]["BoundingBoxField"][]; - /** - * type - * @default bounding_box_collection_output - * @constant - * @enum {string} - */ - type: "bounding_box_collection_output"; - }; - /** - * BoundingBoxField - * @description A bounding box primitive value. - */ - BoundingBoxField: { - /** - * X Min - * @description The minimum x-coordinate of the bounding box (inclusive). - */ - x_min: number; - /** - * X Max - * @description The maximum x-coordinate of the bounding box (exclusive). - */ - x_max: number; - /** - * Y Min - * @description The minimum y-coordinate of the bounding box (inclusive). - */ - y_min: number; - /** - * Y Max - * @description The maximum y-coordinate of the bounding box (exclusive). 
- */ - y_max: number; - /** - * Score - * @description The score associated with the bounding box. In the range [0, 1]. This value is typically set when the bounding box was produced by a detector and has an associated confidence score. - * @default null - */ - score?: number | null; - }; - /** - * Bounding Box - * @description Create a bounding box manually by supplying box coordinates - */ - BoundingBoxInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * X Min - * @description x-coordinate of the bounding box's top left vertex - * @default 0 - */ - x_min?: number; - /** - * Y Min - * @description y-coordinate of the bounding box's top left vertex - * @default 0 - */ - y_min?: number; - /** - * X Max - * @description x-coordinate of the bounding box's bottom right vertex - * @default 0 - */ - x_max?: number; - /** - * Y Max - * @description y-coordinate of the bounding box's bottom right vertex - * @default 0 - */ - y_max?: number; - /** - * type - * @default bounding_box - * @constant - * @enum {string} - */ - type: "bounding_box"; - }; - /** - * BoundingBoxOutput - * @description Base class for nodes that output a single bounding box - */ - BoundingBoxOutput: { - /** @description The output bounding box. */ - bounding_box: components["schemas"]["BoundingBoxField"]; - /** - * type - * @default bounding_box_output - * @constant - * @enum {string} - */ - type: "bounding_box_output"; - }; - /** - * BulkDownloadCompleteEvent - * @description Event model for bulk_download_complete - */ - BulkDownloadCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Bulk Download Id - * @description The ID of the bulk image download - */ - bulk_download_id: string; - /** - * Bulk Download Item Id - * @description The ID of the bulk image download item - */ - bulk_download_item_id: string; - /** - * Bulk Download Item Name - * @description The name of the bulk image download item - */ - bulk_download_item_name: string; - }; - /** - * BulkDownloadErrorEvent - * @description Event model for bulk_download_error - */ - BulkDownloadErrorEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Bulk Download Id - * @description The ID of the bulk image download - */ - bulk_download_id: string; - /** - * Bulk Download Item Id - * @description The ID of the bulk image download item - */ - bulk_download_item_id: string; - /** - * Bulk Download Item Name - * @description The name of the bulk image download item - */ - bulk_download_item_name: string; - /** - * Error - * @description The error message - */ - error: string; - }; - /** - * BulkDownloadStartedEvent - * @description Event model for bulk_download_started - */ - BulkDownloadStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Bulk Download Id - * @description The ID of the bulk image download - */ - bulk_download_id: string; - /** - * Bulk Download Item Id - * @description The ID of the bulk image download item - */ - bulk_download_item_id: string; - /** - * Bulk Download Item Name - * @description The name of the 
bulk image download item - */ - bulk_download_item_name: string; - }; - /** - * CLIPEmbedDiffusersConfig - * @description Model config for Clip Embeddings. - */ - CLIPEmbedDiffusersConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - /** @default */ - repo_variant?: components["schemas"]["ModelRepoVariant"] | null; - /** - * Type - * @default clip_embed - * @constant - * @enum {string} - */ - type: "clip_embed"; - }; - /** CLIPField */ - CLIPField: { - /** @description Info to load tokenizer submodel */ - tokenizer: components["schemas"]["ModelIdentifierField"]; - /** @description Info to load text_encoder submodel */ - text_encoder: components["schemas"]["ModelIdentifierField"]; - /** - * Skipped Layers - * @description Number of skipped layers in text_encoder - */ - skipped_layers: number; - /** - * Loras - * @description LoRAs to apply on model loading - */ - loras: components["schemas"]["LoRAField"][]; - }; - /** - * CLIPOutput - * @description Base class for invocations that output a CLIP field - */ - CLIPOutput: { - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip: components["schemas"]["CLIPField"]; - /** - * type - * @default clip_output - * @constant - * @enum {string} - */ - type: "clip_output"; - }; - /** - * CLIP Skip - * @description Skip layers in clip text_encoder model. - */ - CLIPSkipInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"]; - /** - * Skipped Layers - * @description Number of layers to skip in text encoder - * @default 0 - */ - skipped_layers?: number; - /** - * type - * @default clip_skip - * @constant - * @enum {string} - */ - type: "clip_skip"; - }; - /** - * CLIPSkipInvocationOutput - * @description CLIP skip node output - */ - CLIPSkipInvocationOutput: { - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip: components["schemas"]["CLIPField"] | null; - /** - * type - * @default clip_skip_output - * @constant - * @enum {string} - */ - type: "clip_skip_output"; - }; - /** - * CLIPVisionDiffusersConfig - * @description Model config for CLIPVision. - */ - CLIPVisionDiffusersConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - /** @default */ - repo_variant?: components["schemas"]["ModelRepoVariant"] | null; - /** - * Type - * @default clip_vision - * @constant - * @enum {string} - */ - type: "clip_vision"; - }; - /** - * CV2 Infill - * @description Infills transparent areas of an image using OpenCV Inpainting - */ - CV2InfillInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default infill_cv2 - * @constant - * @enum {string} - */ - type: "infill_cv2"; - }; - /** - * Calculate Image Tiles Even Split - * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. - */ - CalculateImageTilesEvenSplitInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Image Width - * @description The image width, in pixels, to calculate tiles for. - * @default 1024 - */ - image_width?: number; - /** - * Image Height - * @description The image height, in pixels, to calculate tiles for. - * @default 1024 - */ - image_height?: number; - /** - * Num Tiles X - * @description Number of tiles to divide image into on the x axis - * @default 2 - */ - num_tiles_x?: number; - /** - * Num Tiles Y - * @description Number of tiles to divide image into on the y axis - * @default 2 - */ - num_tiles_y?: number; - /** - * Overlap - * @description The overlap, in pixels, between adjacent tiles. - * @default 128 - */ - overlap?: number; - /** - * type - * @default calculate_image_tiles_even_split - * @constant - * @enum {string} - */ - type: "calculate_image_tiles_even_split"; - }; - /** - * Calculate Image Tiles - * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. - */ - CalculateImageTilesInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Image Width - * @description The image width, in pixels, to calculate tiles for. - * @default 1024 - */ - image_width?: number; - /** - * Image Height - * @description The image height, in pixels, to calculate tiles for. - * @default 1024 - */ - image_height?: number; - /** - * Tile Width - * @description The tile width, in pixels. - * @default 576 - */ - tile_width?: number; - /** - * Tile Height - * @description The tile height, in pixels. - * @default 576 - */ - tile_height?: number; - /** - * Overlap - * @description The target overlap, in pixels, between adjacent tiles. Adjacent tiles will overlap by at least this amount - * @default 128 - */ - overlap?: number; - /** - * type - * @default calculate_image_tiles - * @constant - * @enum {string} - */ - type: "calculate_image_tiles"; - }; - /** - * Calculate Image Tiles Minimum Overlap - * @description Calculate the coordinates and overlaps of tiles that cover a target image shape. - */ - CalculateImageTilesMinimumOverlapInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Image Width - * @description The image width, in pixels, to calculate tiles for. - * @default 1024 - */ - image_width?: number; - /** - * Image Height - * @description The image height, in pixels, to calculate tiles for. - * @default 1024 - */ - image_height?: number; - /** - * Tile Width - * @description The tile width, in pixels. - * @default 576 - */ - tile_width?: number; - /** - * Tile Height - * @description The tile height, in pixels. - * @default 576 - */ - tile_height?: number; - /** - * Min Overlap - * @description Minimum overlap between adjacent tiles, in pixels. - * @default 128 - */ - min_overlap?: number; - /** - * type - * @default calculate_image_tiles_min_overlap - * @constant - * @enum {string} - */ - type: "calculate_image_tiles_min_overlap"; - }; - /** CalculateImageTilesOutput */ - CalculateImageTilesOutput: { - /** - * Tiles - * @description The tiles coordinates that cover a particular image shape. - */ - tiles: components["schemas"]["Tile"][]; - /** - * type - * @default calculate_image_tiles_output - * @constant - * @enum {string} - */ - type: "calculate_image_tiles_output"; - }; - /** - * CancelByBatchIDsResult - * @description Result of canceling by list of batch ids - */ - CancelByBatchIDsResult: { - /** - * Canceled - * @description Number of queue items canceled - */ - canceled: number; - }; - /** - * Canny Processor - * @description Canny edge detection for ControlNet - */ - CannyImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * Low Threshold - * @description The low threshold of the Canny pixel gradient (0-255) - * @default 100 - */ - low_threshold?: number; - /** - * High Threshold - * @description The high threshold of the Canny pixel gradient (0-255) - * @default 200 - */ - high_threshold?: number; - /** - * type - * @default canny_image_processor - * @constant - * @enum {string} - */ - type: "canny_image_processor"; - }; - /** - * Canvas Paste Back - * @description Combines two images by using the mask provided. Intended for use on the Unified Canvas. 
- */ - CanvasPasteBackInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The source image - * @default null - */ - source_image?: components["schemas"]["ImageField"]; - /** - * @description The target image - * @default null - */ - target_image?: components["schemas"]["ImageField"]; - /** - * @description The mask to use when pasting - * @default null - */ - mask?: components["schemas"]["ImageField"]; - /** - * Mask Blur - * @description The amount to blur the mask by - * @default 0 - */ - mask_blur?: number; - /** - * type - * @default canvas_paste_back - * @constant - * @enum {string} - */ - type: "canvas_paste_back"; - }; - /** - * Center Pad or Crop Image - * @description Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. - */ - CenterPadCropInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to crop - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Left - * @description Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards) - * @default 0 - */ - left?: number; - /** - * Right - * @description Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards) - * @default 0 - */ - right?: number; - /** - * Top - * @description Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards) - * @default 0 - */ - top?: number; - /** - * Bottom - * @description Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards) - * @default 0 - */ - bottom?: number; - /** - * type - * @default img_pad_crop - * @constant - * @enum {string} - */ - type: "img_pad_crop"; - }; - /** - * Classification - * @description The classification of an Invocation. - * - `Stable`: The invocation, including its inputs/outputs and internal logic, is stable. You may build workflows with it, having confidence that they will not break because of a change in this invocation. - * - `Beta`: The invocation is not yet stable, but is planned to be stable in the future. Workflows built around this invocation may break, but we are committed to supporting this invocation long-term. - * - `Prototype`: The invocation is not yet stable and may be removed from the application at any time. Workflows built around this invocation may break, and we are *not* committed to supporting this invocation. 
- * @enum {string} - */ - Classification: "stable" | "beta" | "prototype"; - /** - * ClearResult - * @description Result of clearing the session queue - */ - ClearResult: { - /** - * Deleted - * @description Number of queue items deleted - */ - deleted: number; - }; - /** - * CollectInvocation - * @description Collects values into a collection - */ - CollectInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection Item - * @description The item to collect (all inputs must be of the same type) - * @default null - */ - item?: unknown; - /** - * Collection - * @description The collection, will be provided on execution - * @default [] - */ - collection?: unknown[]; - /** - * type - * @default collect - * @constant - * @enum {string} - */ - type: "collect"; - }; - /** CollectInvocationOutput */ - CollectInvocationOutput: { - /** - * Collection - * @description The collection of input items - */ - collection: unknown[]; - /** - * type - * @default collect_output - * @constant - * @enum {string} - */ - type: "collect_output"; - }; - /** - * ColorCollectionOutput - * @description Base class for nodes that output a collection of colors - */ - ColorCollectionOutput: { - /** - * Collection - * @description The output colors - */ - collection: components["schemas"]["ColorField"][]; - /** - * type - * @default color_collection_output - * @constant - * @enum {string} - */ - type: "color_collection_output"; - }; - /** - * Color Correct - * @description Shifts the colors of a target image to match the reference image, optionally - * using a mask to only color-correct certain regions of the target image. - */ - ColorCorrectInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to color-correct - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description Reference image for color-correction - * @default null - */ - reference?: components["schemas"]["ImageField"]; - /** - * @description Mask to use when applying color-correction - * @default null - */ - mask?: components["schemas"]["ImageField"] | null; - /** - * Mask Blur Radius - * @description Mask blur radius - * @default 8 - */ - mask_blur_radius?: number; - /** - * type - * @default color_correct - * @constant - * @enum {string} - */ - type: "color_correct"; - }; - /** - * ColorField - * @description A color primitive field - */ - ColorField: { - /** - * R - * @description The red component - */ - r: number; - /** - * G - * @description The green component - */ - g: number; - /** - * B - * @description The blue component - */ - b: number; - /** - * A - * @description The alpha component - */ - a: number; - }; - /** - * Color Primitive - * @description A color primitive value - */ - ColorInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The color value - * @default { - * "r": 0, - * "g": 0, - * "b": 0, - * "a": 255 - * } - */ - color?: components["schemas"]["ColorField"]; - /** - * type - * @default color - * @constant - * @enum {string} - */ - type: "color"; - }; - /** - * Color Map Processor - * @description Generates a color map from the provided image - */ - ColorMapImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Color Map Tile Size - * @description Tile size - * @default 64 - */ - color_map_tile_size?: number; - /** - * type - * @default color_map_image_processor - * @constant - * @enum {string} - */ - type: "color_map_image_processor"; - }; - /** - * ColorOutput - * @description Base class for nodes that output a single color - */ - ColorOutput: { - /** @description The output color */ - color: components["schemas"]["ColorField"]; - /** - * type - * @default color_output - * @constant - * @enum {string} - */ - type: "color_output"; - }; - /** - * Prompt - * @description Parse prompt using compel package to conditioning. - */ - CompelInvocation: { - /** - * Id - * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Prompt - * @description Prompt to be parsed by Compel to create a conditioning tensor - * @default - */ - prompt?: string; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"]; - /** - * @description A mask defining the region that this conditioning prompt applies to. - * @default null - */ - mask?: components["schemas"]["TensorField"] | null; - /** - * type - * @default compel - * @constant - * @enum {string} - */ - type: "compel"; - }; - /** - * Conditioning Collection Primitive - * @description A collection of conditioning tensor primitive values - */ - ConditioningCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of conditioning tensors - * @default [] - */ - collection?: components["schemas"]["ConditioningField"][]; - /** - * type - * @default conditioning_collection - * @constant - * @enum {string} - */ - type: "conditioning_collection"; - }; - /** - * ConditioningCollectionOutput - * @description Base class for nodes that output a collection of conditioning tensors - */ - ConditioningCollectionOutput: { - /** - * Collection - * @description The output conditioning tensors - */ - collection: components["schemas"]["ConditioningField"][]; - /** - * type - * @default conditioning_collection_output - * @constant - * @enum {string} - */ - type: "conditioning_collection_output"; - }; - /** - * ConditioningField - * @description A conditioning tensor primitive value - */ - ConditioningField: { - /** - * Conditioning Name - * @description The name of conditioning tensor - */ - conditioning_name: string; - /** - * @description The mask associated with this conditioning tensor. Excluded regions should be set to False, included regions should be set to True. - * @default null - */ - mask?: components["schemas"]["TensorField"] | null; - }; - /** - * Conditioning Primitive - * @description A conditioning tensor primitive value - */ - ConditioningInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Conditioning tensor - * @default null - */ - conditioning?: components["schemas"]["ConditioningField"]; - /** - * type - * @default conditioning - * @constant - * @enum {string} - */ - type: "conditioning"; - }; - /** - * ConditioningOutput - * @description Base class for nodes that output a single conditioning tensor - */ - ConditioningOutput: { - /** @description Conditioning tensor */ - conditioning: components["schemas"]["ConditioningField"]; - /** - * type - * @default conditioning_output - * @constant - * @enum {string} - */ - type: "conditioning_output"; - }; - /** - * Content Shuffle Processor - * @description Applies content shuffle processing to image - */ - ContentShuffleImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * H - * @description Content shuffle `h` parameter - * @default 512 - */ - h?: number; - /** - * W - * @description Content shuffle `w` parameter - * @default 512 - */ - w?: number; - /** - * F - * @description Content shuffle `f` parameter - * @default 256 - */ - f?: number; - /** - * type - * @default content_shuffle_image_processor - * @constant - * @enum {string} - */ - type: "content_shuffle_image_processor"; - }; - /** ControlAdapterDefaultSettings */ - ControlAdapterDefaultSettings: { - /** Preprocessor */ - preprocessor: string | null; - }; - /** ControlField */ - ControlField: { - /** @description The control image */ - image: components["schemas"]["ImageField"]; - /** @description The ControlNet model to use */ - control_model: components["schemas"]["ModelIdentifierField"]; - /** - * Control Weight - * @description The weight given to the ControlNet - * @default 1 - */ - control_weight?: number | number[]; - /** - * Begin Step Percent - * @description When the ControlNet is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the ControlNet is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Control Mode - * @description The control mode to use - * @default balanced - * @enum {string} - */ - control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; - /** - * Resize Mode - * @description The resize mode to use - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | 
"fill_resize" | "just_resize_simple"; - }; - /** - * ControlNetCheckpointConfig - * @description Model config for ControlNet models (diffusers version). - */ - ControlNetCheckpointConfig: { - /** @description Default settings for this model */ - default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @description Format of the provided checkpoint model - * @default checkpoint - * @enum {string} - */ - format: "checkpoint" | "bnb_quantized_nf4b"; - /** - * Config Path - * @description path to the checkpoint model config file - */ - config_path: string; - /** - * Converted At - * @description When this model was last converted to diffusers - */ - converted_at?: number | null; - /** - * Type - * @default controlnet - * @constant - * @enum {string} - */ - type: "controlnet"; - }; - /** - * ControlNetDiffusersConfig - * @description Model config for ControlNet models (diffusers version). - */ - ControlNetDiffusersConfig: { - /** @description Default settings for this model */ - default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - /** @default */ - repo_variant?: components["schemas"]["ModelRepoVariant"] | null; - /** - * Type - * @default controlnet - * @constant - * @enum {string} - */ - type: "controlnet"; - }; - /** - * ControlNet - * @description Collects ControlNet info to pass to other nodes - */ - ControlNetInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The control image - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description ControlNet model to load - * @default null - */ - control_model?: components["schemas"]["ModelIdentifierField"]; - /** - * Control Weight - * @description The weight given to the ControlNet - * @default 1 - */ - control_weight?: number | number[]; - /** - * Begin Step Percent - * @description When the ControlNet is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the ControlNet is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Control Mode - * @description The control mode used - * @default balanced - * @enum {string} - */ - control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; - /** - * Resize Mode - * @description The resize mode used - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; - /** - * type - * @default controlnet - * @constant - * @enum {string} - */ - type: "controlnet"; - }; - /** ControlNetMetadataField */ - ControlNetMetadataField: { - /** @description The control image */ - image: components["schemas"]["ImageField"]; - /** - * @description The control image, after processing. 
- * @default null - */ - processed_image?: components["schemas"]["ImageField"] | null; - /** @description The ControlNet model to use */ - control_model: components["schemas"]["ModelIdentifierField"]; - /** - * Control Weight - * @description The weight given to the ControlNet - * @default 1 - */ - control_weight?: number | number[]; - /** - * Begin Step Percent - * @description When the ControlNet is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the ControlNet is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Control Mode - * @description The control mode to use - * @default balanced - * @enum {string} - */ - control_mode?: "balanced" | "more_prompt" | "more_control" | "unbalanced"; - /** - * Resize Mode - * @description The resize mode to use - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; - }; - /** - * ControlOutput - * @description node output for ControlNet info - */ - ControlOutput: { - /** @description ControlNet(s) to apply */ - control: components["schemas"]["ControlField"]; - /** - * type - * @default control_output - * @constant - * @enum {string} - */ - type: "control_output"; - }; - /** - * Core Metadata - * @description Collects core generation metadata into a MetadataField - */ - CoreMetadataInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Generation Mode - * @description The generation mode that output this image - * @default null - */ - generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint") | null; - /** - * Positive Prompt - * @description The positive prompt parameter - * @default null - */ - positive_prompt?: string | null; - /** - * Negative Prompt - * @description The negative prompt parameter - * @default null - */ - negative_prompt?: string | null; - /** - * Width - * @description The width parameter - * @default null - */ - width?: number | null; - /** - * Height - * @description The height parameter - * @default null - */ - height?: number | null; - /** - * Seed - * @description The seed used for noise generation - * @default null - */ - seed?: number | null; - /** - * Rand Device - * @description The device used for random number generation - * @default null - */ - rand_device?: string | null; - /** - * Cfg Scale - * @description The classifier-free guidance scale parameter - * @default null - */ - cfg_scale?: number | null; - /** - * Cfg Rescale Multiplier - * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR - * @default null - */ - cfg_rescale_multiplier?: number | null; - /** - * Steps - * @description The number of steps used for inference - * @default null - */ - steps?: number | null; - /** - * Scheduler - * @description The scheduler used for inference - * @default null - */ - scheduler?: string | null; - /** - * Seamless X - * @description Whether seamless tiling was used on the X axis - * @default null - */ - seamless_x?: boolean | null; - 
/** - * Seamless Y - * @description Whether seamless tiling was used on the Y axis - * @default null - */ - seamless_y?: boolean | null; - /** - * Clip Skip - * @description The number of skipped CLIP layers - * @default null - */ - clip_skip?: number | null; - /** - * @description The main model used for inference - * @default null - */ - model?: components["schemas"]["ModelIdentifierField"] | null; - /** - * Controlnets - * @description The ControlNets used for inference - * @default null - */ - controlnets?: components["schemas"]["ControlNetMetadataField"][] | null; - /** - * Ipadapters - * @description The IP Adapters used for inference - * @default null - */ - ipAdapters?: components["schemas"]["IPAdapterMetadataField"][] | null; - /** - * T2Iadapters - * @description The IP Adapters used for inference - * @default null - */ - t2iAdapters?: components["schemas"]["T2IAdapterMetadataField"][] | null; - /** - * Loras - * @description The LoRAs used for inference - * @default null - */ - loras?: components["schemas"]["LoRAMetadataField"][] | null; - /** - * Strength - * @description The strength used for latents-to-latents - * @default null - */ - strength?: number | null; - /** - * Init Image - * @description The name of the initial image - * @default null - */ - init_image?: string | null; - /** - * @description The VAE used for decoding, if the main model's default was not used - * @default null - */ - vae?: components["schemas"]["ModelIdentifierField"] | null; - /** - * Hrf Enabled - * @description Whether or not high resolution fix was enabled. - * @default null - */ - hrf_enabled?: boolean | null; - /** - * Hrf Method - * @description The high resolution fix upscale method. - * @default null - */ - hrf_method?: string | null; - /** - * Hrf Strength - * @description The high resolution fix img2img strength used in the upscale pass. - * @default null - */ - hrf_strength?: number | null; - /** - * Positive Style Prompt - * @description The positive style prompt parameter - * @default null - */ - positive_style_prompt?: string | null; - /** - * Negative Style Prompt - * @description The negative style prompt parameter - * @default null - */ - negative_style_prompt?: string | null; - /** - * @description The SDXL Refiner model used - * @default null - */ - refiner_model?: components["schemas"]["ModelIdentifierField"] | null; - /** - * Refiner Cfg Scale - * @description The classifier-free guidance scale parameter used for the refiner - * @default null - */ - refiner_cfg_scale?: number | null; - /** - * Refiner Steps - * @description The number of steps used for the refiner - * @default null - */ - refiner_steps?: number | null; - /** - * Refiner Scheduler - * @description The scheduler used for the refiner - * @default null - */ - refiner_scheduler?: string | null; - /** - * Refiner Positive Aesthetic Score - * @description The aesthetic score used for the refiner - * @default null - */ - refiner_positive_aesthetic_score?: number | null; - /** - * Refiner Negative Aesthetic Score - * @description The aesthetic score used for the refiner - * @default null - */ - refiner_negative_aesthetic_score?: number | null; - /** - * Refiner Start - * @description The start value used for refiner denoising - * @default null - */ - refiner_start?: number | null; - /** - * type - * @default core_metadata - * @constant - * @enum {string} - */ - type: "core_metadata"; - [key: string]: unknown; - }; - /** - * Create Denoise Mask - * @description Creates mask for denoising model run. 
- */ - CreateDenoiseMaskInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description VAE - * @default null - */ - vae?: components["schemas"]["VAEField"]; - /** - * @description Image which will be masked - * @default null - */ - image?: components["schemas"]["ImageField"] | null; - /** - * @description The mask to use when pasting - * @default null - */ - mask?: components["schemas"]["ImageField"]; - /** - * Tiled - * @description Processing using overlapping tiles (reduce memory consumption) - * @default false - */ - tiled?: boolean; - /** - * Fp32 - * @description Whether or not to use full float32 precision - * @default false - */ - fp32?: boolean; - /** - * type - * @default create_denoise_mask - * @constant - * @enum {string} - */ - type: "create_denoise_mask"; - }; - /** - * Create Gradient Mask - * @description Creates mask for denoising model run. - */ - CreateGradientMaskInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Image which will be masked - * @default null - */ - mask?: components["schemas"]["ImageField"]; - /** - * Edge Radius - * @description How far to blur/expand the edges of the mask - * @default 16 - */ - edge_radius?: number; - /** - * Coherence Mode - * @default Gaussian Blur - * @enum {string} - */ - coherence_mode?: "Gaussian Blur" | "Box Blur" | "Staged"; - /** - * Minimum Denoise - * @description Minimum denoise level for the coherence region - * @default 0 - */ - minimum_denoise?: number; - /** - * [OPTIONAL] Image - * @description OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE - * @default null - */ - image?: components["schemas"]["ImageField"] | null; - /** - * [OPTIONAL] UNet - * @description OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * [OPTIONAL] VAE - * @description OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE - * @default null - */ - vae?: components["schemas"]["VAEField"] | null; - /** - * Tiled - * @description Processing using overlapping tiles (reduce memory consumption) - * @default false - */ - tiled?: boolean; - /** - * Fp32 - * @description Whether or not to use full float32 precision - * @default false - */ - fp32?: boolean; - /** - * type - * @default create_gradient_mask - * @constant - * @enum {string} - */ - type: "create_gradient_mask"; - }; - /** - * Crop Latents - * @description Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be - * divisible by the latent scale factor of 8. 
- */ - CropLatentsCoreInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"]; - /** - * X - * @description The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. - * @default null - */ - x?: number; - /** - * Y - * @description The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. - * @default null - */ - y?: number; - /** - * Width - * @description The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. - * @default null - */ - width?: number; - /** - * Height - * @description The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space. - * @default null - */ - height?: number; - /** - * type - * @default crop_latents - * @constant - * @enum {string} - */ - type: "crop_latents"; - }; - /** CursorPaginatedResults[SessionQueueItemDTO] */ - CursorPaginatedResults_SessionQueueItemDTO_: { - /** - * Limit - * @description Limit of items to get - */ - limit: number; - /** - * Has More - * @description Whether there are more items available - */ - has_more: boolean; - /** - * Items - * @description Items - */ - items: components["schemas"]["SessionQueueItemDTO"][]; - }; - /** - * OpenCV Inpaint - * @description Simple inpaint using opencv. - */ - CvInpaintInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to inpaint - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description The mask to use when inpainting - * @default null - */ - mask?: components["schemas"]["ImageField"]; - /** - * type - * @default cv_inpaint - * @constant - * @enum {string} - */ - type: "cv_inpaint"; - }; - /** - * DW Openpose Image Processor - * @description Generates an openpose pose from an image using DWPose - */ - DWOpenposeImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
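The crop_latents schema above takes image-space pixel coordinates that get converted to latent space, and its description requires the box dimensions and coordinates to be divisible by the latent scale factor of 8. A minimal sketch of a node object that satisfies that constraint, assuming the generated types are imported from this file; the './schema' path and the node id are illustrative, not part of the patch:

import type { components } from './schema'; // hypothetical import path for this generated file

// All image-space values are multiples of 8 so they map cleanly onto latent space.
const cropNode: components['schemas']['CropLatentsCoreInvocation'] = {
  id: 'crop_latents_1', // placeholder id
  type: 'crop_latents',
  is_intermediate: true,
  use_cache: true,
  x: 64,
  y: 128,
  width: 512,
  height: 512,
  // `latents` is usually supplied through a graph edge rather than inline.
};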
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Draw Body - * @default true - */ - draw_body?: boolean; - /** - * Draw Face - * @default false - */ - draw_face?: boolean; - /** - * Draw Hands - * @default false - */ - draw_hands?: boolean; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default dw_openpose_image_processor - * @constant - * @enum {string} - */ - type: "dw_openpose_image_processor"; - }; - /** DeleteBoardResult */ - DeleteBoardResult: { - /** - * Board Id - * @description The id of the board that was deleted. - */ - board_id: string; - /** - * Deleted Board Images - * @description The image names of the board-images relationships that were deleted. - */ - deleted_board_images: string[]; - /** - * Deleted Images - * @description The names of the images that were deleted. - */ - deleted_images: string[]; - }; - /** DeleteImagesFromListResult */ - DeleteImagesFromListResult: { - /** Deleted Images */ - deleted_images: string[]; - }; - /** - * Denoise Latents - * @description Denoises noisy latents to decodable images - */ - DenoiseLatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Positive Conditioning - * @description Positive conditioning tensor - * @default null - */ - positive_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; - /** - * Negative Conditioning - * @description Negative conditioning tensor - * @default null - */ - negative_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; - /** - * @description Noise tensor - * @default null - */ - noise?: components["schemas"]["LatentsField"] | null; - /** - * Steps - * @description Number of steps to run - * @default 10 - */ - steps?: number; - /** - * CFG Scale - * @description Classifier-Free Guidance scale - * @default 7.5 - */ - cfg_scale?: number | number[]; - /** - * Denoising Start - * @description When to start denoising, expressed a percentage of total steps - * @default 0 - */ - denoising_start?: number; - /** - * Denoising End - * @description When to stop denoising, expressed a percentage of total steps - * @default 1 - */ - denoising_end?: number; - /** - * Scheduler - * @description Scheduler to use during inference - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; - /** - * UNet - * @description UNet (scheduler, 
LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"]; - /** - * Control - * @default null - */ - control?: components["schemas"]["ControlField"] | components["schemas"]["ControlField"][] | null; - /** - * IP-Adapter - * @description IP-Adapter to apply - * @default null - */ - ip_adapter?: components["schemas"]["IPAdapterField"] | components["schemas"]["IPAdapterField"][] | null; - /** - * T2I-Adapter - * @description T2I-Adapter(s) to apply - * @default null - */ - t2i_adapter?: components["schemas"]["T2IAdapterField"] | components["schemas"]["T2IAdapterField"][] | null; - /** - * CFG Rescale Multiplier - * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR - * @default 0 - */ - cfg_rescale_multiplier?: number; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"] | null; - /** - * @description The mask to use for the operation - * @default null - */ - denoise_mask?: components["schemas"]["DenoiseMaskField"] | null; - /** - * type - * @default denoise_latents - * @constant - * @enum {string} - */ - type: "denoise_latents"; - }; - /** - * DenoiseMaskField - * @description An inpaint mask field - */ - DenoiseMaskField: { - /** - * Mask Name - * @description The name of the mask image - */ - mask_name: string; - /** - * Masked Latents Name - * @description The name of the masked image latents - * @default null - */ - masked_latents_name?: string | null; - /** - * Gradient - * @description Used for gradient inpainting - * @default false - */ - gradient?: boolean; - }; - /** - * DenoiseMaskOutput - * @description Base class for nodes that output a single image - */ - DenoiseMaskOutput: { - /** @description Mask for denoise model run */ - denoise_mask: components["schemas"]["DenoiseMaskField"]; - /** - * type - * @default denoise_mask_output - * @constant - * @enum {string} - */ - type: "denoise_mask_output"; - }; - /** - * Depth Anything Processor - * @description Generates a depth map based on the Depth Anything algorithm - */ - DepthAnythingImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Model Size - * @description The size of the depth model to use - * @default small - * @enum {string} - */ - model_size?: "large" | "base" | "small"; - /** - * Resolution - * @description Pixel resolution for output image - * @default 512 - */ - resolution?: number; - /** - * type - * @default depth_anything_image_processor - * @constant - * @enum {string} - */ - type: "depth_anything_image_processor"; - }; - /** - * Divide Integers - * @description Divides two numbers - */ - DivideInvocation: { - /** - * Id - * @description The id of this instance of an invocation. 
Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * A - * @description The first number - * @default 0 - */ - a?: number; - /** - * B - * @description The second number - * @default 0 - */ - b?: number; - /** - * type - * @default div - * @constant - * @enum {string} - */ - type: "div"; - }; - /** - * DownloadCancelledEvent - * @description Event model for download_cancelled - */ - DownloadCancelledEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Source - * @description The source of the download - */ - source: string; - }; - /** - * DownloadCompleteEvent - * @description Event model for download_complete - */ - DownloadCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Source - * @description The source of the download - */ - source: string; - /** - * Download Path - * @description The local path where the download is saved - */ - download_path: string; - /** - * Total Bytes - * @description The total number of bytes downloaded - */ - total_bytes: number; - }; - /** - * DownloadErrorEvent - * @description Event model for download_error - */ - DownloadErrorEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Source - * @description The source of the download - */ - source: string; - /** - * Error Type - * @description The type of error - */ - error_type: string; - /** - * Error - * @description The error message - */ - error: string; - }; - /** - * DownloadJob - * @description Class to monitor and control a model download request. - */ - DownloadJob: { - /** - * Id - * @description Numeric ID of this job - * @default -1 - */ - id?: number; - /** - * Dest - * Format: path - * @description Initial destination of downloaded model on local disk; a directory or file path - */ - dest: string; - /** - * Download Path - * @description Final location of downloaded file or directory - */ - download_path?: string | null; - /** - * @description Status of the download - * @default waiting - */ - status?: components["schemas"]["DownloadJobStatus"]; - /** - * Bytes - * @description Bytes downloaded so far - * @default 0 - */ - bytes?: number; - /** - * Total Bytes - * @description Total file size (bytes) - * @default 0 - */ - total_bytes?: number; - /** - * Error Type - * @description Name of exception that caused an error - */ - error_type?: string | null; - /** - * Error - * @description Traceback of the exception that caused an error - */ - error?: string | null; - /** - * Source - * Format: uri - * @description Where to download from. Specific types specified in child classes. 
- */ - source: string; - /** - * Access Token - * @description authorization token for protected resources - */ - access_token?: string | null; - /** - * Priority - * @description Queue priority; lower values are higher priority - * @default 10 - */ - priority?: number; - /** - * Job Started - * @description Timestamp for when the download job started - */ - job_started?: string | null; - /** - * Job Ended - * @description Timestamp for when the download job ende1d (completed or errored) - */ - job_ended?: string | null; - /** - * Content Type - * @description Content type of downloaded file - */ - content_type?: string | null; - }; - /** - * DownloadJobStatus - * @description State of a download job. - * @enum {string} - */ - DownloadJobStatus: "waiting" | "running" | "completed" | "cancelled" | "error"; - /** - * DownloadProgressEvent - * @description Event model for download_progress - */ - DownloadProgressEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Source - * @description The source of the download - */ - source: string; - /** - * Download Path - * @description The local path where the download is saved - */ - download_path: string; - /** - * Current Bytes - * @description The number of bytes downloaded so far - */ - current_bytes: number; - /** - * Total Bytes - * @description The total number of bytes to be downloaded - */ - total_bytes: number; - }; - /** - * DownloadStartedEvent - * @description Event model for download_started - */ - DownloadStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Source - * @description The source of the download - */ - source: string; - /** - * Download Path - * @description The local path where the download is saved - */ - download_path: string; - }; - /** - * Dynamic Prompt - * @description Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator - */ - DynamicPromptInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default false - */ - use_cache?: boolean; - /** - * Prompt - * @description The prompt to parse with dynamicprompts - * @default null - */ - prompt?: string; - /** - * Max Prompts - * @description The number of prompts to generate - * @default 1 - */ - max_prompts?: number; - /** - * Combinatorial - * @description Whether to use the combinatorial generator - * @default false - */ - combinatorial?: boolean; - /** - * type - * @default dynamic_prompt - * @constant - * @enum {string} - */ - type: "dynamic_prompt"; - }; - /** DynamicPromptsResponse */ - DynamicPromptsResponse: { - /** Prompts */ - prompts: string[]; - /** Error */ - error?: string | null; - }; - /** - * Upscale (RealESRGAN) - * @description Upscales an image using RealESRGAN. - */ - ESRGANInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
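The download event schemas above (started, progress, complete, error, cancelled) are plain data shapes keyed by a download source. A small sketch, assuming the generated types are imported from this file, of how a client might read them; the './schema' path and both helper names are illustrative only:

import type { components } from './schema'; // hypothetical import path for this generated file

type DownloadProgressEvent = components['schemas']['DownloadProgressEvent'];
type DownloadCompleteEvent = components['schemas']['DownloadCompleteEvent'];

// Illustrative helper: report how far along a download is.
const logProgress = (e: DownloadProgressEvent): void => {
  const pct = e.total_bytes > 0 ? (100 * e.current_bytes) / e.total_bytes : 0;
  console.log(`${e.source}: ${pct.toFixed(1)}% -> ${e.download_path}`);
};

// Illustrative helper: report a finished download.
const logComplete = (e: DownloadCompleteEvent): void => {
  console.log(`${e.source}: done, ${e.total_bytes} bytes saved to ${e.download_path}`);
};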
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The input image - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Model Name - * @description The Real-ESRGAN model to use - * @default RealESRGAN_x4plus.pth - * @enum {string} - */ - model_name?: "RealESRGAN_x4plus.pth" | "RealESRGAN_x4plus_anime_6B.pth" | "ESRGAN_SRx4_DF2KOST_official-ff704c30.pth" | "RealESRGAN_x2plus.pth"; - /** - * Tile Size - * @description Tile size for tiled ESRGAN upscaling (0=tiling disabled) - * @default 400 - */ - tile_size?: number; - /** - * type - * @default esrgan - * @constant - * @enum {string} - */ - type: "esrgan"; - }; - /** Edge */ - Edge: { - /** @description The connection for the edge's from node and field */ - source: components["schemas"]["EdgeConnection"]; - /** @description The connection for the edge's to node and field */ - destination: components["schemas"]["EdgeConnection"]; - }; - /** EdgeConnection */ - EdgeConnection: { - /** - * Node Id - * @description The id of the node for this edge connection - */ - node_id: string; - /** - * Field - * @description The field for this connection - */ - field: string; - }; - /** EnqueueBatchResult */ - EnqueueBatchResult: { - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Enqueued - * @description The total number of queue items enqueued - */ - enqueued: number; - /** - * Requested - * @description The total number of queue items requested to be enqueued - */ - requested: number; - /** @description The batch that was enqueued */ - batch: components["schemas"]["Batch"]; - /** - * Priority - * @description The priority of the enqueued batch - */ - priority: number; - }; - /** ExposedField */ - ExposedField: { - /** Nodeid */ - nodeId: string; - /** Fieldname */ - fieldName: string; - }; - /** - * FaceIdentifier - * @description Outputs an image with detected face IDs printed on each face. For use with other FaceTools. - */ - FaceIdentifierInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Image to face detect - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Minimum Confidence - * @description Minimum confidence for face detection (lower if detection is failing) - * @default 0.5 - */ - minimum_confidence?: number; - /** - * Chunk - * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. 
- * @default false - */ - chunk?: boolean; - /** - * type - * @default face_identifier - * @constant - * @enum {string} - */ - type: "face_identifier"; - }; - /** - * FaceMask - * @description Face mask creation using mediapipe face detection - */ - FaceMaskInvocation: { - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Image to face detect - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Face Ids - * @description Comma-separated list of face ids to mask eg '0,2,7'. Numbered from 0. Leave empty to mask all. Find face IDs with FaceIdentifier node. - * @default - */ - face_ids?: string; - /** - * Minimum Confidence - * @description Minimum confidence for face detection (lower if detection is failing) - * @default 0.5 - */ - minimum_confidence?: number; - /** - * X Offset - * @description Offset for the X-axis of the face mask - * @default 0 - */ - x_offset?: number; - /** - * Y Offset - * @description Offset for the Y-axis of the face mask - * @default 0 - */ - y_offset?: number; - /** - * Chunk - * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. - * @default false - */ - chunk?: boolean; - /** - * Invert Mask - * @description Toggle to invert the mask - * @default false - */ - invert_mask?: boolean; - /** - * type - * @default face_mask_detection - * @constant - * @enum {string} - */ - type: "face_mask_detection"; - }; - /** - * FaceMaskOutput - * @description Base class for FaceMask output - */ - FaceMaskOutput: { - /** @description The output image */ - image: components["schemas"]["ImageField"]; - /** - * Width - * @description The width of the image in pixels - */ - width: number; - /** - * Height - * @description The height of the image in pixels - */ - height: number; - /** - * type - * @default face_mask_output - * @constant - * @enum {string} - */ - type: "face_mask_output"; - /** @description The output mask */ - mask: components["schemas"]["ImageField"]; - }; - /** - * FaceOff - * @description Bound, extract, and mask a face from an image using MediaPipe detection - */ - FaceOffInvocation: { - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Image for face detection - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Face Id - * @description The face ID to process, numbered from 0. Multiple faces not supported. Find a face's ID with FaceIdentifier node. 
- * @default 0 - */ - face_id?: number; - /** - * Minimum Confidence - * @description Minimum confidence for face detection (lower if detection is failing) - * @default 0.5 - */ - minimum_confidence?: number; - /** - * X Offset - * @description X-axis offset of the mask - * @default 0 - */ - x_offset?: number; - /** - * Y Offset - * @description Y-axis offset of the mask - * @default 0 - */ - y_offset?: number; - /** - * Padding - * @description All-axis padding around the mask in pixels - * @default 0 - */ - padding?: number; - /** - * Chunk - * @description Whether to bypass full image face detection and default to image chunking. Chunking will occur if no faces are found in the full image. - * @default false - */ - chunk?: boolean; - /** - * type - * @default face_off - * @constant - * @enum {string} - */ - type: "face_off"; - }; - /** - * FaceOffOutput - * @description Base class for FaceOff Output - */ - FaceOffOutput: { - /** @description The output image */ - image: components["schemas"]["ImageField"]; - /** - * Width - * @description The width of the image in pixels - */ - width: number; - /** - * Height - * @description The height of the image in pixels - */ - height: number; - /** - * type - * @default face_off_output - * @constant - * @enum {string} - */ - type: "face_off_output"; - /** @description The output mask */ - mask: components["schemas"]["ImageField"]; - /** - * X - * @description The x coordinate of the bounding box's left side - */ - x: number; - /** - * Y - * @description The y coordinate of the bounding box's top side - */ - y: number; - }; - /** - * FieldKind - * @description The kind of field. - * - `Input`: An input field on a node. - * - `Output`: An output field on a node. - * - `Internal`: A field which is treated as an input, but cannot be used in node definitions. Metadata is - * one example. It is provided to nodes via the WithMetadata class, and we want to reserve the field name - * "metadata" for this on all nodes. `FieldKind` is used to short-circuit the field name validation logic, - * allowing "metadata" for that field. - * - `NodeAttribute`: The field is a node attribute. These are fields which are not inputs or outputs, - * but which are used to store information about the node. For example, the `id` and `type` fields are node - * attributes. - * - * The presence of this in `json_schema_extra["field_kind"]` is used when initializing node schemas on app - * startup, and when generating the OpenAPI schema for the workflow editor. - * @enum {string} - */ - FieldKind: "input" | "output" | "internal" | "node_attribute"; - /** - * Float Collection Primitive - * @description A collection of float primitive values - */ - FloatCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of float values - * @default [] - */ - collection?: number[]; - /** - * type - * @default float_collection - * @constant - * @enum {string} - */ - type: "float_collection"; - }; - /** - * FloatCollectionOutput - * @description Base class for nodes that output a collection of floats - */ - FloatCollectionOutput: { - /** - * Collection - * @description The float collection - */ - collection: number[]; - /** - * type - * @default float_collection_output - * @constant - * @enum {string} - */ - type: "float_collection_output"; - }; - /** - * Float Primitive - * @description A float primitive value - */ - FloatInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The float value - * @default 0 - */ - value?: number; - /** - * type - * @default float - * @constant - * @enum {string} - */ - type: "float"; - }; - /** - * Float Range - * @description Creates a range - */ - FloatLinearRangeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Start - * @description The first value of the range - * @default 5 - */ - start?: number; - /** - * Stop - * @description The last value of the range - * @default 10 - */ - stop?: number; - /** - * Steps - * @description number of values to interpolate over (including start and stop) - * @default 30 - */ - steps?: number; - /** - * type - * @default float_range - * @constant - * @enum {string} - */ - type: "float_range"; - }; - /** - * Float Math - * @description Performs floating point math. - */ - FloatMathInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Operation - * @description The operation to perform - * @default ADD - * @enum {string} - */ - operation?: "ADD" | "SUB" | "MUL" | "DIV" | "EXP" | "ABS" | "SQRT" | "MIN" | "MAX"; - /** - * A - * @description The first number - * @default 1 - */ - a?: number; - /** - * B - * @description The second number - * @default 1 - */ - b?: number; - /** - * type - * @default float_math - * @constant - * @enum {string} - */ - type: "float_math"; - }; - /** - * FloatOutput - * @description Base class for nodes that output a single float - */ - FloatOutput: { - /** - * Value - * @description The output float - */ - value: number; - /** - * type - * @default float_output - * @constant - * @enum {string} - */ - type: "float_output"; - }; - /** - * Float To Integer - * @description Rounds a float number to (a multiple of) an integer. - */ - FloatToIntegerInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The value to round - * @default 0 - */ - value?: number; - /** - * Multiple of - * @description The multiple to round to - * @default 1 - */ - multiple?: number; - /** - * Method - * @description The method to use for rounding - * @default Nearest - * @enum {string} - */ - method?: "Nearest" | "Floor" | "Ceiling" | "Truncate"; - /** - * type - * @default float_to_int - * @constant - * @enum {string} - */ - type: "float_to_int"; - }; - /** - * Flux Main Model - * @description Loads a flux base model, outputting its submodels. - */ - FluxModelLoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** @description Flux model (Transformer, VAE, CLIP) to load */ - model: components["schemas"]["ModelIdentifierField"]; - /** @description T5 tokenizer and text encoder */ - t5_encoder: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default flux_model_loader - * @constant - * @enum {string} - */ - type: "flux_model_loader"; - }; - /** - * FluxModelLoaderOutput - * @description Flux base model loader output - */ - FluxModelLoaderOutput: { - /** - * Transformer - * @description Transformer - */ - transformer: components["schemas"]["TransformerField"]; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip: components["schemas"]["CLIPField"]; - /** - * T5 Encoder - * @description T5 tokenizer and text encoder - */ - t5_encoder: components["schemas"]["T5EncoderField"]; - /** - * VAE - * @description VAE - */ - vae: components["schemas"]["VAEField"]; - /** - * Max Seq Length - * @description VAE - * @enum {integer} - */ - max_seq_len: 256 | 512; - /** - * type - * @default flux_model_loader_output - * @constant - * @enum {string} - */ - type: "flux_model_loader_output"; - }; - /** - * FLUX Text Encoding - * @description Encodes and preps a prompt for a flux image. - */ - FluxTextEncoderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"]; - /** - * T5Encoder - * @description T5 tokenizer and text encoder - * @default null - */ - t5_encoder?: components["schemas"]["T5EncoderField"]; - /** - * Max Seq Len - * @description Max sequence length for the desired flux model - * @default null - * @enum {integer} - */ - max_seq_len?: 256 | 512; - /** - * Positive Prompt - * @description Positive prompt for text-to-image generation. - * @default null - */ - positive_prompt?: string; - /** - * type - * @default flux_text_encoder - * @constant - * @enum {string} - */ - type: "flux_text_encoder"; - }; - /** - * FLUX Text to Image - * @description Text-to-image generation using a FLUX model. - */ - FluxTextToImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Transformer - * @description UNet (scheduler, LoRAs) - * @default null - */ - transformer?: components["schemas"]["TransformerField"]; - /** - * @description VAE - * @default null - */ - vae?: components["schemas"]["VAEField"]; - /** - * @description Positive conditioning tensor - * @default null - */ - positive_text_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Width - * @description Width of the generated image. - * @default 1024 - */ - width?: number; - /** - * Height - * @description Height of the generated image. - * @default 1024 - */ - height?: number; - /** - * Num Steps - * @description Number of diffusion steps. - * @default 4 - */ - num_steps?: number; - /** - * Guidance - * @description The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. - * @default 4 - */ - guidance?: number; - /** - * Seed - * @description Randomness seed for reproducibility. - * @default 0 - */ - seed?: number; - /** - * type - * @default flux_text_to_image - * @constant - * @enum {string} - */ - type: "flux_text_to_image"; - }; - /** FoundModel */ - FoundModel: { - /** - * Path - * @description Path to the model - */ - path: string; - /** - * Is Installed - * @description Whether or not the model is already installed - */ - is_installed: boolean; - }; - /** - * FreeUConfig - * @description Configuration for the FreeU hyperparameters. - * - https://huggingface.co/docs/diffusers/main/en/using-diffusers/freeu - * - https://github.com/ChenyangSi/FreeU - */ - FreeUConfig: { - /** - * S1 - * @description Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. - */ - s1: number; - /** - * S2 - * @description Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. - */ - s2: number; - /** - * B1 - * @description Scaling factor for stage 1 to amplify the contributions of backbone features. - */ - b1: number; - /** - * B2 - * @description Scaling factor for stage 2 to amplify the contributions of backbone features. - */ - b2: number; - }; - /** - * FreeU - * @description Applies FreeU to the UNet. Suggested values (b1/b2/s1/s2): - * - * SD1.5: 1.2/1.4/0.9/0.2, - * SD2: 1.1/1.2/0.9/0.2, - * SDXL: 1.1/1.2/0.6/0.4, - */ - FreeUInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"]; - /** - * B1 - * @description Scaling factor for stage 1 to amplify the contributions of backbone features. - * @default 1.2 - */ - b1?: number; - /** - * B2 - * @description Scaling factor for stage 2 to amplify the contributions of backbone features. 
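The flux_model_loader, flux_text_encoder and flux_text_to_image schemas above are the new FLUX node types surfaced by this series. A minimal sketch of a flux_text_to_image node as it could appear in a graph payload, using only fields shown in the schema; the node id and the './schema' import path are placeholders, and the connectable fields would normally arrive via edges rather than inline values:

import type { components } from './schema'; // hypothetical import path for this generated file

// Placeholder id; transformer, vae and positive_text_conditioning are normally
// supplied through graph edges from flux_model_loader / flux_text_encoder nodes.
const fluxNode: components['schemas']['FluxTextToImageInvocation'] = {
  id: 'flux_t2i_1',
  type: 'flux_text_to_image',
  is_intermediate: true,
  use_cache: true,
  width: 1024,  // schema default
  height: 1024, // schema default
  num_steps: 4, // schema default
  guidance: 4,  // schema default
  seed: 0,
};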
- * @default 1.4 - */ - b2?: number; - /** - * S1 - * @description Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. - * @default 0.9 - */ - s1?: number; - /** - * S2 - * @description Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to mitigate the "oversmoothing effect" in the enhanced denoising process. - * @default 0.2 - */ - s2?: number; - /** - * type - * @default freeu - * @constant - * @enum {string} - */ - type: "freeu"; - }; - /** - * GradientMaskOutput - * @description Outputs a denoise mask and an image representing the total gradient of the mask. - */ - GradientMaskOutput: { - /** @description Mask for denoise model run */ - denoise_mask: components["schemas"]["DenoiseMaskField"]; - /** @description Image representing the total gradient area of the mask. For paste-back purposes. */ - expanded_mask_area: components["schemas"]["ImageField"]; - /** - * type - * @default gradient_mask_output - * @constant - * @enum {string} - */ - type: "gradient_mask_output"; - }; - /** Graph */ - Graph: { - /** - * Id - * @description The id of this graph - */ - id?: string; - /** - * Nodes - * @description The nodes in this graph - */ - nodes?: { - [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | 
components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | 
components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; - }; - /** - * Edges - * @description The connections between nodes and their fields in this graph - */ - edges?: components["schemas"]["Edge"][]; - }; - /** - * GraphExecutionState - * @description Tracks the state of a graph execution - */ - GraphExecutionState: { - /** - * Id - * @description The id of the execution state - */ - id?: string; - /** @description The graph being executed */ - graph: components["schemas"]["Graph"]; - /** @description The expanded graph of activated and executed nodes */ - execution_graph?: components["schemas"]["Graph"]; - /** - * Executed - * @description The set of node ids that have been executed - */ - executed?: string[]; - /** - * Executed History - * @description The list of node ids that have been executed, in order of execution - */ - executed_history?: string[]; - /** - * Results - * @description The results of node executions - */ - results?: { - [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | 
components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; - }; - /** - * Errors - * @description Errors raised when executing nodes - */ - errors?: { - [key: string]: string; - }; - /** - * Prepared Source Mapping - * @description The map of prepared nodes to original graph nodes - */ - prepared_source_mapping?: { - [key: string]: string; - }; - /** - * Source Prepared Mapping - * @description The map of original graph nodes to prepared nodes - */ - source_prepared_mapping?: { - [key: string]: string[]; - }; - }; - /** - * Grounding DINO (Text Prompt Object Detection) - * @description Runs a Grounding DINO model. Performs zero-shot bounding-box object detection from a text prompt. - */ - GroundingDinoInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Model - * @description The Grounding DINO model to use. - * @default null - * @enum {string} - */ - model?: "grounding-dino-tiny" | "grounding-dino-base"; - /** - * Prompt - * @description The prompt describing the object to segment. - * @default null - */ - prompt?: string; - /** - * @description The image to segment. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detection Threshold - * @description The detection threshold for the Grounding DINO model. All detected bounding boxes with scores above this threshold will be returned. - * @default 0.3 - */ - detection_threshold?: number; - /** - * type - * @default grounding_dino - * @constant - * @enum {string} - */ - type: "grounding_dino"; - }; - /** - * HFModelSource - * @description A HuggingFace repo_id with optional variant, sub-folder and access token. 
- * Note that the variant option, if not provided to the constructor, will default to fp16, which is - * what people (almost) always want. - */ - HFModelSource: { - /** Repo Id */ - repo_id: string; - /** @default fp16 */ - variant?: components["schemas"]["ModelRepoVariant"] | null; - /** Subfolder */ - subfolder?: string | null; - /** Access Token */ - access_token?: string | null; - /** - * Type - * @default hf - * @constant - * @enum {string} - */ - type?: "hf"; - }; - /** HTTPValidationError */ - HTTPValidationError: { - /** Detail */ - detail?: components["schemas"]["ValidationError"][]; - }; - /** - * HED (softedge) Processor - * @description Applies HED edge detection to image - */ - HedImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * Scribble - * @description Whether or not to use scribble mode - * @default false - */ - scribble?: boolean; - /** - * type - * @default hed_image_processor - * @constant - * @enum {string} - */ - type: "hed_image_processor"; - }; - /** - * Heuristic Resize - * @description Resize an image using a heuristic method. Preserves edge maps. - */ - HeuristicResizeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to resize - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Width - * @description The width to resize to (px) - * @default 512 - */ - width?: number; - /** - * Height - * @description The height to resize to (px) - * @default 512 - */ - height?: number; - /** - * type - * @default heuristic_resize - * @constant - * @enum {string} - */ - type: "heuristic_resize"; - }; - /** - * HuggingFaceMetadata - * @description Extended metadata fields provided by HuggingFace. 
- */ - HuggingFaceMetadata: { - /** - * Name - * @description model's name - */ - name: string; - /** - * Files - * @description model files and their sizes - */ - files?: components["schemas"]["RemoteModelFile"][]; - /** - * Type - * @default huggingface - * @constant - * @enum {string} - */ - type?: "huggingface"; - /** - * Id - * @description The HF model id - */ - id: string; - /** - * Api Response - * @description Response from the HF API as stringified JSON - */ - api_response?: string | null; - /** - * Is Diffusers - * @description Whether the metadata is for a Diffusers format model - * @default false - */ - is_diffusers?: boolean; - /** - * Ckpt Urls - * @description URLs for all checkpoint format models in the metadata - */ - ckpt_urls?: string[] | null; - }; - /** HuggingFaceModels */ - HuggingFaceModels: { - /** - * Urls - * @description URLs for all checkpoint format models in the metadata - */ - urls: string[] | null; - /** - * Is Diffusers - * @description Whether the metadata is for a Diffusers format model - */ - is_diffusers: boolean; - }; - /** - * IPAdapterCheckpointConfig - * @description Model config for IP Adapter checkpoint format models. - */ - IPAdapterCheckpointConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default ip_adapter - * @constant - * @enum {string} - */ - type: "ip_adapter"; - /** - * Format - * @constant - * @enum {string} - */ - format: "checkpoint"; - }; - /** IPAdapterField */ - IPAdapterField: { - /** - * Image - * @description The IP-Adapter image prompt(s). - */ - image: components["schemas"]["ImageField"] | components["schemas"]["ImageField"][]; - /** @description The IP-Adapter model to use. */ - ip_adapter_model: components["schemas"]["ModelIdentifierField"]; - /** @description The name of the CLIP image encoder model. */ - image_encoder_model: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight given to the IP-Adapter. - * @default 1 - */ - weight?: number | number[]; - /** - * Target Blocks - * @description The IP Adapter blocks to apply - * @default [] - */ - target_blocks?: string[]; - /** - * Begin Step Percent - * @description When the IP-Adapter is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the IP-Adapter is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * @description The bool mask associated with this IP-Adapter. 
Excluded regions should be set to False, included regions should be set to True. - * @default null - */ - mask?: components["schemas"]["TensorField"] | null; - }; - /** - * IP-Adapter - * @description Collects IP-Adapter info to pass to other nodes. - */ - IPAdapterInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Image - * @description The IP-Adapter image prompt(s). - * @default null - */ - image?: components["schemas"]["ImageField"] | components["schemas"]["ImageField"][]; - /** - * IP-Adapter Model - * @description The IP-Adapter model. - * @default null - */ - ip_adapter_model?: components["schemas"]["ModelIdentifierField"]; - /** - * Clip Vision Model - * @description CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models. - * @default ViT-H - * @enum {string} - */ - clip_vision_model?: "ViT-H" | "ViT-G"; - /** - * Weight - * @description The weight given to the IP-Adapter - * @default 1 - */ - weight?: number | number[]; - /** - * Method - * @description The method to apply the IP-Adapter - * @default full - * @enum {string} - */ - method?: "full" | "style" | "composition"; - /** - * Begin Step Percent - * @description When the IP-Adapter is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the IP-Adapter is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * @description A mask defining the region that this IP-Adapter applies to. - * @default null - */ - mask?: components["schemas"]["TensorField"] | null; - /** - * type - * @default ip_adapter - * @constant - * @enum {string} - */ - type: "ip_adapter"; - }; - /** - * IPAdapterInvokeAIConfig - * @description Model config for IP Adapter diffusers format models. - */ - IPAdapterInvokeAIConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default ip_adapter - * @constant - * @enum {string} - */ - type: "ip_adapter"; - /** Image Encoder Model Id */ - image_encoder_model_id: string; - /** - * Format - * @constant - * @enum {string} - */ - format: "invokeai"; - }; - /** - * IPAdapterMetadataField - * @description IP Adapter Field, minus the CLIP Vision Encoder model - */ - IPAdapterMetadataField: { - /** @description The IP-Adapter image prompt. */ - image: components["schemas"]["ImageField"]; - /** @description The IP-Adapter model. */ - ip_adapter_model: components["schemas"]["ModelIdentifierField"]; - /** - * Clip Vision Model - * @description The CLIP Vision model - * @enum {string} - */ - clip_vision_model: "ViT-H" | "ViT-G"; - /** - * Method - * @description Method to apply IP Weights with - * @enum {string} - */ - method: "full" | "style" | "composition"; - /** - * Weight - * @description The weight given to the IP-Adapter - */ - weight: number | number[]; - /** - * Begin Step Percent - * @description When the IP-Adapter is first applied (% of total steps) - */ - begin_step_percent: number; - /** - * End Step Percent - * @description When the IP-Adapter is last applied (% of total steps) - */ - end_step_percent: number; - }; - /** IPAdapterOutput */ - IPAdapterOutput: { - /** - * IP-Adapter - * @description IP-Adapter to apply - */ - ip_adapter: components["schemas"]["IPAdapterField"]; - /** - * type - * @default ip_adapter_output - * @constant - * @enum {string} - */ - type: "ip_adapter_output"; - }; - /** - * Ideal Size - * @description Calculates the ideal size for generation to avoid duplication - */ - IdealSizeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Width - * @description Final image width - * @default 1024 - */ - width?: number; - /** - * Height - * @description Final image height - * @default 576 - */ - height?: number; - /** - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"]; - /** - * Multiplier - * @description Amount to multiply the model's dimensions by when calculating the ideal size (may result in initial generation artifacts if too large) - * @default 1 - */ - multiplier?: number; - /** - * type - * @default ideal_size - * @constant - * @enum {string} - */ - type: "ideal_size"; - }; - /** - * IdealSizeOutput - * @description Base class for invocations that output an image - */ - IdealSizeOutput: { - /** - * Width - * @description The ideal width of the image (in pixels) - */ - width: number; - /** - * Height - * @description The ideal height of the image (in pixels) - */ - height: number; - /** - * type - * @default ideal_size_output - * @constant - * @enum {string} - */ - type: "ideal_size_output"; - }; - /** - * Blur Image - * @description Blurs an image - */ - ImageBlurInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to blur - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Radius - * @description The blur radius - * @default 8 - */ - radius?: number; - /** - * Blur Type - * @description The type of blur - * @default gaussian - * @enum {string} - */ - blur_type?: "gaussian" | "box"; - /** - * type - * @default img_blur - * @constant - * @enum {string} - */ - type: "img_blur"; - }; - /** - * ImageCategory - * @description The category of an image. - * - * - GENERAL: The image is an output, init image, or otherwise an image without a specialized purpose. - * - MASK: The image is a mask image. - * - CONTROL: The image is a ControlNet control image. - * - USER: The image is a user-provide image. - * - OTHER: The image is some other type of image with a specialized purpose. To be used by external nodes. - * @enum {string} - */ - ImageCategory: "general" | "mask" | "control" | "user" | "other"; - /** - * Extract Image Channel - * @description Gets a channel from an image. - */ - ImageChannelInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to get the channel from - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Channel - * @description The channel to get - * @default A - * @enum {string} - */ - channel?: "A" | "R" | "G" | "B"; - /** - * type - * @default img_chan - * @constant - * @enum {string} - */ - type: "img_chan"; - }; - /** - * Multiply Image Channel - * @description Scale a specific color channel of an image. - */ - ImageChannelMultiplyInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to adjust - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Channel - * @description Which channel to adjust - * @default null - * @enum {string} - */ - channel?: "Red (RGBA)" | "Green (RGBA)" | "Blue (RGBA)" | "Alpha (RGBA)" | "Cyan (CMYK)" | "Magenta (CMYK)" | "Yellow (CMYK)" | "Black (CMYK)" | "Hue (HSV)" | "Saturation (HSV)" | "Value (HSV)" | "Luminosity (LAB)" | "A (LAB)" | "B (LAB)" | "Y (YCbCr)" | "Cb (YCbCr)" | "Cr (YCbCr)"; - /** - * Scale - * @description The amount to scale the channel by. - * @default 1 - */ - scale?: number; - /** - * Invert Channel - * @description Invert the channel after scaling - * @default false - */ - invert_channel?: boolean; - /** - * type - * @default img_channel_multiply - * @constant - * @enum {string} - */ - type: "img_channel_multiply"; - }; - /** - * Offset Image Channel - * @description Add or subtract a value from a specific color channel of an image. - */ - ImageChannelOffsetInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to adjust - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Channel - * @description Which channel to adjust - * @default null - * @enum {string} - */ - channel?: "Red (RGBA)" | "Green (RGBA)" | "Blue (RGBA)" | "Alpha (RGBA)" | "Cyan (CMYK)" | "Magenta (CMYK)" | "Yellow (CMYK)" | "Black (CMYK)" | "Hue (HSV)" | "Saturation (HSV)" | "Value (HSV)" | "Luminosity (LAB)" | "A (LAB)" | "B (LAB)" | "Y (YCbCr)" | "Cb (YCbCr)" | "Cr (YCbCr)"; - /** - * Offset - * @description The amount to adjust the channel by - * @default 0 - */ - offset?: number; - /** - * type - * @default img_channel_offset - * @constant - * @enum {string} - */ - type: "img_channel_offset"; - }; - /** - * Image Collection Primitive - * @description A collection of image primitive values - */ - ImageCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of image values - * @default null - */ - collection?: components["schemas"]["ImageField"][]; - /** - * type - * @default image_collection - * @constant - * @enum {string} - */ - type: "image_collection"; - }; - /** - * ImageCollectionOutput - * @description Base class for nodes that output a collection of images - */ - ImageCollectionOutput: { - /** - * Collection - * @description The output images - */ - collection: components["schemas"]["ImageField"][]; - /** - * type - * @default image_collection_output - * @constant - * @enum {string} - */ - type: "image_collection_output"; - }; - /** - * Convert Image Mode - * @description Converts an image to a different mode. - */ - ImageConvertInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to convert - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Mode - * @description The mode to convert to - * @default L - * @enum {string} - */ - mode?: "L" | "RGB" | "RGBA" | "CMYK" | "YCbCr" | "LAB" | "HSV" | "I" | "F"; - /** - * type - * @default img_conv - * @constant - * @enum {string} - */ - type: "img_conv"; - }; - /** - * Crop Image - * @description Crops an image to a specified box. The box can be outside of the image. 
- */ - ImageCropInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to crop - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * X - * @description The left x coordinate of the crop rectangle - * @default 0 - */ - x?: number; - /** - * Y - * @description The top y coordinate of the crop rectangle - * @default 0 - */ - y?: number; - /** - * Width - * @description The width of the crop rectangle - * @default 512 - */ - width?: number; - /** - * Height - * @description The height of the crop rectangle - * @default 512 - */ - height?: number; - /** - * type - * @default img_crop - * @constant - * @enum {string} - */ - type: "img_crop"; - }; - /** - * ImageDTO - * @description Deserialized image record, enriched for the frontend. - */ - ImageDTO: { - /** - * Image Name - * @description The unique name of the image. - */ - image_name: string; - /** - * Image Url - * @description The URL of the image. - */ - image_url: string; - /** - * Thumbnail Url - * @description The URL of the image's thumbnail. - */ - thumbnail_url: string; - /** @description The type of the image. */ - image_origin: components["schemas"]["ResourceOrigin"]; - /** @description The category of the image. */ - image_category: components["schemas"]["ImageCategory"]; - /** - * Width - * @description The width of the image in px. - */ - width: number; - /** - * Height - * @description The height of the image in px. - */ - height: number; - /** - * Created At - * @description The created timestamp of the image. - */ - created_at: string; - /** - * Updated At - * @description The updated timestamp of the image. - */ - updated_at: string; - /** - * Deleted At - * @description The deleted timestamp of the image. - */ - deleted_at?: string | null; - /** - * Is Intermediate - * @description Whether this is an intermediate image. - */ - is_intermediate: boolean; - /** - * Session Id - * @description The session ID that generated this image, if it is a generated image. - */ - session_id?: string | null; - /** - * Node Id - * @description The node ID that generated this image, if it is a generated image. - */ - node_id?: string | null; - /** - * Starred - * @description Whether this image is starred. - */ - starred: boolean; - /** - * Has Workflow - * @description Whether this image has a workflow. - */ - has_workflow: boolean; - /** - * Board Id - * @description The id of the board the image belongs to, if one exists. - */ - board_id?: string | null; - }; - /** - * ImageField - * @description An image primitive field - */ - ImageField: { - /** - * Image Name - * @description The name of the image - */ - image_name: string; - }; - /** - * Adjust Image Hue - * @description Adjusts the Hue of an image. 
- */ - ImageHueAdjustmentInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to adjust - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Hue - * @description The degrees by which to rotate the hue, 0-360 - * @default 0 - */ - hue?: number; - /** - * type - * @default img_hue_adjust - * @constant - * @enum {string} - */ - type: "img_hue_adjust"; - }; - /** - * Inverse Lerp Image - * @description Inverse linear interpolation of all pixels of an image - */ - ImageInverseLerpInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to lerp - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Min - * @description The minimum input value - * @default 0 - */ - min?: number; - /** - * Max - * @description The maximum input value - * @default 255 - */ - max?: number; - /** - * type - * @default img_ilerp - * @constant - * @enum {string} - */ - type: "img_ilerp"; - }; - /** - * Image Primitive - * @description An image primitive value - */ - ImageInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to load - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default image - * @constant - * @enum {string} - */ - type: "image"; - }; - /** - * Lerp Image - * @description Linear interpolation of all pixels of an image - */ - ImageLerpInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to lerp - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Min - * @description The minimum output value - * @default 0 - */ - min?: number; - /** - * Max - * @description The maximum output value - * @default 255 - */ - max?: number; - /** - * type - * @default img_lerp - * @constant - * @enum {string} - */ - type: "img_lerp"; - }; - /** - * Image Mask to Tensor - * @description Convert a mask image to a tensor. Converts the image to grayscale and uses thresholding at the specified value. - */ - ImageMaskToTensorInvocation: { - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The mask image to convert. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Cutoff - * @description Cutoff (<) - * @default 128 - */ - cutoff?: number; - /** - * Invert - * @description Whether to invert the mask. - * @default false - */ - invert?: boolean; - /** - * type - * @default image_mask_to_tensor - * @constant - * @enum {string} - */ - type: "image_mask_to_tensor"; - }; - /** - * Multiply Images - * @description Multiplies two images together using `PIL.ImageChops.multiply()`. - */ - ImageMultiplyInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The first image to multiply - * @default null - */ - image1?: components["schemas"]["ImageField"]; - /** - * @description The second image to multiply - * @default null - */ - image2?: components["schemas"]["ImageField"]; - /** - * type - * @default img_mul - * @constant - * @enum {string} - */ - type: "img_mul"; - }; - /** - * Blur NSFW Image - * @description Add blur to NSFW-flagged images - */ - ImageNSFWBlurInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to check - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default img_nsfw - * @constant - * @enum {string} - */ - type: "img_nsfw"; - }; - /** - * ImageOutput - * @description Base class for nodes that output a single image - */ - ImageOutput: { - /** @description The output image */ - image: components["schemas"]["ImageField"]; - /** - * Width - * @description The width of the image in pixels - */ - width: number; - /** - * Height - * @description The height of the image in pixels - */ - height: number; - /** - * type - * @default image_output - * @constant - * @enum {string} - */ - type: "image_output"; - }; - /** - * Paste Image - * @description Pastes an image into another image. - */ - ImagePasteInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The base image - * @default null - */ - base_image?: components["schemas"]["ImageField"]; - /** - * @description The image to paste - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description The mask to use when pasting - * @default null - */ - mask?: components["schemas"]["ImageField"] | null; - /** - * X - * @description The left x coordinate at which to paste the image - * @default 0 - */ - x?: number; - /** - * Y - * @description The top y coordinate at which to paste the image - * @default 0 - */ - y?: number; - /** - * Crop - * @description Crop to base image dimensions - * @default false - */ - crop?: boolean; - /** - * type - * @default img_paste - * @constant - * @enum {string} - */ - type: "img_paste"; - }; - /** - * ImageRecordChanges - * @description A set of changes to apply to an image record. - * - * Only limited changes are valid: - * - `image_category`: change the category of an image - * - `session_id`: change the session associated with an image - * - `is_intermediate`: change the image's `is_intermediate` flag - * - `starred`: change whether the image is starred - */ - ImageRecordChanges: { - /** @description The image's new category. */ - image_category?: components["schemas"]["ImageCategory"] | null; - /** - * Session Id - * @description The image's new session ID. - */ - session_id?: string | null; - /** - * Is Intermediate - * @description The image's new `is_intermediate` flag. 
- */ - is_intermediate?: boolean | null; - /** - * Starred - * @description The image's new `starred` state - */ - starred?: boolean | null; - [key: string]: unknown; - }; - /** - * Resize Image - * @description Resizes an image to specific dimensions - */ - ImageResizeInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to resize - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Width - * @description The width to resize to (px) - * @default 512 - */ - width?: number; - /** - * Height - * @description The height to resize to (px) - * @default 512 - */ - height?: number; - /** - * Resample Mode - * @description The resampling mode - * @default bicubic - * @enum {string} - */ - resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; - /** - * type - * @default img_resize - * @constant - * @enum {string} - */ - type: "img_resize"; - }; - /** - * Scale Image - * @description Scales an image by a factor - */ - ImageScaleInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to scale - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Scale Factor - * @description The factor by which to scale the image - * @default 2 - */ - scale_factor?: number; - /** - * Resample Mode - * @description The resampling mode - * @default bicubic - * @enum {string} - */ - resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; - /** - * type - * @default img_scale - * @constant - * @enum {string} - */ - type: "img_scale"; - }; - /** - * Image to Latents - * @description Encodes an image into latents. - */ - ImageToLatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to encode - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description VAE - * @default null - */ - vae?: components["schemas"]["VAEField"]; - /** - * Tiled - * @description Processing using overlapping tiles (reduce memory consumption) - * @default false - */ - tiled?: boolean; - /** - * Tile Size - * @description The tile size for VAE tiling in pixels (image space). If set to 0, the default tile size for the model will be used. Larger tile sizes generally produce better results at the cost of higher memory usage. - * @default 0 - */ - tile_size?: number; - /** - * Fp32 - * @description Whether or not to use full float32 precision - * @default false - */ - fp32?: boolean; - /** - * type - * @default i2l - * @constant - * @enum {string} - */ - type: "i2l"; - }; - /** - * ImageUrlsDTO - * @description The URLs for an image and its thumbnail. - */ - ImageUrlsDTO: { - /** - * Image Name - * @description The unique name of the image. - */ - image_name: string; - /** - * Image Url - * @description The URL of the image. - */ - image_url: string; - /** - * Thumbnail Url - * @description The URL of the image's thumbnail. - */ - thumbnail_url: string; - }; - /** - * Add Invisible Watermark - * @description Add an invisible watermark to an image - */ - ImageWatermarkInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to check - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Text - * @description Watermark text - * @default InvokeAI - */ - text?: string; - /** - * type - * @default img_watermark - * @constant - * @enum {string} - */ - type: "img_watermark"; - }; - /** ImagesDownloaded */ - ImagesDownloaded: { - /** - * Response - * @description The message to display to the user when images begin downloading - */ - response?: string | null; - /** - * Bulk Download Item Name - * @description The name of the bulk download item for which events will be emitted - */ - bulk_download_item_name?: string | null; - }; - /** ImagesUpdatedFromListResult */ - ImagesUpdatedFromListResult: { - /** - * Updated Image Names - * @description The image names that were updated - */ - updated_image_names: string[]; - }; - /** - * Solid Color Infill - * @description Infills transparent areas of an image with a solid color - */ - InfillColorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description The color to use to infill - * @default { - * "r": 127, - * "g": 127, - * "b": 127, - * "a": 255 - * } - */ - color?: components["schemas"]["ColorField"]; - /** - * type - * @default infill_rgba - * @constant - * @enum {string} - */ - type: "infill_rgba"; - }; - /** - * PatchMatch Infill - * @description Infills transparent areas of an image using the PatchMatch algorithm - */ - InfillPatchMatchInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Downscale - * @description Run patchmatch on downscaled image to speedup infill - * @default 2 - */ - downscale?: number; - /** - * Resample Mode - * @description The resampling mode - * @default bicubic - * @enum {string} - */ - resample_mode?: "nearest" | "box" | "bilinear" | "hamming" | "bicubic" | "lanczos"; - /** - * type - * @default infill_patchmatch - * @constant - * @enum {string} - */ - type: "infill_patchmatch"; - }; - /** - * Tile Infill - * @description Infills transparent areas of an image with tiles of the image - */ - InfillTileInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Tile Size - * @description The tile size (px) - * @default 32 - */ - tile_size?: number; - /** - * Seed - * @description The seed to use for tile generation (omit for random) - * @default 0 - */ - seed?: number; - /** - * type - * @default infill_tile - * @constant - * @enum {string} - */ - type: "infill_tile"; - }; - /** - * Input - * @description The type of input a field accepts. - * - `Input.Direct`: The field must have its value provided directly, when the invocation and field are instantiated. - * - `Input.Connection`: The field must have its value provided by a connection. - * - `Input.Any`: The field may have its value provided either directly or by a connection. - * @enum {string} - */ - Input: "connection" | "direct" | "any"; - /** - * InputFieldJSONSchemaExtra - * @description Extra attributes to be added to input fields and their OpenAPI schema. Used during graph execution, - * and by the workflow editor during schema parsing and UI rendering. - */ - InputFieldJSONSchemaExtra: { - input: components["schemas"]["Input"]; - /** Orig Required */ - orig_required: boolean; - field_kind: components["schemas"]["FieldKind"]; - /** - * Default - * @default null - */ - default: unknown; - /** - * Orig Default - * @default null - */ - orig_default: unknown; - /** - * Ui Hidden - * @default false - */ - ui_hidden: boolean; - /** @default null */ - ui_type: components["schemas"]["UIType"] | null; - /** @default null */ - ui_component: components["schemas"]["UIComponent"] | null; - /** - * Ui Order - * @default null - */ - ui_order: number | null; - /** - * Ui Choice Labels - * @default null - */ - ui_choice_labels: { - [key: string]: string; - } | null; - }; - /** - * InstallStatus - * @description State of an install job running in the background. 
- * @enum {string} - */ - InstallStatus: "waiting" | "downloading" | "downloads_done" | "running" | "completed" | "error" | "cancelled"; - /** - * Integer Collection Primitive - * @description A collection of integer primitive values - */ - IntegerCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of integer values - * @default [] - */ - collection?: number[]; - /** - * type - * @default integer_collection - * @constant - * @enum {string} - */ - type: "integer_collection"; - }; - /** - * IntegerCollectionOutput - * @description Base class for nodes that output a collection of integers - */ - IntegerCollectionOutput: { - /** - * Collection - * @description The int collection - */ - collection: number[]; - /** - * type - * @default integer_collection_output - * @constant - * @enum {string} - */ - type: "integer_collection_output"; - }; - /** - * Integer Primitive - * @description An integer primitive value - */ - IntegerInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The integer value - * @default 0 - */ - value?: number; - /** - * type - * @default integer - * @constant - * @enum {string} - */ - type: "integer"; - }; - /** - * Integer Math - * @description Performs integer math. - */ - IntegerMathInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Operation - * @description The operation to perform - * @default ADD - * @enum {string} - */ - operation?: "ADD" | "SUB" | "MUL" | "DIV" | "EXP" | "MOD" | "ABS" | "MIN" | "MAX"; - /** - * A - * @description The first number - * @default 1 - */ - a?: number; - /** - * B - * @description The second number - * @default 1 - */ - b?: number; - /** - * type - * @default integer_math - * @constant - * @enum {string} - */ - type: "integer_math"; - }; - /** - * IntegerOutput - * @description Base class for nodes that output a single integer - */ - IntegerOutput: { - /** - * Value - * @description The output integer - */ - value: number; - /** - * type - * @default integer_output - * @constant - * @enum {string} - */ - type: "integer_output"; - }; - /** - * Invert Tensor Mask - * @description Inverts a tensor mask. - */ - InvertTensorMaskInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The tensor mask to convert. - * @default null - */ - mask?: components["schemas"]["TensorField"]; - /** - * type - * @default invert_tensor_mask - * @constant - * @enum {string} - */ - type: "invert_tensor_mask"; - }; - /** InvocationCacheStatus */ - InvocationCacheStatus: { - /** - * Size - * @description The current size of the invocation cache - */ - size: number; - /** - * Hits - * @description The number of cache hits - */ - hits: number; - /** - * Misses - * @description The number of cache misses - */ - misses: number; - /** - * Enabled - * @description Whether the invocation cache is enabled - */ - enabled: boolean; - /** - * Max Size - * @description The maximum size of the invocation cache - */ - max_size: number; - }; - /** - * InvocationCompleteEvent - * @description Event model for invocation_complete - */ - InvocationCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Item Id - * @description The ID of the queue item - */ - item_id: number; - /** - * Batch Id - * @description The ID of the queue batch - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session (aka graph execution state) - */ - session_id: string; - /** - * Invocation - * @description The ID of the invocation - */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | 
components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | 
components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; - /** - * Invocation Source Id - * @description The ID of the prepared invocation's source node - */ - invocation_source_id: string; - /** - * Result - * @description The result of the invocation - */ - result: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | 
components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; - }; - /** - * InvocationDenoiseProgressEvent - * @description Event model for invocation_denoise_progress - */ - InvocationDenoiseProgressEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Item Id - * @description The ID of the queue item - */ - item_id: number; - /** - * Batch Id - * @description The ID of the queue batch - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session (aka graph execution state) - */ - session_id: string; - /** - * Invocation - * @description The ID of the invocation - */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | 
components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | 
components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; - /** - * Invocation Source Id - * @description The ID of the prepared invocation's source node - */ - invocation_source_id: string; - /** @description The progress image sent at each step during processing */ - progress_image: components["schemas"]["ProgressImage"]; - /** - * Step - * @description The current step of the invocation - */ - step: number; - /** - * Total Steps - * @description The total number of steps in the invocation - */ - total_steps: number; - /** - * Order - * @description The order of the invocation in the session - */ - order: number; - /** - * Percentage - * @description The percentage of completion of the invocation - */ - percentage: number; - }; - /** - * InvocationErrorEvent - * @description Event model for invocation_error - */ - InvocationErrorEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - 
queue_id: string; - /** - * Item Id - * @description The ID of the queue item - */ - item_id: number; - /** - * Batch Id - * @description The ID of the queue batch - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session (aka graph execution state) - */ - session_id: string; - /** - * Invocation - * @description The ID of the invocation - */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | 
components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | 
components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; - /** - * Invocation Source Id - * @description The ID of the prepared invocation's source node - */ - invocation_source_id: string; - /** - * Error Type - * @description The error type - */ - error_type: string; - /** - * Error Message - * @description The error message - */ - error_message: string; - /** - * Error Traceback - * @description The error traceback - */ - error_traceback: string; - /** - * User Id - * @description The ID of the user who created the invocation - * @default null - */ - user_id: string | null; - /** - * Project Id - * @description The ID of the user who created the invocation - * @default null - */ - project_id: string | null; - }; - InvocationOutputMap: { - img_conv: components["schemas"]["ImageOutput"]; - mlsd_image_processor: components["schemas"]["ImageOutput"]; - canny_image_processor: components["schemas"]["ImageOutput"]; - alpha_mask_to_tensor: components["schemas"]["MaskOutput"]; - img_watermark: components["schemas"]["ImageOutput"]; - img_mul: components["schemas"]["ImageOutput"]; - float_to_int: components["schemas"]["IntegerOutput"]; - normalbae_image_processor: components["schemas"]["ImageOutput"]; - img_channel_multiply: components["schemas"]["ImageOutput"]; - string_split: components["schemas"]["String2Output"]; - grounding_dino: components["schemas"]["BoundingBoxCollectionOutput"]; - scheduler: components["schemas"]["SchedulerOutput"]; - float_range: components["schemas"]["FloatCollectionOutput"]; - metadata_item: components["schemas"]["MetadataItemOutput"]; - rand_float: components["schemas"]["FloatOutput"]; - infill_lama: components["schemas"]["ImageOutput"]; - img_resize: components["schemas"]["ImageOutput"]; - string_replace: components["schemas"]["StringOutput"]; - compel: components["schemas"]["ConditioningOutput"]; - segment_anything: components["schemas"]["MaskOutput"]; - flux_text_encoder: components["schemas"]["ConditioningOutput"]; - cv_inpaint: components["schemas"]["ImageOutput"]; - image_collection: components["schemas"]["ImageCollectionOutput"]; - pidi_image_processor: components["schemas"]["ImageOutput"]; - infill_tile: components["schemas"]["ImageOutput"]; - unsharp_mask: components["schemas"]["ImageOutput"]; - string_join: components["schemas"]["StringOutput"]; - blank_image: components["schemas"]["ImageOutput"]; - save_image: components["schemas"]["ImageOutput"]; - show_image: components["schemas"]["ImageOutput"]; - string_split_neg: components["schemas"]["StringPosNegOutput"]; - face_mask_detection: components["schemas"]["FaceMaskOutput"]; - conditioning: 
components["schemas"]["ConditioningOutput"]; - metadata: components["schemas"]["MetadataOutput"]; - collect: components["schemas"]["CollectInvocationOutput"]; - pair_tile_image: components["schemas"]["PairTileImageOutput"]; - spandrel_image_to_image: components["schemas"]["ImageOutput"]; - lora_selector: components["schemas"]["LoRASelectorOutput"]; - face_identifier: components["schemas"]["ImageOutput"]; - depth_anything_image_processor: components["schemas"]["ImageOutput"]; - boolean_collection: components["schemas"]["BooleanCollectionOutput"]; - tensor_mask_to_image: components["schemas"]["ImageOutput"]; - esrgan: components["schemas"]["ImageOutput"]; - img_nsfw: components["schemas"]["ImageOutput"]; - calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"]; - img_scale: components["schemas"]["ImageOutput"]; - lora_collection_loader: components["schemas"]["LoRALoaderOutput"]; - img_pad_crop: components["schemas"]["ImageOutput"]; - create_gradient_mask: components["schemas"]["GradientMaskOutput"]; - leres_image_processor: components["schemas"]["ImageOutput"]; - img_blur: components["schemas"]["ImageOutput"]; - main_model_loader: components["schemas"]["ModelLoaderOutput"]; - dynamic_prompt: components["schemas"]["StringCollectionOutput"]; - lblend: components["schemas"]["LatentsOutput"]; - rectangle_mask: components["schemas"]["MaskOutput"]; - add: components["schemas"]["IntegerOutput"]; - img_lerp: components["schemas"]["ImageOutput"]; - sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"]; - flux_text_to_image: components["schemas"]["ImageOutput"]; - lresize: components["schemas"]["LatentsOutput"]; - dw_openpose_image_processor: components["schemas"]["ImageOutput"]; - noise: components["schemas"]["NoiseOutput"]; - canvas_paste_back: components["schemas"]["ImageOutput"]; - range: components["schemas"]["IntegerCollectionOutput"]; - color: components["schemas"]["ColorOutput"]; - sub: components["schemas"]["IntegerOutput"]; - ip_adapter: components["schemas"]["IPAdapterOutput"]; - crop_latents: components["schemas"]["LatentsOutput"]; - latents: components["schemas"]["LatentsOutput"]; - color_map_image_processor: components["schemas"]["ImageOutput"]; - float: components["schemas"]["FloatOutput"]; - infill_cv2: components["schemas"]["ImageOutput"]; - lscale: components["schemas"]["LatentsOutput"]; - denoise_latents: components["schemas"]["LatentsOutput"]; - string_collection: components["schemas"]["StringCollectionOutput"]; - zoe_depth_image_processor: components["schemas"]["ImageOutput"]; - mul: components["schemas"]["IntegerOutput"]; - clip_skip: components["schemas"]["CLIPSkipInvocationOutput"]; - sdxl_lora_loader: components["schemas"]["SDXLLoRALoaderOutput"]; - sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"]; - step_param_easing: components["schemas"]["FloatCollectionOutput"]; - tile_image_processor: components["schemas"]["ImageOutput"]; - merge_metadata: components["schemas"]["MetadataOutput"]; - midas_depth_image_processor: components["schemas"]["ImageOutput"]; - invert_tensor_mask: components["schemas"]["MaskOutput"]; - tile_to_properties: components["schemas"]["TileToPropertiesOutput"]; - sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"]; - freeu: components["schemas"]["UNetOutput"]; - bounding_box: components["schemas"]["BoundingBoxOutput"]; - infill_rgba: components["schemas"]["ImageOutput"]; - image_mask_to_tensor: components["schemas"]["MaskOutput"]; - seamless: 
components["schemas"]["SeamlessModeOutput"]; - model_identifier: components["schemas"]["ModelIdentifierOutput"]; - conditioning_collection: components["schemas"]["ConditioningCollectionOutput"]; - heuristic_resize: components["schemas"]["ImageOutput"]; - content_shuffle_image_processor: components["schemas"]["ImageOutput"]; - boolean: components["schemas"]["BooleanOutput"]; - string_join_three: components["schemas"]["StringOutput"]; - t2i_adapter: components["schemas"]["T2IAdapterOutput"]; - mask_from_id: components["schemas"]["ImageOutput"]; - tiled_multi_diffusion_denoise_latents: components["schemas"]["LatentsOutput"]; - prompt_from_file: components["schemas"]["StringCollectionOutput"]; - range_of_size: components["schemas"]["IntegerCollectionOutput"]; - float_math: components["schemas"]["FloatOutput"]; - controlnet: components["schemas"]["ControlOutput"]; - mask_combine: components["schemas"]["ImageOutput"]; - img_ilerp: components["schemas"]["ImageOutput"]; - img_paste: components["schemas"]["ImageOutput"]; - latents_collection: components["schemas"]["LatentsCollectionOutput"]; - round_float: components["schemas"]["FloatOutput"]; - lineart_anime_image_processor: components["schemas"]["ImageOutput"]; - mask_edge: components["schemas"]["ImageOutput"]; - img_chan: components["schemas"]["ImageOutput"]; - vae_loader: components["schemas"]["VAEOutput"]; - l2i: components["schemas"]["ImageOutput"]; - img_hue_adjust: components["schemas"]["ImageOutput"]; - img_crop: components["schemas"]["ImageOutput"]; - image: components["schemas"]["ImageOutput"]; - core_metadata: components["schemas"]["MetadataOutput"]; - float_collection: components["schemas"]["FloatCollectionOutput"]; - infill_patchmatch: components["schemas"]["ImageOutput"]; - iterate: components["schemas"]["IterateInvocationOutput"]; - color_correct: components["schemas"]["ImageOutput"]; - calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"]; - hed_image_processor: components["schemas"]["ImageOutput"]; - sdxl_compel_prompt: components["schemas"]["ConditioningOutput"]; - sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"]; - face_off: components["schemas"]["FaceOffOutput"]; - random_range: components["schemas"]["IntegerCollectionOutput"]; - string: components["schemas"]["StringOutput"]; - rand_int: components["schemas"]["IntegerOutput"]; - merge_tiles_to_image: components["schemas"]["ImageOutput"]; - calculate_image_tiles_min_overlap: components["schemas"]["CalculateImageTilesOutput"]; - lora_loader: components["schemas"]["LoRALoaderOutput"]; - segment_anything_processor: components["schemas"]["ImageOutput"]; - integer_math: components["schemas"]["IntegerOutput"]; - tomask: components["schemas"]["ImageOutput"]; - spandrel_image_to_image_autoscale: components["schemas"]["ImageOutput"]; - flux_model_loader: components["schemas"]["FluxModelLoaderOutput"]; - i2l: components["schemas"]["LatentsOutput"]; - mediapipe_face_processor: components["schemas"]["ImageOutput"]; - integer: components["schemas"]["IntegerOutput"]; - lineart_image_processor: components["schemas"]["ImageOutput"]; - div: components["schemas"]["IntegerOutput"]; - img_channel_offset: components["schemas"]["ImageOutput"]; - ideal_size: components["schemas"]["IdealSizeOutput"]; - create_denoise_mask: components["schemas"]["DenoiseMaskOutput"]; - integer_collection: components["schemas"]["IntegerCollectionOutput"]; - }; - /** - * InvocationStartedEvent - * @description Event model for invocation_started - */ - 
InvocationStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Item Id - * @description The ID of the queue item - */ - item_id: number; - /** - * Batch Id - * @description The ID of the queue batch - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session (aka graph execution state) - */ - session_id: string; - /** - * Invocation - * @description The ID of the invocation - */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxTextToImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | 
components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | 
components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; - /** - * Invocation Source Id - * @description The ID of the prepared invocation's source node - */ - invocation_source_id: string; - }; - /** - * IterateInvocation - * @description Iterates over a list of items - */ - IterateInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The list of items to iterate over - * @default [] - */ - collection?: unknown[]; - /** - * Index - * @description The index, will be provided on executed iterators - * @default 0 - */ - index?: number; - /** - * type - * @default iterate - * @constant - * @enum {string} - */ - type: "iterate"; - }; - /** - * IterateInvocationOutput - * @description Used to connect iteration outputs. Will be expanded to a specific output. - */ - IterateInvocationOutput: { - /** - * Collection Item - * @description The item being iterated over - */ - item: unknown; - /** - * Index - * @description The index of the item - */ - index: number; - /** - * Total - * @description The total number of items - */ - total: number; - /** - * type - * @default iterate_output - * @constant - * @enum {string} - */ - type: "iterate_output"; - }; - JsonValue: unknown; - /** - * LaMa Infill - * @description Infills transparent areas of an image using the LaMa model - */ - LaMaInfillInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default infill_lama - * @constant - * @enum {string} - */ - type: "infill_lama"; - }; - /** - * Latents Collection Primitive - * @description A collection of latents tensor primitive values - */ - LatentsCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of latents tensors - * @default null - */ - collection?: components["schemas"]["LatentsField"][]; - /** - * type - * @default latents_collection - * @constant - * @enum {string} - */ - type: "latents_collection"; - }; - /** - * LatentsCollectionOutput - * @description Base class for nodes that output a collection of latents tensors - */ - LatentsCollectionOutput: { - /** - * Collection - * @description Latents tensor - */ - collection: components["schemas"]["LatentsField"][]; - /** - * type - * @default latents_collection_output - * @constant - * @enum {string} - */ - type: "latents_collection_output"; - }; - /** - * LatentsField - * @description A latents tensor primitive field - */ - LatentsField: { - /** - * Latents Name - * @description The name of the latents - */ - latents_name: string; - /** - * Seed - * @description Seed used to generate this latents - * @default null - */ - seed?: number | null; - }; - /** - * Latents Primitive - * @description A latents tensor primitive value - */ - LatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"]; - /** - * type - * @default latents - * @constant - * @enum {string} - */ - type: "latents"; - }; - /** - * LatentsOutput - * @description Base class for nodes that output a single latents tensor - */ - LatentsOutput: { - /** @description Latents tensor */ - latents: components["schemas"]["LatentsField"]; - /** - * Width - * @description Width of output (px) - */ - width: number; - /** - * Height - * @description Height of output (px) - */ - height: number; - /** - * type - * @default latents_output - * @constant - * @enum {string} - */ - type: "latents_output"; - }; - /** - * Latents to Image - * @description Generates an image from latents. 
- */ - LatentsToImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"]; - /** - * @description VAE - * @default null - */ - vae?: components["schemas"]["VAEField"]; - /** - * Tiled - * @description Processing using overlapping tiles (reduce memory consumption) - * @default false - */ - tiled?: boolean; - /** - * Tile Size - * @description The tile size for VAE tiling in pixels (image space). If set to 0, the default tile size for the model will be used. Larger tile sizes generally produce better results at the cost of higher memory usage. - * @default 0 - */ - tile_size?: number; - /** - * Fp32 - * @description Whether or not to use full float32 precision - * @default false - */ - fp32?: boolean; - /** - * type - * @default l2i - * @constant - * @enum {string} - */ - type: "l2i"; - }; - /** - * Leres (Depth) Processor - * @description Applies leres processing to image - */ - LeresImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Thr A - * @description Leres parameter `thr_a` - * @default 0 - */ - thr_a?: number; - /** - * Thr B - * @description Leres parameter `thr_b` - * @default 0 - */ - thr_b?: number; - /** - * Boost - * @description Whether to use boost mode - * @default false - */ - boost?: boolean; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default leres_image_processor - * @constant - * @enum {string} - */ - type: "leres_image_processor"; - }; - /** - * Lineart Anime Processor - * @description Applies line art anime processing to image - */ - LineartAnimeImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default lineart_anime_image_processor - * @constant - * @enum {string} - */ - type: "lineart_anime_image_processor"; - }; - /** - * Lineart Processor - * @description Applies line art processing to image - */ - LineartImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * Coarse - * @description Whether to use coarse mode - * @default false - */ - coarse?: boolean; - /** - * type - * @default lineart_image_processor - * @constant - * @enum {string} - */ - type: "lineart_image_processor"; - }; - /** - * LoRA Collection Loader - * @description Applies a collection of LoRAs to the provided UNet and CLIP models. - */ - LoRACollectionLoader: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * LoRAs - * @description LoRA models and weights. May be a single LoRA or collection. - * @default null - */ - loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][]; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"] | null; - /** - * type - * @default lora_collection_loader - * @constant - * @enum {string} - */ - type: "lora_collection_loader"; - }; - /** - * LoRADiffusersConfig - * @description Model config for LoRA/Diffusers models. - */ - LoRADiffusersConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default lora - * @constant - * @enum {string} - */ - type: "lora"; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - }; - /** LoRAField */ - LoRAField: { - /** @description Info to load lora model */ - lora: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description Weight to apply to lora model - */ - weight: number; - }; - /** - * LoRA - * @description Apply selected lora to unet and text_encoder. - */ - LoRALoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * LoRA - * @description LoRA model to load - * @default null - */ - lora?: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight at which the LoRA is applied to each model - * @default 0.75 - */ - weight?: number; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"] | null; - /** - * type - * @default lora_loader - * @constant - * @enum {string} - */ - type: "lora_loader"; - }; - /** - * LoRALoaderOutput - * @description Model loader output - */ - LoRALoaderOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet: components["schemas"]["UNetField"] | null; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip: components["schemas"]["CLIPField"] | null; - /** - * type - * @default lora_loader_output - * @constant - * @enum {string} - */ - type: "lora_loader_output"; - }; - /** - * LoRALyCORISConfig - * @description Model config for LoRA/Lycoris models. - */ - LoRALyCORISConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default lora - * @constant - * @enum {string} - */ - type: "lora"; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** - * Format - * @default lycoris - * @constant - * @enum {string} - */ - format: "lycoris"; - }; - /** - * LoRAMetadataField - * @description LoRA Metadata Field - */ - LoRAMetadataField: { - /** @description LoRA model to load */ - model: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight at which the LoRA is applied to each model - */ - weight: number; - }; - /** - * LoRA Selector - * @description Selects a LoRA model and weight. - */ - LoRASelectorInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * LoRA - * @description LoRA model to load - * @default null - */ - lora?: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight at which the LoRA is applied to each model - * @default 0.75 - */ - weight?: number; - /** - * type - * @default lora_selector - * @constant - * @enum {string} - */ - type: "lora_selector"; - }; - /** - * LoRASelectorOutput - * @description Model loader output - */ - LoRASelectorOutput: { - /** - * LoRA - * @description LoRA model and weight - */ - lora: components["schemas"]["LoRAField"]; - /** - * type - * @default lora_selector_output - * @constant - * @enum {string} - */ - type: "lora_selector_output"; - }; - /** - * LocalModelSource - * @description A local file or directory path. - */ - LocalModelSource: { - /** Path */ - path: string; - /** - * Inplace - * @default false - */ - inplace?: boolean | null; - /** - * Type - * @default local - * @constant - * @enum {string} - */ - type?: "local"; - }; - /** - * LogLevel - * @enum {integer} - */ - LogLevel: 0 | 10 | 20 | 30 | 40 | 50; - /** - * MainBnbQuantized4bCheckpointConfig - * @description Model config for main checkpoint models. - */ - MainBnbQuantized4bCheckpointConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default main - * @constant - * @enum {string} - */ - type: "main"; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** @description Default settings for this model */ - default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; - /** @default normal */ - variant?: components["schemas"]["ModelVariantType"]; - /** - * Format - * @description Format of the provided checkpoint model - * @default checkpoint - * @enum {string} - */ - format: "checkpoint" | "bnb_quantized_nf4b"; - /** - * Config Path - * @description path to the checkpoint model config file - */ - config_path: string; - /** - * Converted At - * @description When this model was last converted to diffusers - */ - converted_at?: number | null; - /** @default epsilon */ - prediction_type?: components["schemas"]["SchedulerPredictionType"]; - /** - * Upcast Attention - * @default false - */ - upcast_attention?: boolean; - }; - /** - * MainCheckpointConfig - * @description Model config for main checkpoint models. - */ - MainCheckpointConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default main - * @constant - * @enum {string} - */ - type: "main"; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** @description Default settings for this model */ - default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; - /** @default normal */ - variant?: components["schemas"]["ModelVariantType"]; - /** - * Format - * @description Format of the provided checkpoint model - * @default checkpoint - * @enum {string} - */ - format: "checkpoint" | "bnb_quantized_nf4b"; - /** - * Config Path - * @description path to the checkpoint model config file - */ - config_path: string; - /** - * Converted At - * @description When this model was last converted to diffusers - */ - converted_at?: number | null; - /** @default epsilon */ - prediction_type?: components["schemas"]["SchedulerPredictionType"]; - /** - * Upcast Attention - * @default false - */ - upcast_attention?: boolean; - }; - /** - * MainDiffusersConfig - * @description Model config for main diffusers models. - */ - MainDiffusersConfig: { - /** - * Key - * @description A unique key for this model. 
- */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default main - * @constant - * @enum {string} - */ - type: "main"; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** @description Default settings for this model */ - default_settings?: components["schemas"]["MainModelDefaultSettings"] | null; - /** @default normal */ - variant?: components["schemas"]["ModelVariantType"]; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - /** @default */ - repo_variant?: components["schemas"]["ModelRepoVariant"] | null; - }; - /** MainModelDefaultSettings */ - MainModelDefaultSettings: { - /** - * Vae - * @description Default VAE for this model (model key) - */ - vae?: string | null; - /** - * Vae Precision - * @description Default VAE precision for this model - */ - vae_precision?: ("fp16" | "fp32") | null; - /** - * Scheduler - * @description Default scheduler for this model - */ - scheduler?: ("ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd") | null; - /** - * Steps - * @description Default number of steps for this model - */ - steps?: number | null; - /** - * Cfg Scale - * @description Default CFG Scale for this model - */ - cfg_scale?: number | null; - /** - * Cfg Rescale Multiplier - * @description Default CFG Rescale Multiplier for this model - */ - cfg_rescale_multiplier?: number | null; - /** - * Width - * @description Default width for this model - */ - width?: number | null; - /** - * Height - * @description Default height for this model - */ - height?: number | null; - }; - /** - * Main Model - * @description Loads a main model, outputting its submodels. - */ - MainModelLoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Main model (UNet, VAE, CLIP) to load - * @default null - */ - model?: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default main_model_loader - * @constant - * @enum {string} - */ - type: "main_model_loader"; - }; - /** - * Combine Masks - * @description Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`. - */ - MaskCombineInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The first mask to combine - * @default null - */ - mask1?: components["schemas"]["ImageField"]; - /** - * @description The second image to combine - * @default null - */ - mask2?: components["schemas"]["ImageField"]; - /** - * type - * @default mask_combine - * @constant - * @enum {string} - */ - type: "mask_combine"; - }; - /** - * Mask Edge - * @description Applies an edge mask to an image - */ - MaskEdgeInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to apply the mask to - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Edge Size - * @description The size of the edge - * @default null - */ - edge_size?: number; - /** - * Edge Blur - * @description The amount of blur on the edge - * @default null - */ - edge_blur?: number; - /** - * Low Threshold - * @description First threshold for the hysteresis procedure in Canny edge detection - * @default null - */ - low_threshold?: number; - /** - * High Threshold - * @description Second threshold for the hysteresis procedure in Canny edge detection - * @default null - */ - high_threshold?: number; - /** - * type - * @default mask_edge - * @constant - * @enum {string} - */ - type: "mask_edge"; - }; - /** - * Mask from Alpha - * @description Extracts the alpha channel of an image as a mask. 
- */ - MaskFromAlphaInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to create the mask from - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Invert - * @description Whether or not to invert the mask - * @default false - */ - invert?: boolean; - /** - * type - * @default tomask - * @constant - * @enum {string} - */ - type: "tomask"; - }; - /** - * Mask from ID - * @description Generate a mask for a particular color in an ID Map - */ - MaskFromIDInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to create the mask from - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description ID color to mask - * @default null - */ - color?: components["schemas"]["ColorField"]; - /** - * Threshold - * @description Threshold for color detection - * @default 100 - */ - threshold?: number; - /** - * Invert - * @description Whether or not to invert the mask - * @default false - */ - invert?: boolean; - /** - * type - * @default mask_from_id - * @constant - * @enum {string} - */ - type: "mask_from_id"; - }; - /** - * MaskOutput - * @description A torch mask tensor. - */ - MaskOutput: { - /** @description The mask. */ - mask: components["schemas"]["TensorField"]; - /** - * Width - * @description The width of the mask in pixels. - */ - width: number; - /** - * Height - * @description The height of the mask in pixels. - */ - height: number; - /** - * type - * @default mask_output - * @constant - * @enum {string} - */ - type: "mask_output"; - }; - /** - * Tensor Mask to Image - * @description Convert a mask tensor to an image. - */ - MaskTensorToImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The mask tensor to convert. - * @default null - */ - mask?: components["schemas"]["TensorField"]; - /** - * type - * @default tensor_mask_to_image - * @constant - * @enum {string} - */ - type: "tensor_mask_to_image"; - }; - /** - * Mediapipe Face Processor - * @description Applies mediapipe face processing to image - */ - MediapipeFaceProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Max Faces - * @description Maximum number of faces to detect - * @default 1 - */ - max_faces?: number; - /** - * Min Confidence - * @description Minimum confidence for face detection - * @default 0.5 - */ - min_confidence?: number; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default mediapipe_face_processor - * @constant - * @enum {string} - */ - type: "mediapipe_face_processor"; - }; - /** - * Metadata Merge - * @description Merged a collection of MetadataDict into a single MetadataDict. - */ - MergeMetadataInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description Collection of Metadata - * @default null - */ - collection?: components["schemas"]["MetadataField"][]; - /** - * type - * @default merge_metadata - * @constant - * @enum {string} - */ - type: "merge_metadata"; - }; - /** - * Merge Tiles to Image - * @description Merge multiple tile images into a single image. - */ - MergeTilesToImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Tiles With Images - * @description A list of tile images with tile properties. - * @default null - */ - tiles_with_images?: components["schemas"]["TileWithImage"][]; - /** - * Blend Mode - * @description blending type Linear or Seam - * @default Seam - * @enum {string} - */ - blend_mode?: "Linear" | "Seam"; - /** - * Blend Amount - * @description The amount to blend adjacent tiles in pixels. Must be <= the amount of overlap between adjacent tiles. - * @default 32 - */ - blend_amount?: number; - /** - * type - * @default merge_tiles_to_image - * @constant - * @enum {string} - */ - type: "merge_tiles_to_image"; - }; - /** - * MetadataField - * @description Pydantic model for metadata with custom root of type dict[str, Any]. - * Metadata is stored without a strict schema. - */ - MetadataField: Record; - /** - * Metadata - * @description Takes a MetadataItem or collection of MetadataItems and outputs a MetadataDict. - */ - MetadataInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Items - * @description A single metadata item or collection of metadata items - * @default null - */ - items?: components["schemas"]["MetadataItemField"][] | components["schemas"]["MetadataItemField"]; - /** - * type - * @default metadata - * @constant - * @enum {string} - */ - type: "metadata"; - }; - /** MetadataItemField */ - MetadataItemField: { - /** - * Label - * @description Label for this metadata item - */ - label: string; - /** - * Value - * @description The value for this metadata item (may be any type) - */ - value: unknown; - }; - /** - * Metadata Item - * @description Used to create an arbitrary metadata item. Provide "label" and make a connection to "value" to store that data as the value. - */ - MetadataItemInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Label - * @description Label for this metadata item - * @default null - */ - label?: string; - /** - * Value - * @description The value for this metadata item (may be any type) - * @default null - */ - value?: unknown; - /** - * type - * @default metadata_item - * @constant - * @enum {string} - */ - type: "metadata_item"; - }; - /** - * MetadataItemOutput - * @description Metadata Item Output - */ - MetadataItemOutput: { - /** @description Metadata Item */ - item: components["schemas"]["MetadataItemField"]; - /** - * type - * @default metadata_item_output - * @constant - * @enum {string} - */ - type: "metadata_item_output"; - }; - /** MetadataOutput */ - MetadataOutput: { - /** @description Metadata Dict */ - metadata: components["schemas"]["MetadataField"]; - /** - * type - * @default metadata_output - * @constant - * @enum {string} - */ - type: "metadata_output"; - }; - /** - * Midas Depth Processor - * @description Applies Midas depth processing to image - */ - MidasDepthImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * A Mult - * @description Midas parameter `a_mult` (a = a_mult * PI) - * @default 2 - */ - a_mult?: number; - /** - * Bg Th - * @description Midas parameter `bg_th` - * @default 0.1 - */ - bg_th?: number; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default midas_depth_image_processor - * @constant - * @enum {string} - */ - type: "midas_depth_image_processor"; - }; - /** - * MLSD Processor - * @description Applies MLSD processing to image - */ - MlsdImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * Thr V - * @description MLSD parameter `thr_v` - * @default 0.1 - */ - thr_v?: number; - /** - * Thr D - * @description MLSD parameter `thr_d` - * @default 0.1 - */ - thr_d?: number; - /** - * type - * @default mlsd_image_processor - * @constant - * @enum {string} - */ - type: "mlsd_image_processor"; - }; - /** - * ModelFormat - * @description Storage format of model. - * @enum {string} - */ - ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b" | "bnb_quantized_nf4b"; - /** ModelIdentifierField */ - ModelIdentifierField: { - /** - * Key - * @description The model's unique key - */ - key: string; - /** - * Hash - * @description The model's BLAKE3 hash - */ - hash: string; - /** - * Name - * @description The model's name - */ - name: string; - /** @description The model's base model type */ - base: components["schemas"]["BaseModelType"]; - /** @description The model's type */ - type: components["schemas"]["ModelType"]; - /** - * @description The submodel to load, if this is a main model - * @default null - */ - submodel_type?: components["schemas"]["SubModelType"] | null; - }; - /** - * Model identifier - * @description Selects any model, outputting it its identifier. Be careful with this one! The identifier will be accepted as - * input for any model, even if the model types don't match. If you connect this to a mismatched input, you'll get an - * error. - */ - ModelIdentifierInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Model - * @description The model to select - * @default null - */ - model?: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default model_identifier - * @constant - * @enum {string} - */ - type: "model_identifier"; - }; - /** - * ModelIdentifierOutput - * @description Model identifier output - */ - ModelIdentifierOutput: { - /** - * Model - * @description Model identifier - */ - model: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default model_identifier_output - * @constant - * @enum {string} - */ - type: "model_identifier_output"; - }; - /** - * ModelInstallCancelledEvent - * @description Event model for model_install_cancelled - */ - ModelInstallCancelledEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - }; - /** - * ModelInstallCompleteEvent - * @description Event model for model_install_complete - */ - ModelInstallCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - /** - * Key - * @description Model config record key - */ - key: string; - /** - * Total Bytes - * @description Size of the model (may be None for installation of a local path) - */ - total_bytes: number | null; - }; - /** - * ModelInstallDownloadProgressEvent - * @description Event model for model_install_download_progress - */ - ModelInstallDownloadProgressEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - /** - * Local Path - * @description Where model is downloading to - */ - local_path: string; - /** - * Bytes - * @description Number of bytes downloaded so far - */ - bytes: number; - /** - * Total Bytes - * @description Total size of download, including all files - */ - total_bytes: number; - /** - * Parts - * @description Progress of downloading URLs that comprise the model, if any - */ - parts: ({ - [key: string]: number | string; - })[]; - }; - /** - * ModelInstallDownloadStartedEvent - * @description Event model for model_install_download_started - */ - ModelInstallDownloadStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - /** - * Local Path - * @description Where model is downloading to - */ - local_path: string; - /** - * Bytes - * @description Number of bytes downloaded so far - */ - bytes: number; - /** - * Total Bytes - * @description Total size of download, including all files - */ - total_bytes: number; - /** - * Parts - * @description Progress of downloading URLs that comprise the model, if any - */ - parts: ({ - [key: string]: number | string; - })[]; - }; - /** - * 
ModelInstallDownloadsCompleteEvent - * @description Emitted once when an install job becomes active. - */ - ModelInstallDownloadsCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - }; - /** - * ModelInstallErrorEvent - * @description Event model for model_install_error - */ - ModelInstallErrorEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - /** - * Error Type - * @description The name of the exception - */ - error_type: string; - /** - * Error - * @description A text description of the exception - */ - error: string; - }; - /** - * ModelInstallJob - * @description Object that tracks the current status of an install request. - */ - ModelInstallJob: { - /** - * Id - * @description Unique ID for this job - */ - id: number; - /** - * @description Current status of install process - * @default waiting - */ - status?: components["schemas"]["InstallStatus"]; - /** - * Error Reason - * @description Information about why the job failed - */ - error_reason?: string | null; - /** @description Configuration information (e.g. 'description') to apply to model. */ - config_in?: components["schemas"]["ModelRecordChanges"]; - /** - * Config Out - * @description After successful installation, this will hold the configuration object. - */ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; - /** - * Inplace - * @description Leave model in its current location; otherwise install under models directory - * @default false - */ - inplace?: boolean; - /** - * Source - * @description Source (URL, repo_id, or local path) of model - */ - source: components["schemas"]["LocalModelSource"] | components["schemas"]["HFModelSource"] | components["schemas"]["URLModelSource"]; - /** - * Local Path - * Format: path - * @description Path to locally-downloaded model; may be the same as the source - */ - local_path: string; - /** - * Bytes - * @description For a remote model, the number of bytes downloaded so far (may not be available) - * @default 0 - */ - bytes?: number; - /** - * Total Bytes - * @description Total size of the model to be installed - * @default 0 - */ - total_bytes?: number; - /** - * Source Metadata - * @description Metadata provided by the 
model source - */ - source_metadata?: (components["schemas"]["BaseMetadata"] | components["schemas"]["HuggingFaceMetadata"]) | null; - /** - * Download Parts - * @description Download jobs contributing to this install - */ - download_parts?: components["schemas"]["DownloadJob"][]; - /** - * Error - * @description On an error condition, this field will contain the text of the exception - */ - error?: string | null; - /** - * Error Traceback - * @description On an error condition, this field will contain the exception traceback - */ - error_traceback?: string | null; - }; - /** - * ModelInstallStartedEvent - * @description Event model for model_install_started - */ - ModelInstallStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Id - * @description The ID of the install job - */ - id: number; - /** - * Source - * @description Source of the model; local path, repo_id or url - */ - source: string; - }; - /** - * ModelLoadCompleteEvent - * @description Event model for model_load_complete - */ - ModelLoadCompleteEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Config - * @description The model's config - */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; - /** - * @description The submodel type, if any - * @default null - */ - submodel_type: components["schemas"]["SubModelType"] | null; - }; - /** - * ModelLoadStartedEvent - * @description Event model for model_load_started - */ - ModelLoadStartedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Config - * @description The model's config - */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | 
components["schemas"]["CLIPEmbedDiffusersConfig"]; - /** - * @description The submodel type, if any - * @default null - */ - submodel_type: components["schemas"]["SubModelType"] | null; - }; - /** - * ModelLoaderOutput - * @description Model loader output - */ - ModelLoaderOutput: { - /** - * VAE - * @description VAE - */ - vae: components["schemas"]["VAEField"]; - /** - * type - * @default model_loader_output - * @constant - * @enum {string} - */ - type: "model_loader_output"; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip: components["schemas"]["CLIPField"]; - /** - * UNet - * @description UNet (scheduler, LoRAs) - */ - unet: components["schemas"]["UNetField"]; - }; - /** - * ModelRecordChanges - * @description A set of changes to apply to a model. - */ - ModelRecordChanges: { - /** - * Source - * @description original source of the model - */ - source?: string | null; - /** @description type of model source */ - source_type?: components["schemas"]["ModelSourceType"] | null; - /** - * Source Api Response - * @description metadata from remote source - */ - source_api_response?: string | null; - /** - * Name - * @description Name of the model. - */ - name?: string | null; - /** - * Path - * @description Path to the model. - */ - path?: string | null; - /** - * Description - * @description Model description - */ - description?: string | null; - /** @description The base model. */ - base?: components["schemas"]["BaseModelType"] | null; - /** @description Type of model */ - type?: components["schemas"]["ModelType"] | null; - /** - * Key - * @description Database ID for this model - */ - key?: string | null; - /** - * Hash - * @description hash of model file - */ - hash?: string | null; - /** - * Format - * @description format of model file - */ - format?: string | null; - /** - * Trigger Phrases - * @description Set of trigger phrases for this model - */ - trigger_phrases?: string[] | null; - /** - * Default Settings - * @description Default settings for this model - */ - default_settings?: components["schemas"]["MainModelDefaultSettings"] | components["schemas"]["ControlAdapterDefaultSettings"] | null; - /** @description The variant of the model. */ - variant?: components["schemas"]["ModelVariantType"] | null; - /** @description The prediction type of the model. */ - prediction_type?: components["schemas"]["SchedulerPredictionType"] | null; - /** - * Upcast Attention - * @description Whether to upcast attention. - */ - upcast_attention?: boolean | null; - /** - * Config Path - * @description Path to config file for model - */ - config_path?: string | null; - }; - /** - * ModelRepoVariant - * @description Various hugging face variants on the diffusers format. - * @enum {string} - */ - ModelRepoVariant: "" | "fp16" | "fp32" | "onnx" | "openvino" | "flax"; - /** - * ModelSourceType - * @description Model source type. - * @enum {string} - */ - ModelSourceType: "path" | "url" | "hf_repo_id"; - /** - * ModelType - * @description Model type. - * @enum {string} - */ - ModelType: "onnx" | "main" | "vae" | "lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "clip_embed" | "t2i_adapter" | "t5_encoder" | "spandrel_image_to_image"; - /** - * ModelVariantType - * @description Variant type. - * @enum {string} - */ - ModelVariantType: "normal" | "inpaint" | "depth"; - /** - * ModelsList - * @description Return list of configs. 
- */ - ModelsList: { - /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; - }; - /** - * Multiply Integers - * @description Multiplies two numbers - */ - MultiplyInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * A - * @description The first number - * @default 0 - */ - a?: number; - /** - * B - * @description The second number - * @default 0 - */ - b?: number; - /** - * type - * @default mul - * @constant - * @enum {string} - */ - type: "mul"; - }; - /** NodeFieldValue */ - NodeFieldValue: { - /** - * Node Path - * @description The node into which this batch data item will be substituted. - */ - node_path: string; - /** - * Field Name - * @description The field into which this batch data item will be substituted. - */ - field_name: string; - /** - * Value - * @description The value to substitute into the node/field. - */ - value: string | number; - }; - /** - * Noise - * @description Generates latent noise. - */ - NoiseInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Seed - * @description Seed for random number generation - * @default 0 - */ - seed?: number; - /** - * Width - * @description Width of output (px) - * @default 512 - */ - width?: number; - /** - * Height - * @description Height of output (px) - * @default 512 - */ - height?: number; - /** - * Use Cpu - * @description Use CPU for noise generation (for reproducible results across platforms) - * @default true - */ - use_cpu?: boolean; - /** - * type - * @default noise - * @constant - * @enum {string} - */ - type: "noise"; - }; - /** - * NoiseOutput - * @description Invocation noise output - */ - NoiseOutput: { - /** @description Noise tensor */ - noise: components["schemas"]["LatentsField"]; - /** - * Width - * @description Width of output (px) - */ - width: number; - /** - * Height - * @description Height of output (px) - */ - height: number; - /** - * type - * @default noise_output - * @constant - * @enum {string} - */ - type: "noise_output"; - }; - /** - * Normal BAE Processor - * @description Applies NormalBae processing to image - */ - NormalbaeImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default normalbae_image_processor - * @constant - * @enum {string} - */ - type: "normalbae_image_processor"; - }; - /** OffsetPaginatedResults[BoardDTO] */ - OffsetPaginatedResults_BoardDTO_: { - /** - * Limit - * @description Limit of items to get - */ - limit: number; - /** - * Offset - * @description Offset from which to retrieve items - */ - offset: number; - /** - * Total - * @description Total number of items in result - */ - total: number; - /** - * Items - * @description Items - */ - items: components["schemas"]["BoardDTO"][]; - }; - /** OffsetPaginatedResults[ImageDTO] */ - OffsetPaginatedResults_ImageDTO_: { - /** - * Limit - * @description Limit of items to get - */ - limit: number; - /** - * Offset - * @description Offset from which to retrieve items - */ - offset: number; - /** - * Total - * @description Total number of items in result - */ - total: number; - /** - * Items - * @description Items - */ - items: components["schemas"]["ImageDTO"][]; - }; - /** - * OutputFieldJSONSchemaExtra - * @description Extra attributes to be added to input fields and their OpenAPI schema. Used by the workflow editor - * during schema parsing and UI rendering. 
- */ - OutputFieldJSONSchemaExtra: { - field_kind: components["schemas"]["FieldKind"]; - /** Ui Hidden */ - ui_hidden: boolean; - ui_type: components["schemas"]["UIType"] | null; - /** Ui Order */ - ui_order: number | null; - }; - /** PaginatedResults[WorkflowRecordListItemDTO] */ - PaginatedResults_WorkflowRecordListItemDTO_: { - /** - * Page - * @description Current Page - */ - page: number; - /** - * Pages - * @description Total number of pages - */ - pages: number; - /** - * Per Page - * @description Number of items per page - */ - per_page: number; - /** - * Total - * @description Total number of items in result - */ - total: number; - /** - * Items - * @description Items - */ - items: components["schemas"]["WorkflowRecordListItemDTO"][]; - }; - /** - * Pair Tile with Image - * @description Pair an image with its tile properties. - */ - PairTileImageInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The tile image. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * @description The tile properties. - * @default null - */ - tile?: components["schemas"]["Tile"]; - /** - * type - * @default pair_tile_image - * @constant - * @enum {string} - */ - type: "pair_tile_image"; - }; - /** PairTileImageOutput */ - PairTileImageOutput: { - /** @description A tile description with its corresponding image. */ - tile_with_image: components["schemas"]["TileWithImage"]; - /** - * type - * @default pair_tile_image_output - * @constant - * @enum {string} - */ - type: "pair_tile_image_output"; - }; - /** - * PIDI Processor - * @description Applies PIDI processing to image - */ - PidiImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * Safe - * @description Whether or not to use safe mode - * @default false - */ - safe?: boolean; - /** - * Scribble - * @description Whether or not to use scribble mode - * @default false - */ - scribble?: boolean; - /** - * type - * @default pidi_image_processor - * @constant - * @enum {string} - */ - type: "pidi_image_processor"; - }; - /** - * ProgressImage - * @description The progress image sent intermittently during processing - */ - ProgressImage: { - /** - * Width - * @description The effective width of the image in pixels - */ - width: number; - /** - * Height - * @description The effective height of the image in pixels - */ - height: number; - /** - * Dataurl - * @description The image data as a b64 data URL - */ - dataURL: string; - }; - /** - * Prompts from File - * @description Loads prompts from a text file - */ - PromptsFromFileInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * File Path - * @description Path to prompt text file - * @default null - */ - file_path?: string; - /** - * Pre Prompt - * @description String to prepend to each prompt - * @default null - */ - pre_prompt?: string | null; - /** - * Post Prompt - * @description String to append to each prompt - * @default null - */ - post_prompt?: string | null; - /** - * Start Line - * @description Line in the file to start start from - * @default 1 - */ - start_line?: number; - /** - * Max Prompts - * @description Max lines to read from file (0=all) - * @default 1 - */ - max_prompts?: number; - /** - * type - * @default prompt_from_file - * @constant - * @enum {string} - */ - type: "prompt_from_file"; - }; - /** - * PruneResult - * @description Result of pruning the session queue - */ - PruneResult: { - /** - * Deleted - * @description Number of queue items deleted - */ - deleted: number; - }; - /** - * QueueClearedEvent - * @description Event model for queue_cleared - */ - QueueClearedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - }; - /** - * QueueItemStatusChangedEvent - * @description Event model for queue_item_status_changed - */ - QueueItemStatusChangedEvent: { - /** - * Timestamp - * @description The timestamp of the event - */ - timestamp: number; - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Item Id - * @description The ID of the queue item - */ - item_id: number; - /** - * Batch Id - * @description The ID of the queue batch - */ - batch_id: string; - /** - * Status - * @description The new status of the queue item - * @enum 
{string} - */ - status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; - /** - * Error Type - * @description The error type, if any - * @default null - */ - error_type: string | null; - /** - * Error Message - * @description The error message, if any - * @default null - */ - error_message: string | null; - /** - * Error Traceback - * @description The error traceback, if any - * @default null - */ - error_traceback: string | null; - /** - * Created At - * @description The timestamp when the queue item was created - * @default null - */ - created_at: string | null; - /** - * Updated At - * @description The timestamp when the queue item was last updated - * @default null - */ - updated_at: string | null; - /** - * Started At - * @description The timestamp when the queue item was started - * @default null - */ - started_at: string | null; - /** - * Completed At - * @description The timestamp when the queue item was completed - * @default null - */ - completed_at: string | null; - /** @description The status of the batch */ - batch_status: components["schemas"]["BatchStatus"]; - /** @description The status of the queue */ - queue_status: components["schemas"]["SessionQueueStatus"]; - /** - * Session Id - * @description The ID of the session (aka graph execution state) - */ - session_id: string; - }; - /** - * Random Float - * @description Outputs a single random float - */ - RandomFloatInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default false - */ - use_cache?: boolean; - /** - * Low - * @description The inclusive low value - * @default 0 - */ - low?: number; - /** - * High - * @description The exclusive high value - * @default 1 - */ - high?: number; - /** - * Decimals - * @description The number of decimal places to round to - * @default 2 - */ - decimals?: number; - /** - * type - * @default rand_float - * @constant - * @enum {string} - */ - type: "rand_float"; - }; - /** - * Random Integer - * @description Outputs a single random integer. - */ - RandomIntInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default false - */ - use_cache?: boolean; - /** - * Low - * @description The inclusive low value - * @default 0 - */ - low?: number; - /** - * High - * @description The exclusive high value - * @default 2147483647 - */ - high?: number; - /** - * type - * @default rand_int - * @constant - * @enum {string} - */ - type: "rand_int"; - }; - /** - * Random Range - * @description Creates a collection of random numbers - */ - RandomRangeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default false - */ - use_cache?: boolean; - /** - * Low - * @description The inclusive low value - * @default 0 - */ - low?: number; - /** - * High - * @description The exclusive high value - * @default 2147483647 - */ - high?: number; - /** - * Size - * @description The number of values to generate - * @default 1 - */ - size?: number; - /** - * Seed - * @description The seed for the RNG (omit for random) - * @default 0 - */ - seed?: number; - /** - * type - * @default random_range - * @constant - * @enum {string} - */ - type: "random_range"; - }; - /** - * Integer Range - * @description Creates a range of numbers from start to stop with step - */ - RangeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Start - * @description The start of the range - * @default 0 - */ - start?: number; - /** - * Stop - * @description The stop of the range - * @default 10 - */ - stop?: number; - /** - * Step - * @description The step of the range - * @default 1 - */ - step?: number; - /** - * type - * @default range - * @constant - * @enum {string} - */ - type: "range"; - }; - /** - * Integer Range of Size - * @description Creates a range from start to start + (size * step) incremented by step - */ - RangeOfSizeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Start - * @description The start of the range - * @default 0 - */ - start?: number; - /** - * Size - * @description The number of values - * @default 1 - */ - size?: number; - /** - * Step - * @description The step of the range - * @default 1 - */ - step?: number; - /** - * type - * @default range_of_size - * @constant - * @enum {string} - */ - type: "range_of_size"; - }; - /** - * Create Rectangle Mask - * @description Create a rectangular mask. - */ - RectangleMaskInvocation: { - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Width - * @description The width of the entire mask. - * @default null - */ - width?: number; - /** - * Height - * @description The height of the entire mask. - * @default null - */ - height?: number; - /** - * X Left - * @description The left x-coordinate of the rectangular masked region (inclusive). 
- * @default null - */ - x_left?: number; - /** - * Y Top - * @description The top y-coordinate of the rectangular masked region (inclusive). - * @default null - */ - y_top?: number; - /** - * Rectangle Width - * @description The width of the rectangular masked region. - * @default null - */ - rectangle_width?: number; - /** - * Rectangle Height - * @description The height of the rectangular masked region. - * @default null - */ - rectangle_height?: number; - /** - * type - * @default rectangle_mask - * @constant - * @enum {string} - */ - type: "rectangle_mask"; - }; - /** - * RemoteModelFile - * @description Information about a downloadable file that forms part of a model. - */ - RemoteModelFile: { - /** - * Url - * Format: uri - * @description The url to download this model file - */ - url: string; - /** - * Path - * Format: path - * @description The path to the file, relative to the model root - */ - path: string; - /** - * Size - * @description The size of this file, in bytes - * @default 0 - */ - size?: number | null; - /** - * Sha256 - * @description SHA256 hash of this model (not always available) - */ - sha256?: string | null; - }; - /** RemoveImagesFromBoardResult */ - RemoveImagesFromBoardResult: { - /** - * Removed Image Names - * @description The image names that were removed from their board - */ - removed_image_names: string[]; - }; - /** - * Resize Latents - * @description Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8. - */ - ResizeLatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"]; - /** - * Width - * @description Width of output (px) - * @default null - */ - width?: number; - /** - * Height - * @description Width of output (px) - * @default null - */ - height?: number; - /** - * Mode - * @description Interpolation mode - * @default bilinear - * @enum {string} - */ - mode?: "nearest" | "linear" | "bilinear" | "bicubic" | "trilinear" | "area" | "nearest-exact"; - /** - * Antialias - * @description Whether or not to apply antialiasing (bilinear or bicubic only) - * @default false - */ - antialias?: boolean; - /** - * type - * @default lresize - * @constant - * @enum {string} - */ - type: "lresize"; - }; - /** - * ResourceOrigin - * @description The origin of a resource (eg image). - * - * - INTERNAL: The resource was created by the application. - * - EXTERNAL: The resource was not created by the application. - * This may be a user-initiated upload, or an internal application upload (eg Canvas init image). - * @enum {string} - */ - ResourceOrigin: "internal" | "external"; - /** - * Round Float - * @description Rounds a float to a specified number of decimal places. - */ - RoundInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The float value - * @default 0 - */ - value?: number; - /** - * Decimals - * @description The number of decimal places - * @default 0 - */ - decimals?: number; - /** - * type - * @default round_float - * @constant - * @enum {string} - */ - type: "round_float"; - }; - /** - * SDXL Prompt - * @description Parse prompt using compel package to conditioning. - */ - SDXLCompelPromptInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Prompt - * @description Prompt to be parsed by Compel to create a conditioning tensor - * @default - */ - prompt?: string; - /** - * Style - * @description Prompt to be parsed by Compel to create a conditioning tensor - * @default - */ - style?: string; - /** - * Original Width - * @default 1024 - */ - original_width?: number; - /** - * Original Height - * @default 1024 - */ - original_height?: number; - /** - * Crop Top - * @default 0 - */ - crop_top?: number; - /** - * Crop Left - * @default 0 - */ - crop_left?: number; - /** - * Target Width - * @default 1024 - */ - target_width?: number; - /** - * Target Height - * @default 1024 - */ - target_height?: number; - /** - * CLIP 1 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"]; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip2?: components["schemas"]["CLIPField"]; - /** - * @description A mask defining the region that this conditioning prompt applies to. - * @default null - */ - mask?: components["schemas"]["TensorField"] | null; - /** - * type - * @default sdxl_compel_prompt - * @constant - * @enum {string} - */ - type: "sdxl_compel_prompt"; - }; - /** - * SDXL LoRA Collection Loader - * @description Applies a collection of SDXL LoRAs to the provided UNet and CLIP models. - */ - SDXLLoRACollectionLoader: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * LoRAs - * @description LoRA models and weights. May be a single LoRA or collection. 
- * @default null - */ - loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][]; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * CLIP - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"] | null; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip2?: components["schemas"]["CLIPField"] | null; - /** - * type - * @default sdxl_lora_collection_loader - * @constant - * @enum {string} - */ - type: "sdxl_lora_collection_loader"; - }; - /** - * SDXL LoRA - * @description Apply selected lora to unet and text_encoder. - */ - SDXLLoRALoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * LoRA - * @description LoRA model to load - * @default null - */ - lora?: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight at which the LoRA is applied to each model - * @default 0.75 - */ - weight?: number; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * CLIP 1 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip?: components["schemas"]["CLIPField"] | null; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip2?: components["schemas"]["CLIPField"] | null; - /** - * type - * @default sdxl_lora_loader - * @constant - * @enum {string} - */ - type: "sdxl_lora_loader"; - }; - /** - * SDXLLoRALoaderOutput - * @description SDXL LoRA Loader Output - */ - SDXLLoRALoaderOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet: components["schemas"]["UNetField"] | null; - /** - * CLIP 1 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip: components["schemas"]["CLIPField"] | null; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip2: components["schemas"]["CLIPField"] | null; - /** - * type - * @default sdxl_lora_loader_output - * @constant - * @enum {string} - */ - type: "sdxl_lora_loader_output"; - }; - /** - * SDXL Main Model - * @description Loads an sdxl base model, outputting its submodels. - */ - SDXLModelLoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load - * @default null - */ - model?: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default sdxl_model_loader - * @constant - * @enum {string} - */ - type: "sdxl_model_loader"; - }; - /** - * SDXLModelLoaderOutput - * @description SDXL base model loader output - */ - SDXLModelLoaderOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - */ - unet: components["schemas"]["UNetField"]; - /** - * CLIP 1 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip: components["schemas"]["CLIPField"]; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip2: components["schemas"]["CLIPField"]; - /** - * VAE - * @description VAE - */ - vae: components["schemas"]["VAEField"]; - /** - * type - * @default sdxl_model_loader_output - * @constant - * @enum {string} - */ - type: "sdxl_model_loader_output"; - }; - /** - * SDXL Refiner Prompt - * @description Parse prompt using compel package to conditioning. - */ - SDXLRefinerCompelPromptInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Style - * @description Prompt to be parsed by Compel to create a conditioning tensor - * @default - */ - style?: string; - /** - * Original Width - * @default 1024 - */ - original_width?: number; - /** - * Original Height - * @default 1024 - */ - original_height?: number; - /** - * Crop Top - * @default 0 - */ - crop_top?: number; - /** - * Crop Left - * @default 0 - */ - crop_left?: number; - /** - * Aesthetic Score - * @description The aesthetic score to apply to the conditioning tensor - * @default 6 - */ - aesthetic_score?: number; - /** - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - * @default null - */ - clip2?: components["schemas"]["CLIPField"]; - /** - * type - * @default sdxl_refiner_compel_prompt - * @constant - * @enum {string} - */ - type: "sdxl_refiner_compel_prompt"; - }; - /** - * SDXL Refiner Model - * @description Loads an sdxl refiner model, outputting its submodels. - */ - SDXLRefinerModelLoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description SDXL Refiner Main Model (UNet, VAE, CLIP2) to load - * @default null - */ - model?: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default sdxl_refiner_model_loader - * @constant - * @enum {string} - */ - type: "sdxl_refiner_model_loader"; - }; - /** - * SDXLRefinerModelLoaderOutput - * @description SDXL refiner model loader output - */ - SDXLRefinerModelLoaderOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - */ - unet: components["schemas"]["UNetField"]; - /** - * CLIP 2 - * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count - */ - clip2: components["schemas"]["CLIPField"]; - /** - * VAE - * @description VAE - */ - vae: components["schemas"]["VAEField"]; - /** - * type - * @default sdxl_refiner_model_loader_output - * @constant - * @enum {string} - */ - type: "sdxl_refiner_model_loader_output"; - }; - /** - * SQLiteDirection - * @enum {string} - */ - SQLiteDirection: "ASC" | "DESC"; - /** - * Save Image - * @description Saves an image. Unlike an image primitive, this invocation stores a copy of the image. - */ - SaveImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default false - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default save_image - * @constant - * @enum {string} - */ - type: "save_image"; - }; - /** - * Scale Latents - * @description Scales latents by a given factor. - */ - ScaleLatentsInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"]; - /** - * Scale Factor - * @description The factor by which to scale - * @default null - */ - scale_factor?: number; - /** - * Mode - * @description Interpolation mode - * @default bilinear - * @enum {string} - */ - mode?: "nearest" | "linear" | "bilinear" | "bicubic" | "trilinear" | "area" | "nearest-exact"; - /** - * Antialias - * @description Whether or not to apply antialiasing (bilinear or bicubic only) - * @default false - */ - antialias?: boolean; - /** - * type - * @default lscale - * @constant - * @enum {string} - */ - type: "lscale"; - }; - /** - * Scheduler - * @description Selects a scheduler. - */ - SchedulerInvocation: { - /** - * Id - * @description The id of this instance of an invocation.
Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Scheduler - * @description Scheduler to use during inference - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; - /** - * type - * @default scheduler - * @constant - * @enum {string} - */ - type: "scheduler"; - }; - /** SchedulerOutput */ - SchedulerOutput: { - /** - * Scheduler - * @description Scheduler to use during inference - * @enum {string} - */ - scheduler: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; - /** - * type - * @default scheduler_output - * @constant - * @enum {string} - */ - type: "scheduler_output"; - }; - /** - * SchedulerPredictionType - * @description Scheduler prediction type. - * @enum {string} - */ - SchedulerPredictionType: "epsilon" | "v_prediction" | "sample"; - /** - * Seamless - * @description Applies the seamless transformation to the Model UNet and VAE. - */ - SeamlessModeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"] | null; - /** - * VAE - * @description VAE model to load - * @default null - */ - vae?: components["schemas"]["VAEField"] | null; - /** - * Seamless Y - * @description Specify whether Y axis is seamless - * @default true - */ - seamless_y?: boolean; - /** - * Seamless X - * @description Specify whether X axis is seamless - * @default true - */ - seamless_x?: boolean; - /** - * type - * @default seamless - * @constant - * @enum {string} - */ - type: "seamless"; - }; - /** - * SeamlessModeOutput - * @description Modified Seamless Model output - */ - SeamlessModeOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet: components["schemas"]["UNetField"] | null; - /** - * VAE - * @description VAE - * @default null - */ - vae: components["schemas"]["VAEField"] | null; - /** - * type - * @default seamless_output - * @constant - * @enum {string} - */ - type: "seamless_output"; - }; - /** - * Segment Anything - * @description Runs a Segment Anything Model. - */ - SegmentAnythingInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Model - * @description The Segment Anything model to use. - * @default null - * @enum {string} - */ - model?: "segment-anything-base" | "segment-anything-large" | "segment-anything-huge"; - /** - * @description The image to segment. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Bounding Boxes - * @description The bounding boxes to prompt the SAM model with. - * @default null - */ - bounding_boxes?: components["schemas"]["BoundingBoxField"][]; - /** - * Apply Polygon Refinement - * @description Whether to apply polygon refinement to the masks. This will smooth the edges of the masks slightly and ensure that each mask consists of a single closed polygon (before merging). - * @default true - */ - apply_polygon_refinement?: boolean; - /** - * Mask Filter - * @description The filtering to apply to the detected masks before merging them into a final output. - * @default all - * @enum {string} - */ - mask_filter?: "all" | "largest" | "highest_box_score"; - /** - * type - * @default segment_anything - * @constant - * @enum {string} - */ - type: "segment_anything"; - }; - /** - * Segment Anything Processor - * @description Applies segment anything processing to image - */ - SegmentAnythingProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Detect Resolution - * @description Pixel resolution for detection - * @default 512 - */ - detect_resolution?: number; - /** - * Image Resolution - * @description Pixel resolution for output image - * @default 512 - */ - image_resolution?: number; - /** - * type - * @default segment_anything_processor - * @constant - * @enum {string} - */ - type: "segment_anything_processor"; - }; - /** SessionProcessorStatus */ - SessionProcessorStatus: { - /** - * Is Started - * @description Whether the session processor is started - */ - is_started: boolean; - /** - * Is Processing - * @description Whether a session is being processed - */ - is_processing: boolean; - }; - /** - * SessionQueueAndProcessorStatus - * @description The overall status of session queue and processor - */ - SessionQueueAndProcessorStatus: { - queue: components["schemas"]["SessionQueueStatus"]; - processor: components["schemas"]["SessionProcessorStatus"]; - }; - /** SessionQueueItem */ - SessionQueueItem: { - /** - * Item Id - * @description The identifier of the session queue item - */ - item_id: number; - /** - * Status - * @description The status of this queue item - * @default pending - * @enum {string} - */ - status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; - /** - * Priority - * @description The priority of this queue item - * @default 0 - */ - priority: number; - /** - * Batch Id - * @description The ID of the batch associated with this queue item - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed. 
- */ - session_id: string; - /** - * Error Type - * @description The error type if this queue item errored - */ - error_type?: string | null; - /** - * Error Message - * @description The error message if this queue item errored - */ - error_message?: string | null; - /** - * Error Traceback - * @description The error traceback if this queue item errored - */ - error_traceback?: string | null; - /** - * Created At - * @description When this queue item was created - */ - created_at: string; - /** - * Updated At - * @description When this queue item was updated - */ - updated_at: string; - /** - * Started At - * @description When this queue item was started - */ - started_at?: string | null; - /** - * Completed At - * @description When this queue item was completed - */ - completed_at?: string | null; - /** - * Queue Id - * @description The id of the queue with which this item is associated - */ - queue_id: string; - /** - * Field Values - * @description The field values that were used for this queue item - */ - field_values?: components["schemas"]["NodeFieldValue"][] | null; - /** @description The fully-populated session to be executed */ - session: components["schemas"]["GraphExecutionState"]; - /** @description The workflow associated with this queue item */ - workflow?: components["schemas"]["WorkflowWithoutID"] | null; - }; - /** SessionQueueItemDTO */ - SessionQueueItemDTO: { - /** - * Item Id - * @description The identifier of the session queue item - */ - item_id: number; - /** - * Status - * @description The status of this queue item - * @default pending - * @enum {string} - */ - status: "pending" | "in_progress" | "completed" | "failed" | "canceled"; - /** - * Priority - * @description The priority of this queue item - * @default 0 - */ - priority: number; - /** - * Batch Id - * @description The ID of the batch associated with this queue item - */ - batch_id: string; - /** - * Session Id - * @description The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed. 
- */ - session_id: string; - /** - * Error Type - * @description The error type if this queue item errored - */ - error_type?: string | null; - /** - * Error Message - * @description The error message if this queue item errored - */ - error_message?: string | null; - /** - * Error Traceback - * @description The error traceback if this queue item errored - */ - error_traceback?: string | null; - /** - * Created At - * @description When this queue item was created - */ - created_at: string; - /** - * Updated At - * @description When this queue item was updated - */ - updated_at: string; - /** - * Started At - * @description When this queue item was started - */ - started_at?: string | null; - /** - * Completed At - * @description When this queue item was completed - */ - completed_at?: string | null; - /** - * Queue Id - * @description The id of the queue with which this item is associated - */ - queue_id: string; - /** - * Field Values - * @description The field values that were used for this queue item - */ - field_values?: components["schemas"]["NodeFieldValue"][] | null; - }; - /** SessionQueueStatus */ - SessionQueueStatus: { - /** - * Queue Id - * @description The ID of the queue - */ - queue_id: string; - /** - * Item Id - * @description The current queue item id - */ - item_id: number | null; - /** - * Batch Id - * @description The current queue item's batch id - */ - batch_id: string | null; - /** - * Session Id - * @description The current queue item's session id - */ - session_id: string | null; - /** - * Pending - * @description Number of queue items with status 'pending' - */ - pending: number; - /** - * In Progress - * @description Number of queue items with status 'in_progress' - */ - in_progress: number; - /** - * Completed - * @description Number of queue items with status 'complete' - */ - completed: number; - /** - * Failed - * @description Number of queue items with status 'error' - */ - failed: number; - /** - * Canceled - * @description Number of queue items with status 'canceled' - */ - canceled: number; - /** - * Total - * @description Total number of queue items - */ - total: number; - }; - /** - * Show Image - * @description Displays a provided image using the OS image viewer, and passes it forward in the pipeline. - */ - ShowImageInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to show - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default show_image - * @constant - * @enum {string} - */ - type: "show_image"; - }; - /** - * Image-to-Image (Autoscale) - * @description Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached. - */ - SpandrelImageToImageAutoscaleInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
- */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The input image - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Image-to-Image Model - * @description Image-to-Image model - * @default null - */ - image_to_image_model?: components["schemas"]["ModelIdentifierField"]; - /** - * Tile Size - * @description The tile size for tiled image-to-image. Set to 0 to disable tiling. - * @default 512 - */ - tile_size?: number; - /** - * type - * @default spandrel_image_to_image_autoscale - * @constant - * @enum {string} - */ - type: "spandrel_image_to_image_autoscale"; - /** - * Scale - * @description The final scale of the output image. If the model does not upscale the image, this will be ignored. - * @default 4 - */ - scale?: number; - /** - * Fit To Multiple Of 8 - * @description If true, the output image will be resized to the nearest multiple of 8 in both dimensions. - * @default false - */ - fit_to_multiple_of_8?: boolean; - }; - /** - * SpandrelImageToImageConfig - * @description Model config for Spandrel Image to Image models. - */ - SpandrelImageToImageConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default spandrel_image_to_image - * @constant - * @enum {string} - */ - type: "spandrel_image_to_image"; - /** - * Format - * @default checkpoint - * @constant - * @enum {string} - */ - format: "checkpoint"; - }; - /** - * Image-to-Image - * @description Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel). - */ - SpandrelImageToImageInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
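// Illustrative sketch (not part of this patch): one plausible reading of the autoscale
// behaviour described above, assuming the Spandrel model is applied repeatedly until its
// cumulative scale reaches the requested `scale`, after which the result would be resized
// to the exact target. The helper name is hypothetical.
function estimateAutoscalePasses(modelScale: number, targetScale: number): number {
  if (modelScale <= 1) {
    return 1; // per the description, `scale` is ignored for models that do not upscale
  }
  let passes = 0;
  let achieved = 1;
  while (achieved < targetScale) {
    achieved *= modelScale;
    passes += 1;
  }
  return passes;
}
// e.g. a 4x model with scale = 8 needs 2 passes (4 -> 16), then a downscale to exactly 8x.
console.log(estimateAutoscalePasses(4, 8)); // 2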
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The input image - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Image-to-Image Model - * @description Image-to-Image model - * @default null - */ - image_to_image_model?: components["schemas"]["ModelIdentifierField"]; - /** - * Tile Size - * @description The tile size for tiled image-to-image. Set to 0 to disable tiling. - * @default 512 - */ - tile_size?: number; - /** - * type - * @default spandrel_image_to_image - * @constant - * @enum {string} - */ - type: "spandrel_image_to_image"; - }; - /** StarterModel */ - StarterModel: { - /** Description */ - description: string; - /** Source */ - source: string; - /** Name */ - name: string; - base: components["schemas"]["BaseModelType"]; - type: components["schemas"]["ModelType"]; - /** - * Is Installed - * @default false - */ - is_installed?: boolean; - /** Dependencies */ - dependencies?: components["schemas"]["StarterModelWithoutDependencies"][] | null; - }; - /** StarterModelWithoutDependencies */ - StarterModelWithoutDependencies: { - /** Description */ - description: string; - /** Source */ - source: string; - /** Name */ - name: string; - base: components["schemas"]["BaseModelType"]; - type: components["schemas"]["ModelType"]; - /** - * Is Installed - * @default false - */ - is_installed?: boolean; - }; - /** - * Step Param Easing - * @description Experimental per-step parameter easing for denoising steps - */ - StepParamEasingInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Easing - * @description The easing function to use - * @default Linear - * @enum {string} - */ - easing?: "Linear" | "QuadIn" | "QuadOut" | "QuadInOut" | "CubicIn" | "CubicOut" | "CubicInOut" | "QuarticIn" | "QuarticOut" | "QuarticInOut" | "QuinticIn" | "QuinticOut" | "QuinticInOut" | "SineIn" | "SineOut" | "SineInOut" | "CircularIn" | "CircularOut" | "CircularInOut" | "ExponentialIn" | "ExponentialOut" | "ExponentialInOut" | "ElasticIn" | "ElasticOut" | "ElasticInOut" | "BackIn" | "BackOut" | "BackInOut" | "BounceIn" | "BounceOut" | "BounceInOut"; - /** - * Num Steps - * @description number of denoising steps - * @default 20 - */ - num_steps?: number; - /** - * Start Value - * @description easing starting value - * @default 0 - */ - start_value?: number; - /** - * End Value - * @description easing ending value - * @default 1 - */ - end_value?: number; - /** - * Start Step Percent - * @description fraction of steps at which to start easing - * @default 0 - */ - start_step_percent?: number; - /** - * End Step Percent - * @description fraction of steps after which to end easing - * @default 1 - */ - end_step_percent?: number; - /** - * Pre Start Value - * @description value before easing start - * @default null - */ - pre_start_value?: number | null; - /** - * Post End Value - * @description value after easing end - * @default null - */ - post_end_value?: number | null; - /** - * Mirror - * @description include mirror of easing function - * @default false - */ - mirror?: boolean; - /** - * Show Easing Plot - * @description show easing plot - * @default false - */ - show_easing_plot?: boolean; - /** - * type - * @default step_param_easing - * @constant - * @enum {string} - */ - type: "step_param_easing"; - }; - /** - * String2Output - * @description Base class for invocations that output two strings - */ - String2Output: { - /** - * String 1 - * @description string 1 - */ - string_1: string; - /** - * String 2 - * @description string 2 - */ - string_2: string; - /** - * type - * @default string_2_output - * @constant - * @enum {string} - */ - type: "string_2_output"; - }; - /** - * String Collection Primitive - * @description A collection of string primitive values - */ - StringCollectionInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
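// Illustrative sketch (not part of this patch): a minimal reading of the step-parameter
// easing described above, limited to the "Linear" easing and ignoring `mirror`. Steps
// before/after the eased window fall back to the pre/post values (or to the start/end
// values when those are null). All names here are hypothetical.
function linearStepEasing(
  numSteps: number,
  startValue: number,
  endValue: number,
  startStepPercent: number,
  endStepPercent: number,
  preStartValue: number | null = null,
  postEndValue: number | null = null,
): number[] {
  const values: number[] = [];
  const startStep = Math.floor(numSteps * startStepPercent);
  const endStep = Math.floor(numSteps * endStepPercent);
  for (let step = 0; step < numSteps; step++) {
    if (step < startStep) {
      values.push(preStartValue ?? startValue);
    } else if (step > endStep) {
      values.push(postEndValue ?? endValue);
    } else {
      const t = endStep === startStep ? 1 : (step - startStep) / (endStep - startStep);
      values.push(startValue + t * (endValue - startValue));
    }
  }
  return values;
}
// e.g. linearStepEasing(20, 0, 1, 0, 1) eases 0 -> 1 linearly across all 20 steps.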
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Collection - * @description The collection of string values - * @default [] - */ - collection?: string[]; - /** - * type - * @default string_collection - * @constant - * @enum {string} - */ - type: "string_collection"; - }; - /** - * StringCollectionOutput - * @description Base class for nodes that output a collection of strings - */ - StringCollectionOutput: { - /** - * Collection - * @description The output strings - */ - collection: string[]; - /** - * type - * @default string_collection_output - * @constant - * @enum {string} - */ - type: "string_collection_output"; - }; - /** - * String Primitive - * @description A string primitive value - */ - StringInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * Value - * @description The string value - * @default - */ - value?: string; - /** - * type - * @default string - * @constant - * @enum {string} - */ - type: "string"; - }; - /** - * String Join - * @description Joins string left to string right - */ - StringJoinInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * String Left - * @description String Left - * @default - */ - string_left?: string; - /** - * String Right - * @description String Right - * @default - */ - string_right?: string; - /** - * type - * @default string_join - * @constant - * @enum {string} - */ - type: "string_join"; - }; - /** - * String Join Three - * @description Joins string left to string middle to string right - */ - StringJoinThreeInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * String Left - * @description String Left - * @default - */ - string_left?: string; - /** - * String Middle - * @description String Middle - * @default - */ - string_middle?: string; - /** - * String Right - * @description String Right - * @default - */ - string_right?: string; - /** - * type - * @default string_join_three - * @constant - * @enum {string} - */ - type: "string_join_three"; - }; - /** - * StringOutput - * @description Base class for nodes that output a single string - */ - StringOutput: { - /** - * Value - * @description The output string - */ - value: string; - /** - * type - * @default string_output - * @constant - * @enum {string} - */ - type: "string_output"; - }; - /** - * StringPosNegOutput - * @description Base class for invocations that output a positive and negative string - */ - StringPosNegOutput: { - /** - * Positive String - * @description Positive string - */ - positive_string: string; - /** - * Negative String - * @description Negative string - */ - negative_string: string; - /** - * type - * @default string_pos_neg_output - * @constant - * @enum {string} - */ - type: "string_pos_neg_output"; - }; - /** - * String Replace - * @description Replaces the search string with the replace string - */ - StringReplaceInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * String - * @description String to work on - * @default - */ - string?: string; - /** - * Search String - * @description String to search for - * @default - */ - search_string?: string; - /** - * Replace String - * @description String to replace the search - * @default - */ - replace_string?: string; - /** - * Use Regex - * @description Use search string as a regex expression (non regex is case insensitive) - * @default false - */ - use_regex?: boolean; - /** - * type - * @default string_replace - * @constant - * @enum {string} - */ - type: "string_replace"; - }; - /** - * String Split - * @description Splits string into two strings, based on the first occurrence of the delimiter. The delimiter will be removed from the string - */ - StringSplitInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * String - * @description String to split - * @default - */ - string?: string; - /** - * Delimiter - * @description Delimiter to split with. blank will split on the first whitespace - * @default - */ - delimiter?: string; - /** - * type - * @default string_split - * @constant - * @enum {string} - */ - type: "string_split"; - }; - /** - * String Split Negative - * @description Splits string into two strings, inside [] goes into negative string everything else goes into positive string.
Each [ and ] character is replaced with a space - */ - StringSplitNegInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * String - * @description String to split - * @default - */ - string?: string; - /** - * type - * @default string_split_neg - * @constant - * @enum {string} - */ - type: "string_split_neg"; - }; - /** - * SubModelType - * @description Submodel type. - * @enum {string} - */ - SubModelType: "unet" | "transformer" | "text_encoder" | "text_encoder_2" | "tokenizer" | "tokenizer_2" | "vae" | "vae_decoder" | "vae_encoder" | "scheduler" | "safety_checker"; - /** - * Subtract Integers - * @description Subtracts two numbers - */ - SubtractInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * A - * @description The first number - * @default 0 - */ - a?: number; - /** - * B - * @description The second number - * @default 0 - */ - b?: number; - /** - * type - * @default sub - * @constant - * @enum {string} - */ - type: "sub"; - }; - /** - * T2IAdapterConfig - * @description Model config for T2I. - */ - T2IAdapterConfig: { - /** @description Default settings for this model */ - default_settings?: components["schemas"]["ControlAdapterDefaultSettings"] | null; - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - /** @default */ - repo_variant?: components["schemas"]["ModelRepoVariant"] | null; - /** - * Type - * @default t2i_adapter - * @constant - * @enum {string} - */ - type: "t2i_adapter"; - }; - /** T2IAdapterField */ - T2IAdapterField: { - /** @description The T2I-Adapter image prompt. */ - image: components["schemas"]["ImageField"]; - /** @description The T2I-Adapter model to use. 
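// Illustrative sketch (not part of this patch): one plausible reading of the "String Split
// Negative" behaviour described above. Bracketed spans are collected into the negative
// string, everything else into the positive string, and each '[' / ']' is replaced by a
// space. The function name is hypothetical.
function splitNegative(input: string): { positive: string; negative: string } {
  let positive = "";
  let negative = "";
  let depth = 0;
  for (const ch of input) {
    if (ch === "[") {
      depth += 1;
      negative += " "; // bracket replaced with a space
    } else if (ch === "]") {
      depth = Math.max(0, depth - 1);
      negative += " ";
    } else if (depth > 0) {
      negative += ch;
    } else {
      positive += ch;
    }
  }
  return { positive, negative };
}
// e.g. splitNegative("a cat [blurry, low quality] on a sofa")
// -> { positive: "a cat  on a sofa", negative: " blurry, low quality " }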
*/ - t2i_adapter_model: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight given to the T2I-Adapter - * @default 1 - */ - weight?: number | number[]; - /** - * Begin Step Percent - * @description When the T2I-Adapter is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the T2I-Adapter is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Resize Mode - * @description The resize mode to use - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; - }; - /** - * T2I-Adapter - * @description Collects T2I-Adapter info to pass to other nodes. - */ - T2IAdapterInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The IP-Adapter image prompt. - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * T2I-Adapter Model - * @description The T2I-Adapter model. - * @default null - */ - t2i_adapter_model?: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight given to the T2I-Adapter - * @default 1 - */ - weight?: number | number[]; - /** - * Begin Step Percent - * @description When the T2I-Adapter is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the T2I-Adapter is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Resize Mode - * @description The resize mode applied to the T2I-Adapter input image so that it matches the target output size. - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; - /** - * type - * @default t2i_adapter - * @constant - * @enum {string} - */ - type: "t2i_adapter"; - }; - /** T2IAdapterMetadataField */ - T2IAdapterMetadataField: { - /** @description The control image. */ - image: components["schemas"]["ImageField"]; - /** - * @description The control image, after processing. - * @default null - */ - processed_image?: components["schemas"]["ImageField"] | null; - /** @description The T2I-Adapter model to use. 
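// Illustrative sketch (not part of this patch): mapping the begin/end step-percent fields
// described above onto concrete step indices. Whether the bounds are inclusive is an
// assumption for illustration; the helper name is hypothetical.
function stepRange(totalSteps: number, beginPercent: number, endPercent: number): [number, number] {
  const begin = Math.floor(totalSteps * beginPercent);
  const end = Math.min(Math.ceil(totalSteps * endPercent), totalSteps);
  return [begin, end];
}
// e.g. with 30 steps, begin_step_percent = 0 and end_step_percent = 0.6,
// the adapter would be applied roughly over steps 0..18: stepRange(30, 0, 0.6) -> [0, 18]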
*/ - t2i_adapter_model: components["schemas"]["ModelIdentifierField"]; - /** - * Weight - * @description The weight given to the T2I-Adapter - * @default 1 - */ - weight?: number | number[]; - /** - * Begin Step Percent - * @description When the T2I-Adapter is first applied (% of total steps) - * @default 0 - */ - begin_step_percent?: number; - /** - * End Step Percent - * @description When the T2I-Adapter is last applied (% of total steps) - * @default 1 - */ - end_step_percent?: number; - /** - * Resize Mode - * @description The resize mode to use - * @default just_resize - * @enum {string} - */ - resize_mode?: "just_resize" | "crop_resize" | "fill_resize" | "just_resize_simple"; - }; - /** T2IAdapterOutput */ - T2IAdapterOutput: { - /** - * T2I Adapter - * @description T2I-Adapter(s) to apply - */ - t2i_adapter: components["schemas"]["T2IAdapterField"]; - /** - * type - * @default t2i_adapter_output - * @constant - * @enum {string} - */ - type: "t2i_adapter_output"; - }; - /** T5Encoder8bConfig */ - T5Encoder8bConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default t5_encoder - * @constant - * @enum {string} - */ - type: "t5_encoder"; - /** - * Format - * @default t5_encoder_8b - * @constant - * @enum {string} - */ - format: "t5_encoder_8b"; - }; - /** T5EncoderConfig */ - T5EncoderConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default t5_encoder - * @constant - * @enum {string} - */ - type: "t5_encoder"; - /** - * Format - * @default t5_encoder - * @constant - * @enum {string} - */ - format: "t5_encoder"; - }; - /** T5EncoderField */ - T5EncoderField: { - /** @description Info to load tokenizer submodel */ - tokenizer: components["schemas"]["ModelIdentifierField"]; - /** @description Info to load text_encoder submodel */ - text_encoder: components["schemas"]["ModelIdentifierField"]; - }; - /** TBLR */ - TBLR: { - /** Top */ - top: number; - /** Bottom */ - bottom: number; - /** Left */ - left: number; - /** Right */ - right: number; - }; - /** - * TensorField - * @description A tensor primitive field. - */ - TensorField: { - /** - * Tensor Name - * @description The name of a tensor. - */ - tensor_name: string; - }; - /** - * TextualInversionFileConfig - * @description Model config for textual inversion embeddings. - */ - TextualInversionFileConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default embedding - * @constant - * @enum {string} - */ - type: "embedding"; - /** - * Format - * @default embedding_file - * @constant - * @enum {string} - */ - format: "embedding_file"; - }; - /** - * TextualInversionFolderConfig - * @description Model config for textual inversion embeddings. - */ - TextualInversionFolderConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. 
- */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default embedding - * @constant - * @enum {string} - */ - type: "embedding"; - /** - * Format - * @default embedding_folder - * @constant - * @enum {string} - */ - format: "embedding_folder"; - }; - /** Tile */ - Tile: { - /** @description The coordinates of this tile relative to its parent image. */ - coords: components["schemas"]["TBLR"]; - /** @description The amount of overlap with adjacent tiles on each side of this tile. */ - overlap: components["schemas"]["TBLR"]; - }; - /** - * Tile Resample Processor - * @description Tile resampler processor - */ - TileResamplerProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Down Sampling Rate - * @description Down sampling rate - * @default 1 - */ - down_sampling_rate?: number; - /** - * type - * @default tile_image_processor - * @constant - * @enum {string} - */ - type: "tile_image_processor"; - }; - /** - * Tile to Properties - * @description Split a Tile into its individual properties. - */ - TileToPropertiesInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The tile to split into properties. - * @default null - */ - tile?: components["schemas"]["Tile"]; - /** - * type - * @default tile_to_properties - * @constant - * @enum {string} - */ - type: "tile_to_properties"; - }; - /** TileToPropertiesOutput */ - TileToPropertiesOutput: { - /** - * Coords Left - * @description Left coordinate of the tile relative to its parent image. - */ - coords_left: number; - /** - * Coords Right - * @description Right coordinate of the tile relative to its parent image. - */ - coords_right: number; - /** - * Coords Top - * @description Top coordinate of the tile relative to its parent image. - */ - coords_top: number; - /** - * Coords Bottom - * @description Bottom coordinate of the tile relative to its parent image. - */ - coords_bottom: number; - /** - * Width - * @description The width of the tile. Equal to coords_right - coords_left. - */ - width: number; - /** - * Height - * @description The height of the tile. Equal to coords_bottom - coords_top. - */ - height: number; - /** - * Overlap Top - * @description Overlap between this tile and its top neighbor. 
- */ - overlap_top: number; - /** - * Overlap Bottom - * @description Overlap between this tile and its bottom neighbor. - */ - overlap_bottom: number; - /** - * Overlap Left - * @description Overlap between this tile and its left neighbor. - */ - overlap_left: number; - /** - * Overlap Right - * @description Overlap between this tile and its right neighbor. - */ - overlap_right: number; - /** - * type - * @default tile_to_properties_output - * @constant - * @enum {string} - */ - type: "tile_to_properties_output"; - }; - /** TileWithImage */ - TileWithImage: { - tile: components["schemas"]["Tile"]; - image: components["schemas"]["ImageField"]; - }; - /** - * Tiled Multi-Diffusion Denoise Latents - * @description Tiled Multi-Diffusion denoising. - * - * This node handles automatically tiling the input image, and is primarily intended for global refinement of images - * in tiled upscaling workflows. Future Multi-Diffusion nodes should allow the user to specify custom regions with - * different parameters for each region to harness the full power of Multi-Diffusion. - * - * This node has a similar interface to the `DenoiseLatents` node, but it has a reduced feature set (no IP-Adapter, - * T2I-Adapter, masking, etc.). - */ - TiledMultiDiffusionDenoiseLatents: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description Positive conditioning tensor - * @default null - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * @description Negative conditioning tensor - * @default null - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * @description Noise tensor - * @default null - */ - noise?: components["schemas"]["LatentsField"] | null; - /** - * @description Latents tensor - * @default null - */ - latents?: components["schemas"]["LatentsField"] | null; - /** - * Tile Height - * @description Height of the tiles in image space. - * @default 1024 - */ - tile_height?: number; - /** - * Tile Width - * @description Width of the tiles in image space. - * @default 1024 - */ - tile_width?: number; - /** - * Tile Overlap - * @description The overlap between adjacent tiles in pixel space. (Of course, tile merging is applied in latent space.) Tiles will be cropped during merging (if necessary) to ensure that they overlap by exactly this amount. 
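// Illustrative example (not part of the generated schema above): how the "Tile to Properties"
// output fields relate to a Tile built from two TBLR records; width and height are simple
// differences of the coordinates, and the overlap_* fields mirror `tile.overlap`. The import
// path is an assumption for the example only.
import type { components } from "./schema";

type Tile = components["schemas"]["Tile"];

const tile: Tile = {
  coords: { top: 0, bottom: 512, left: 512, right: 1024 },
  overlap: { top: 0, bottom: 32, left: 32, right: 0 },
};

const width = tile.coords.right - tile.coords.left;  // 512 == coords_right - coords_left
const height = tile.coords.bottom - tile.coords.top; // 512 == coords_bottom - coords_top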
- * @default 32 - */ - tile_overlap?: number; - /** - * Steps - * @description Number of steps to run - * @default 18 - */ - steps?: number; - /** - * CFG Scale - * @description Classifier-Free Guidance scale - * @default 6 - */ - cfg_scale?: number | number[]; - /** - * Denoising Start - * @description When to start denoising, expressed a percentage of total steps - * @default 0 - */ - denoising_start?: number; - /** - * Denoising End - * @description When to stop denoising, expressed a percentage of total steps - * @default 1 - */ - denoising_end?: number; - /** - * Scheduler - * @description Scheduler to use during inference - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "deis_k" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_k" | "kdpm_2_a" | "kdpm_2_a_k" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_3m" | "dpmpp_3m_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc" | "unipc_k" | "lcm" | "tcd"; - /** - * UNet - * @description UNet (scheduler, LoRAs) - * @default null - */ - unet?: components["schemas"]["UNetField"]; - /** - * CFG Rescale Multiplier - * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR - * @default 0 - */ - cfg_rescale_multiplier?: number; - /** - * Control - * @default null - */ - control?: components["schemas"]["ControlField"] | components["schemas"]["ControlField"][] | null; - /** - * type - * @default tiled_multi_diffusion_denoise_latents - * @constant - * @enum {string} - */ - type: "tiled_multi_diffusion_denoise_latents"; - }; - /** TransformerField */ - TransformerField: { - /** @description Info to load Transformer submodel */ - transformer: components["schemas"]["ModelIdentifierField"]; - }; - /** - * UIComponent - * @description The type of UI component to use for a field, used to override the default components, which are - * inferred from the field type. - * @enum {string} - */ - UIComponent: "none" | "textarea" | "slider"; - /** - * UIConfigBase - * @description Provides additional node configuration to the UI. - * This is used internally by the @invocation decorator logic. Do not use this directly. - */ - UIConfigBase: { - /** - * Tags - * @description The node's tags - */ - tags: string[] | null; - /** - * Title - * @description The node's display name - * @default null - */ - title: string | null; - /** - * Category - * @description The node's category - * @default null - */ - category: string | null; - /** - * Version - * @description The node's version. Should be a valid semver string e.g. "1.0.0" or "3.8.13". - */ - version: string; - /** - * Node Pack - * @description Whether or not this is a custom node - * @default null - */ - node_pack: string | null; - /** - * @description The node's classification - * @default stable - */ - classification: components["schemas"]["Classification"]; - }; - /** - * UIType - * @description Type hints for the UI for situations in which the field type is not enough to infer the correct UI type. - * - * - Model Fields - * The most common node-author-facing use will be for model fields. Internally, there is no difference - * between SD-1, SD-2 and SDXL model fields - they all use the class `MainModelField`. To ensure the - * base-model-specific UI is rendered, use e.g. `ui_type=UIType.SDXLMainModelField` to indicate that - * the field is an SDXL main model field. 
- * - * - Any Field - * We cannot infer the usage of `typing.Any` via schema parsing, so you *must* use `ui_type=UIType.Any` to - * indicate that the field accepts any type. Use with caution. This cannot be used on outputs. - * - * - Scheduler Field - * Special handling in the UI is needed for this field, which otherwise would be parsed as a plain enum field. - * - * - Internal Fields - * Similar to the Any Field, the `collect` and `iterate` nodes make use of `typing.Any`. To facilitate - * handling these types in the client, we use `UIType._Collection` and `UIType._CollectionItem`. These - * should not be used by node authors. - * - * - DEPRECATED Fields - * These types are deprecated and should not be used by node authors. A warning will be logged if one is - * used, and the type will be ignored. They are included here for backwards compatibility. - * @enum {string} - */ - UIType: "MainModelField" | "FluxMainModelField" | "SDXLMainModelField" | "SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "T2IAdapterModelField" | "T5EncoderModelField" | "SpandrelImageToImageModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; - /** UNetField */ - UNetField: { - /** @description Info to load unet submodel */ - unet: components["schemas"]["ModelIdentifierField"]; - /** @description Info to load scheduler submodel */ - scheduler: components["schemas"]["ModelIdentifierField"]; - /** - * Loras - * @description LoRAs to apply on model loading - */ - loras: components["schemas"]["LoRAField"][]; - /** - * Seamless Axes - * @description Axes("x" and "y") to which apply seamless - */ - seamless_axes?: string[]; - /** - * @description FreeU configuration - * @default null - */ - freeu_config?: components["schemas"]["FreeUConfig"] | null; - }; - /** - * UNetOutput - * @description Base class for invocations that output a UNet field. - */ - UNetOutput: { - /** - * UNet - * @description UNet (scheduler, LoRAs) - */ - unet: components["schemas"]["UNetField"]; - /** - * type - * @default unet_output - * @constant - * @enum {string} - */ - type: "unet_output"; - }; - /** - * URLModelSource - * @description A generic URL point to a checkpoint file. 
- */ - URLModelSource: { - /** - * Url - * Format: uri - */ - url: string; - /** Access Token */ - access_token?: string | null; - /** - * Type - * @default url - * @constant - * @enum {string} - */ - type?: "url"; - }; - /** - * Unsharp Mask - * @description Applies an unsharp mask filter to an image - */ - UnsharpMaskInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to use - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * Radius - * @description Unsharp mask radius - * @default 2 - */ - radius?: number; - /** - * Strength - * @description Unsharp mask strength - * @default 50 - */ - strength?: number; - /** - * type - * @default unsharp_mask - * @constant - * @enum {string} - */ - type: "unsharp_mask"; - }; - /** Upscaler */ - Upscaler: { - /** - * Upscaling Method - * @description Name of upscaling method - */ - upscaling_method: string; - /** - * Upscaling Models - * @description List of upscaling models for this method - */ - upscaling_models: string[]; - }; - /** - * VAECheckpointConfig - * @description Model config for standalone VAE models. - */ - VAECheckpointConfig: { - /** - * Key - * @description A unique key for this model. - */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Format - * @description Format of the provided checkpoint model - * @default checkpoint - * @enum {string} - */ - format: "checkpoint" | "bnb_quantized_nf4b"; - /** - * Config Path - * @description path to the checkpoint model config file - */ - config_path: string; - /** - * Converted At - * @description When this model was last converted to diffusers - */ - converted_at?: number | null; - /** - * Type - * @default vae - * @constant - * @enum {string} - */ - type: "vae"; - }; - /** - * VAEDiffusersConfig - * @description Model config for standalone VAE models (diffusers version). - */ - VAEDiffusersConfig: { - /** - * Key - * @description A unique key for this model. 
- */ - key: string; - /** - * Hash - * @description The hash of the model file(s). - */ - hash: string; - /** - * Path - * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. - */ - path: string; - /** - * Name - * @description Name of the model. - */ - name: string; - /** @description The base model. */ - base: components["schemas"]["BaseModelType"]; - /** - * Description - * @description Model description - */ - description?: string | null; - /** - * Source - * @description The original source of the model (path, URL or repo_id). - */ - source: string; - /** @description The type of source */ - source_type: components["schemas"]["ModelSourceType"]; - /** - * Source Api Response - * @description The original API response from the source, as stringified JSON. - */ - source_api_response?: string | null; - /** - * Cover Image - * @description Url for image to preview model - */ - cover_image?: string | null; - /** - * Type - * @default vae - * @constant - * @enum {string} - */ - type: "vae"; - /** - * Format - * @default diffusers - * @constant - * @enum {string} - */ - format: "diffusers"; - }; - /** VAEField */ - VAEField: { - /** @description Info to load vae submodel */ - vae: components["schemas"]["ModelIdentifierField"]; - /** - * Seamless Axes - * @description Axes("x" and "y") to which apply seamless - */ - seamless_axes?: string[]; - }; - /** - * VAE - * @description Loads a VAE model, outputting a VaeLoaderOutput - */ - VAELoaderInvocation: { - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * VAE - * @description VAE model to load - * @default null - */ - vae_model?: components["schemas"]["ModelIdentifierField"]; - /** - * type - * @default vae_loader - * @constant - * @enum {string} - */ - type: "vae_loader"; - }; - /** - * VAEOutput - * @description Base class for invocations that output a VAE field - */ - VAEOutput: { - /** - * VAE - * @description VAE - */ - vae: components["schemas"]["VAEField"]; - /** - * type - * @default vae_output - * @constant - * @enum {string} - */ - type: "vae_output"; - }; - /** ValidationError */ - ValidationError: { - /** Location */ - loc: (string | number)[]; - /** Message */ - msg: string; - /** Error Type */ - type: string; - }; - /** Workflow */ - Workflow: { - /** - * Name - * @description The name of the workflow. - */ - name: string; - /** - * Author - * @description The author of the workflow. - */ - author: string; - /** - * Description - * @description The description of the workflow. - */ - description: string; - /** - * Version - * @description The version of the workflow. - */ - version: string; - /** - * Contact - * @description The contact of the workflow. - */ - contact: string; - /** - * Tags - * @description The tags of the workflow. - */ - tags: string; - /** - * Notes - * @description The notes of the workflow. - */ - notes: string; - /** - * Exposedfields - * @description The exposed fields of the workflow. - */ - exposedFields: components["schemas"]["ExposedField"][]; - /** @description The meta of the workflow. 
*/ - meta: components["schemas"]["WorkflowMeta"]; - /** - * Nodes - * @description The nodes of the workflow. - */ - nodes: { - [key: string]: components["schemas"]["JsonValue"]; - }[]; - /** - * Edges - * @description The edges of the workflow. - */ - edges: { - [key: string]: components["schemas"]["JsonValue"]; - }[]; - /** - * Id - * @description The id of the workflow. - */ - id: string; - }; - /** WorkflowAndGraphResponse */ - WorkflowAndGraphResponse: { - /** - * Workflow - * @description The workflow used to generate the image, as stringified JSON - */ - workflow: string | null; - /** - * Graph - * @description The graph used to generate the image, as stringified JSON - */ - graph: string | null; - }; - /** - * WorkflowCategory - * @enum {string} - */ - WorkflowCategory: "user" | "default" | "project"; - /** WorkflowMeta */ - WorkflowMeta: { - /** - * Version - * @description The version of the workflow schema. - */ - version: string; - /** - * @description The category of the workflow (user or default). - * @default user - */ - category?: components["schemas"]["WorkflowCategory"]; - }; - /** WorkflowRecordDTO */ - WorkflowRecordDTO: { - /** - * Workflow Id - * @description The id of the workflow. - */ - workflow_id: string; - /** - * Name - * @description The name of the workflow. - */ - name: string; - /** - * Created At - * @description The created timestamp of the workflow. - */ - created_at: string; - /** - * Updated At - * @description The updated timestamp of the workflow. - */ - updated_at: string; - /** - * Opened At - * @description The opened timestamp of the workflow. - */ - opened_at: string; - /** @description The workflow. */ - workflow: components["schemas"]["Workflow"]; - }; - /** WorkflowRecordListItemDTO */ - WorkflowRecordListItemDTO: { - /** - * Workflow Id - * @description The id of the workflow. - */ - workflow_id: string; - /** - * Name - * @description The name of the workflow. - */ - name: string; - /** - * Created At - * @description The created timestamp of the workflow. - */ - created_at: string; - /** - * Updated At - * @description The updated timestamp of the workflow. - */ - updated_at: string; - /** - * Opened At - * @description The opened timestamp of the workflow. - */ - opened_at: string; - /** - * Description - * @description The description of the workflow. - */ - description: string; - /** @description The description of the workflow. */ - category: components["schemas"]["WorkflowCategory"]; - }; - /** - * WorkflowRecordOrderBy - * @description The order by options for workflow records - * @enum {string} - */ - WorkflowRecordOrderBy: "created_at" | "updated_at" | "opened_at" | "name"; - /** WorkflowWithoutID */ - WorkflowWithoutID: { - /** - * Name - * @description The name of the workflow. - */ - name: string; - /** - * Author - * @description The author of the workflow. - */ - author: string; - /** - * Description - * @description The description of the workflow. - */ - description: string; - /** - * Version - * @description The version of the workflow. - */ - version: string; - /** - * Contact - * @description The contact of the workflow. - */ - contact: string; - /** - * Tags - * @description The tags of the workflow. - */ - tags: string; - /** - * Notes - * @description The notes of the workflow. - */ - notes: string; - /** - * Exposedfields - * @description The exposed fields of the workflow. - */ - exposedFields: components["schemas"]["ExposedField"][]; - /** @description The meta of the workflow. 
*/ - meta: components["schemas"]["WorkflowMeta"]; - /** - * Nodes - * @description The nodes of the workflow. - */ - nodes: { - [key: string]: components["schemas"]["JsonValue"]; - }[]; - /** - * Edges - * @description The edges of the workflow. - */ - edges: { - [key: string]: components["schemas"]["JsonValue"]; - }[]; - }; - /** - * Zoe (Depth) Processor - * @description Applies Zoe depth processing to image - */ - ZoeDepthImageProcessorInvocation: { - /** - * @description The board to save the image to - * @default null - */ - board?: components["schemas"]["BoardField"] | null; - /** - * @description Optional metadata to be saved with the image - * @default null - */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** - * @description The image to process - * @default null - */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default zoe_depth_image_processor - * @constant - * @enum {string} - */ - type: "zoe_depth_image_processor"; - }; - }; - responses: never; - parameters: never; - requestBodies: never; - headers: never; - pathItems: never; ->>>>>>> bfbb72a8b (tsc and lint fix) }; export type $defs = Record; export interface operations { From a04d4793cc7087f63f2c53a60ac698de73398799 Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:45:02 -0400 Subject: [PATCH 061/113] update default workflow --- .../default_workflows/Flux Text to Image.json | 230 +++++++++--------- 1 file changed, 118 insertions(+), 112 deletions(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index 4be8ebf07c5..8f569b3df9d 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -8,15 +8,15 @@ "notes": "", "exposedFields": [ { - "nodeId": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "nodeId": "fd1274a3-c56d-4a96-aa21-83d14c920f08", "fieldName": "model" }, { - "nodeId": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", + "nodeId": "fd1274a3-c56d-4a96-aa21-83d14c920f08", "fieldName": "t5_encoder" }, { - "nodeId": "7187b891-8b9e-41f2-bad0-579c14c92faf", + "nodeId": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", "fieldName": "positive_prompt" } ], @@ -24,7 +24,87 @@ "version": "3.0.0", "category": "default" }, - "nodes": [ +"nodes": [ + { + "id": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "type": "invocation", + "data": { + "id": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "type": "flux_text_encoder", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "clip": { + "name": "clip", + "label": "" + }, + "t5_encoder": { + "name": "t5_encoder", + "label": "" + }, + "t5_max_seq_len": { + "name": "t5_max_seq_len", + "label": "", + "value": 256 + }, + "positive_prompt": { + "name": "positive_prompt", + "label": "", + "value": "" + } + } + }, + "position": { + "x": 817.9049777916891, + "y": 137.19130248771572 + } + }, + { + "id": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + 
"type": "invocation", + "data": { + "id": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "type": "flux_model_loader", + "version": "1.0.3", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "model": { + "name": "model", + "label": "", + "value": { + "key": "a1deb125-2781-482c-8a71-9a22e76fd956", + "hash": "random:40bd0a5b8b2c6edf8f5611e049000329b952efc6a1a24b4f77ca4ae3dbecaf6a", + "name": "flux1-schnell-bnb_nf4", + "base": "flux", + "type": "main" + } + }, + "t5_encoder": { + "name": "t5_encoder", + "label": "T5 Encoder (Model Manager -> Starter Models)", + "value": { + "key": "798baafd-63bd-4799-8600-5db43662f3aa", + "hash": "random:4495f3ac1650e9d5969c365e951a9cffee526573a15f9f4c997e1c63550444fd", + "name": "t5_8b_quantized_encoder", + "base": "any", + "type": "t5_encoder" + } + } + } + }, + "position": { + "x": 343.41871407356723, + "y": -1.0211223664301414 + } + }, { "id": "4754c534-a5f3-4ad0-9382-7887985e668c", "type": "invocation", @@ -119,138 +199,64 @@ "x": 1216.3900791301849, "y": 5.500841807102248 } - }, - { - "id": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "type": "invocation", - "data": { - "id": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "type": "flux_text_encoder", - "version": "1.0.0", - "label": "", - "notes": "", - "isOpen": true, - "isIntermediate": true, - "useCache": false, - "inputs": { - "clip": { - "name": "clip", - "label": "" - }, - "t5_encoder": { - "name": "t5_encoder", - "label": "" - }, - "max_seq_len": { - "name": "max_seq_len", - "label": "", - "value": 256 - }, - "positive_prompt": { - "name": "positive_prompt", - "label": "", - "value": "dog eating an ice cream cone while watching the TV" - } - } - }, - "position": { - "x": 809.5428272455715, - "y": 111.5674004989348 - } - }, - { - "id": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "type": "invocation", - "data": { - "id": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "type": "flux_model_loader", - "version": "1.0.3", - "label": "", - "notes": "", - "isOpen": true, - "isIntermediate": true, - "useCache": false, - "inputs": { - "model": { - "name": "model", - "label": "", - "value": { - "key": "a1deb125-2781-482c-8a71-9a22e76fd956", - "hash": "random:40bd0a5b8b2c6edf8f5611e049000329b952efc6a1a24b4f77ca4ae3dbecaf6a", - "name": "flux1-schnell-bnb_nf4", - "base": "flux", - "type": "main" - } - }, - "t5_encoder": { - "name": "t5_encoder", - "label": "", - "value": "8b_quantized" - } - } - }, - "position": { - "x": 407.297070550788, - "y": 37.50301331772734 - } } ], "edges": [ { - "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-159bdf1b-79e7-4174-b86e-d40e646964c8seed", + "id": "reactflow__edge-0fa90f47-79cd-4d84-9445-f0b1065e0c3cconditioning-159bdf1b-79e7-4174-b86e-d40e646964c8positive_text_conditioning", "type": "default", - "source": "4754c534-a5f3-4ad0-9382-7887985e668c", + "source": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "value", - "targetHandle": "seed" + "sourceHandle": "conditioning", + "targetHandle": "positive_text_conditioning" }, { - "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44vae-159bdf1b-79e7-4174-b86e-d40e646964c8vae", + "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08max_seq_len-0fa90f47-79cd-4d84-9445-f0b1065e0c3ct5_max_seq_len", "type": "default", - "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "vae", - "targetHandle": "vae" + "source": 
"fd1274a3-c56d-4a96-aa21-83d14c920f08", + "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "sourceHandle": "max_seq_len", + "targetHandle": "t5_max_seq_len" }, { - "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44transformer-159bdf1b-79e7-4174-b86e-d40e646964c8transformer", + "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08t5_encoder-0fa90f47-79cd-4d84-9445-f0b1065e0c3ct5_encoder", "type": "default", - "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "transformer", - "targetHandle": "transformer" + "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "sourceHandle": "t5_encoder", + "targetHandle": "t5_encoder" }, { - "id": "reactflow__edge-7187b891-8b9e-41f2-bad0-579c14c92fafconditioning-159bdf1b-79e7-4174-b86e-d40e646964c8positive_text_conditioning", + "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08clip-0fa90f47-79cd-4d84-9445-f0b1065e0c3cclip", "type": "default", - "source": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "conditioning", - "targetHandle": "positive_text_conditioning" + "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "sourceHandle": "clip", + "targetHandle": "clip" }, { - "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44max_seq_len-7187b891-8b9e-41f2-bad0-579c14c92fafmax_seq_len", + "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08vae-159bdf1b-79e7-4174-b86e-d40e646964c8vae", "type": "default", - "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "sourceHandle": "max_seq_len", - "targetHandle": "max_seq_len" + "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "vae", + "targetHandle": "vae" }, { - "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44t5_encoder-7187b891-8b9e-41f2-bad0-579c14c92faft5_encoder", + "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08transformer-159bdf1b-79e7-4174-b86e-d40e646964c8transformer", "type": "default", - "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "sourceHandle": "t5_encoder", - "targetHandle": "t5_encoder" + "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "transformer", + "targetHandle": "transformer" }, { - "id": "reactflow__edge-90701a55-0a0f-444d-ab7d-ea9b7361dd44clip-7187b891-8b9e-41f2-bad0-579c14c92fafclip", + "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-159bdf1b-79e7-4174-b86e-d40e646964c8seed", "type": "default", - "source": "90701a55-0a0f-444d-ab7d-ea9b7361dd44", - "target": "7187b891-8b9e-41f2-bad0-579c14c92faf", - "sourceHandle": "clip", - "targetHandle": "clip" + "source": "4754c534-a5f3-4ad0-9382-7887985e668c", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "value", + "targetHandle": "seed" } ] } From 192eda72e7aec86cf6cbf20115594a051243dd6a Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:45:21 -0400 Subject: [PATCH 062/113] fix(worker) fix T5 type --- invokeai/app/invocations/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 300e38b2312..e6beeed42dd 100644 --- a/invokeai/app/invocations/model.py +++ 
b/invokeai/app/invocations/model.py @@ -178,7 +178,7 @@ class FluxModelLoaderInvocation(BaseInvocation): ) t5_encoder: ModelIdentifierField = InputField( - description=FieldDescriptions.t5Encoder, + description=FieldDescriptions.t5_encoder, ui_type=UIType.T5EncoderModel, input=Input.Direct, ) From 3c861fd54f51674fc47cc9c04a5240875dc01372 Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:49:05 -0400 Subject: [PATCH 063/113] add better workflow description --- .../workflow_records/default_workflows/Flux Text to Image.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index 8f569b3df9d..5e0a47a8fe6 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -1,7 +1,7 @@ { "name": "Flux Text to Image", "author": "InvokeAI", - "description": "A simple text-to-image workflow using Flux Dev or Flux Schnell", + "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. These can be found in the starter models tab within your Model Manager.", "version": "1.0.0", "contact": "", "tags": "text2image, flux", From dcfdc002c6cf5d5117e6892eaf4b4c1e6629585d Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 14:52:23 -0400 Subject: [PATCH 064/113] add better workflow name --- .../workflow_records/default_workflows/Flux Text to Image.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index 5e0a47a8fe6..ccf5bc7a7b9 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -1,5 +1,5 @@ { - "name": "Flux Text to Image", + "name": "FLUX Text to Image", "author": "InvokeAI", "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. These can be found in the starter models tab within your Model Manager.", "version": "1.0.0", From f51dd365767c437871640340a61dc07a89011576 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 19:01:11 +0000 Subject: [PATCH 065/113] Fix bug in InvokeInt8Params that was causing it to use double the necessary VRAM. 
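
Here `data` already contains the quantized int8 weight that `CB` is meant to reference, so the old
override transferred the same weight to the GPU twice: once via `self.data.cuda()` and once via
`self.CB.cuda()`. Re-using the tensor that `self.data.cuda()` just produced keeps a single copy in
VRAM. A minimal sketch of the pattern, using an illustrative class name rather than the real
bitsandbytes `Int8Params` override (requires a CUDA device to actually run):

    import torch

    class Int8ParamSketch:
        """Illustrative stand-in for an int8 parameter wrapper; not the real bitsandbytes class."""

        def __init__(self, weight_int8: torch.Tensor) -> None:
            self.data = weight_int8  # row-major int8 weight
            self.CB = weight_int8    # second reference to the same quantized weight

        def cuda_before_fix(self) -> "Int8ParamSketch":
            self.data = self.data.cuda()
            self.CB = self.CB.cuda()  # separate transfer -> the weight is resident on the GPU twice
            return self

        def cuda_after_fix(self) -> "Int8ParamSketch":
            self.data = self.data.cuda()
            self.CB = self.data  # alias the tensor that was just moved -> single GPU copy
            return self
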
--- invokeai/backend/quantization/bnb_llm_int8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py index f196ebc43e9..b92717cbc57 100644 --- a/invokeai/backend/quantization/bnb_llm_int8.py +++ b/invokeai/backend/quantization/bnb_llm_int8.py @@ -22,7 +22,7 @@ def cuda(self, device): return super().cuda(device) elif self.CB is not None and self.SCB is not None: self.data = self.data.cuda() - self.CB = self.CB.cuda() + self.CB = self.data self.SCB = self.SCB.cuda() else: # we store the 8-bit rows-major weight From 3c9811ffa7f4d666af5586cf0c887644832ca524 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 19:03:09 +0000 Subject: [PATCH 066/113] Update load_flux_model_bnb_llm_int8.py to work with a single-file FLUX transformer checkpoint. --- .../load_flux_model_bnb_llm_int8.py | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py index a24370967cc..47ce0f56b13 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py @@ -1,7 +1,8 @@ from pathlib import Path import accelerate -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel +from flux.model import Flux +from flux.util import configs as flux_configs from safetensors.torch import load_file, save_file from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 @@ -11,30 +12,32 @@ def main(): # Load the FLUX transformer model onto the meta device. model_path = Path( - "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/" + "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors" ) - with log_time("Initialize FLUX transformer on meta device"): - model_config = FluxTransformer2DModel.load_config(model_path, local_files_only=True) + with log_time("Intialize FLUX transformer on meta device"): + # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. + params = flux_configs["flux-schnell"].params + + # Initialize the model on the "meta" device. with accelerate.init_empty_weights(): - empty_model = FluxTransformer2DModel.from_config(model_config) - assert isinstance(empty_model, FluxTransformer2DModel) + model = Flux(params) # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. modules_to_not_convert: set[str] = set() - model_int8_path = model_path / "bnb_llm_int8" + model_int8_path = model_path.parent / "bnb_llm_int8.safetensors" if model_int8_path.exists(): # The quantized model already exists, load it and return it. print(f"A pre-quantized model already exists at '{model_int8_path}'. Attempting to load it...") # Replace the linear layers with LLM.int8() quantized linear layers (still on the meta device). 
with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): - model = quantize_model_llm_int8(empty_model, modules_to_not_convert=modules_to_not_convert) + model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert) with log_time("Load state dict into model"): - sd = load_file(model_int8_path / "model.safetensors") + sd = load_file(model_int8_path) model.load_state_dict(sd, strict=True, assign=True) with log_time("Move model to cuda"): @@ -47,29 +50,23 @@ def main(): print(f"No pre-quantized model found at '{model_int8_path}'. Quantizing the model...") with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): - model = quantize_model_llm_int8(empty_model, modules_to_not_convert=modules_to_not_convert) + model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert) with log_time("Load state dict into model"): - # Load sharded state dict. - files = list(model_path.glob("*.safetensors")) - state_dict = {} - for file in files: - sd = load_file(file) - state_dict.update(sd) - + state_dict = load_file(model_path) + # TODO(ryand): Cast the state_dict to the appropriate dtype? model.load_state_dict(state_dict, strict=True, assign=True) with log_time("Move model to cuda and quantize"): model = model.to("cuda") with log_time("Save quantized model"): - model_int8_path.mkdir(parents=True, exist_ok=True) - output_path = model_int8_path / "model.safetensors" - save_file(model.state_dict(), output_path) + model_int8_path.parent.mkdir(parents=True, exist_ok=True) + save_file(model.state_dict(), model_int8_path) - print(f"Successfully quantized and saved model to '{output_path}'.") + print(f"Successfully quantized and saved model to '{model_int8_path}'.") - assert isinstance(model, FluxTransformer2DModel) + assert isinstance(model, Flux) return model From 9982bc21d10a2eb167287c16e3418e911015ad10 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 19:04:04 +0000 Subject: [PATCH 067/113] Add docs to the quantization scripts. --- .../backend/quantization/load_flux_model_bnb_llm_int8.py | 5 +++++ invokeai/backend/quantization/load_flux_model_bnb_nf4.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py index 47ce0f56b13..c01193e6ac2 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py @@ -10,6 +10,11 @@ def main(): + """A script for quantizing a FLUX transformer model using the bitsandbytes LLM.int8() quantization method. + + This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert, + etc.) are hardcoded and would need to be modified for other use cases. + """ # Load the FLUX transformer model onto the meta device. 
model_path = Path( "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors" diff --git a/invokeai/backend/quantization/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/load_flux_model_bnb_nf4.py index 80f3e71901e..fe88b79d328 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/quantization/load_flux_model_bnb_nf4.py @@ -23,6 +23,11 @@ def log_time(name: str): def main(): + """A script for quantizing a FLUX transformer model using the bitsandbytes NF4 quantization method. + + This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert, + etc.) are hardcoded and would need to be modified for other use cases. + """ model_path = Path( "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors" ) From c5c60f5f17a51e1bcf3b28ce11f3fa336a8f1ab4 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 19:17:39 +0000 Subject: [PATCH 068/113] Fix max_seq_len field description. --- invokeai/app/invocations/model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index e6beeed42dd..6f542810758 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -164,7 +164,10 @@ class FluxModelLoaderOutput(BaseInvocationOutput): clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP") t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder") vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") - max_seq_len: Literal[256, 512] = OutputField(description=FieldDescriptions.vae, title="Max Seq Length") + max_seq_len: Literal[256, 512] = OutputField( + description="The max sequence length to used for the T5 encoder. 
(256 for schnell transformer, 512 for dev transformer)", + title="Max Seq Length", + ) @invocation("flux_model_loader", title="Flux Main Model", tags=["model", "flux"], category="model", version="1.0.3") From 5f3e3256e847dad8f812f99607822255c8057ecd Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 15:34:34 -0400 Subject: [PATCH 069/113] Remove automatic install of models during flux model loader, remove no longer used import function on context --- invokeai/app/invocations/model.py | 56 +++++-------------- .../app/services/shared/invocation_context.py | 23 -------- 2 files changed, 13 insertions(+), 66 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 6f542810758..75ed9fceab5 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -141,21 +141,6 @@ def invoke(self, context: InvocationContext) -> ModelIdentifierOutput: return ModelIdentifierOutput(model=self.model) -T5_ENCODER_OPTIONS = Literal["base", "8b_quantized"] -T5_ENCODER_MAP: Dict[str, Dict[str, str]] = { - "base": { - "repo": "InvokeAI/flux_schnell::t5_xxl_encoder/base", - "name": "t5_base_encoder", - "format": ModelFormat.T5Encoder, - }, - "8b_quantized": { - "repo": "invokeai/flux_schnell::t5_xxl_encoder/optimum_quanto_qfloat8", - "name": "t5_8b_quantized_encoder", - "format": ModelFormat.T5Encoder8b, - }, -} - - @invocation_output("flux_model_loader_output") class FluxModelLoaderOutput(BaseInvocationOutput): """Flux base model loader output""" @@ -196,15 +181,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: tokenizer2 = self._get_model(context, SubModelType.Tokenizer2) clip_encoder = self._get_model(context, SubModelType.TextEncoder) t5_encoder = self._get_model(context, SubModelType.TextEncoder2) - vae = self._install_model( - context, - SubModelType.VAE, - "FLUX.1-schnell_ae", - "black-forest-labs/FLUX.1-schnell::ae.safetensors", - ModelFormat.Checkpoint, - ModelType.VAE, - BaseModelType.Flux, - ) + vae = self._get_model(context, SubModelType.VAE) transformer_config = context.models.get_config(transformer) assert isinstance(transformer_config, CheckpointConfigBase) legacy_config_path = context.config.get().legacy_conf_path / transformer_config.config_path @@ -224,36 +201,38 @@ def _get_model(self, context: InvocationContext, submodel: SubModelType) -> Mode match submodel: case SubModelType.Transformer: return self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + case SubModelType.VAE: + return self._pull_model_from_mm( + context, + SubModelType.VAE, + "FLUX.1-schnell_ae", + ModelType.VAE, + BaseModelType.Flux, + ) case submodel if submodel in [SubModelType.Tokenizer, SubModelType.TextEncoder]: - return self._install_model( + return self._pull_model_from_mm( context, submodel, "clip-vit-large-patch14", - "openai/clip-vit-large-patch14", - ModelFormat.Diffusers, ModelType.CLIPEmbed, BaseModelType.Any, ) case submodel if submodel in [SubModelType.Tokenizer2, SubModelType.TextEncoder2]: - return self._install_model( + return self._pull_model_from_mm( context, submodel, self.t5_encoder.name, - "", - ModelFormat.T5Encoder, ModelType.T5Encoder, BaseModelType.Any, ) case _: raise Exception(f"{submodel.value} is not a supported submodule for a flux model") - def _install_model( + def _pull_model_from_mm( self, context: InvocationContext, submodel: SubModelType, name: str, - repo_id: str, - format: ModelFormat, type: ModelType, base: BaseModelType, ): @@ -262,16 +241,7 @@ def 
_install_model( raise Exception(f"Multiple models detected for selected model with name {name}") return ModelIdentifierField.from_config(models[0]).model_copy(update={"submodel_type": submodel}) else: - model_path = context.models.download_and_cache_model(repo_id) - config = ModelRecordChanges(name=name, base=base, type=type, format=format) - model_install_job = context.models.import_local_model(model_path=model_path, config=config) - while not model_install_job.in_terminal_state: - sleep(0.01) - if not model_install_job.config_out: - raise Exception(f"Failed to install {name}") - return ModelIdentifierField.from_config(model_install_job.config_out).model_copy( - update={"submodel_type": submodel} - ) + raise ValueError(f"Please install the {base}:{type} model named {name} via starter models") @invocation( diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index 23189b85ab2..3f378b663e9 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -464,29 +464,6 @@ def download_and_cache_model( """ return self._services.model_manager.install.download_and_cache_model(source=source) - def import_local_model( - self, - model_path: Path, - config: Optional[ModelRecordChanges] = None, - inplace: Optional[bool] = False, - ): - """ - Import the model file located at the given local file path and return its ModelInstallJob. - - This can be used to single-file models or directories. - - Args: - model_path: A pathlib.Path object pointing to a model file or directory - config: Optional ModelRecordChanges to define manual probe overrides - inplace: Optional boolean to declare whether or not to install the model in the models dir - - Returns: - ModelInstallJob object defining the install job to be used in tracking the job - """ - if not model_path.exists(): - raise ValueError(f"Models provided to import_local_model must already exist on disk at {model_path.as_posix()}") - return self._services.model_manager.install.heuristic_import(str(model_path), config=config, inplace=inplace) - def load_local_model( self, model_path: Path, From bc6e1baff3590080affde218031c28723aecb840 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 15:37:27 -0400 Subject: [PATCH 070/113] Run ruff --- invokeai/app/invocations/model.py | 5 +---- .../app/services/shared/invocation_context.py | 1 - .../model_manager/load/model_loaders/flux.py | 20 ++++++++++++++----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 75ed9fceab5..eadf3002d5d 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,6 +1,5 @@ import copy -from time import sleep -from typing import Dict, List, Literal, Optional +from typing import List, Literal, Optional import yaml from pydantic import BaseModel, Field @@ -13,14 +12,12 @@ invocation_output, ) from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType -from invokeai.app.services.model_records import ModelRecordChanges from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig from invokeai.backend.model_manager.config import ( AnyModelConfig, BaseModelType, CheckpointConfigBase, - ModelFormat, ModelType, SubModelType, ) diff --git a/invokeai/app/services/shared/invocation_context.py 
b/invokeai/app/services/shared/invocation_context.py index 3f378b663e9..01662335e46 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -13,7 +13,6 @@ from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin from invokeai.app.services.images.images_common import ImageDTO from invokeai.app.services.invocation_services import InvocationServices -from invokeai.app.services.model_records import ModelRecordChanges from invokeai.app.services.model_records.model_records_base import UnknownModelException from invokeai.app.util.step_callback import stable_diffusion_step_callback from invokeai.backend.model_manager.config import ( diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index ebc3333eea9..bb57e4413c5 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -87,7 +87,9 @@ def _load_model( case SubModelType.TextEncoder: return CLIPTextModel.from_pretrained(config.path) - raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") + raise ValueError( + f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" + ) @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) @@ -108,7 +110,9 @@ def _load_model( case SubModelType.TextEncoder2: return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2") - raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") + raise ValueError( + f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" + ) @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) @@ -131,7 +135,9 @@ def _load_model( Path(config.path) / "text_encoder_2" ) # TODO: Fix hf subfolder install - raise ValueError(f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") + raise ValueError( + f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" + ) @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint) @@ -154,7 +160,9 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise ValueError(f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") + raise ValueError( + f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" + ) def _load_from_singlefile( self, @@ -194,7 +202,9 @@ def _load_model( case SubModelType.Transformer: return self._load_from_singlefile(config, flux_conf) - raise ValueError(f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}") + raise ValueError( + f"Only Transformer submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}" + ) def _load_from_singlefile( self, From 407796c8ec38bd8206a8dc203bca3eb47a15bea9 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 15:44:38 -0400 Subject: [PATCH 071/113] Undo changes to the v2 dir of frontend types --- .../web/src/features/nodes/types/v2/common.ts | 2 +- .../web/src/features/nodes/types/v2/field.ts | 17 ----------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/types/v2/common.ts b/invokeai/frontend/web/src/features/nodes/types/v2/common.ts index 64d4db04515..8613076132d 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v2/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v2/common.ts @@ -44,7 +44,7 @@ export const zSchedulerField = z.enum([ // #endregion // #region Model-related schemas -const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner', 'flux']); +const zBaseModel = z.enum(['any', 'sd-1', 'sd-2', 'sdxl', 'sdxl-refiner']); const zModelName = z.string().min(3); export const zModelIdentifier = z.object({ model_name: zModelName, diff --git a/invokeai/frontend/web/src/features/nodes/types/v2/field.ts b/invokeai/frontend/web/src/features/nodes/types/v2/field.ts index a02a9985089..4b680d1de33 100644 --- a/invokeai/frontend/web/src/features/nodes/types/v2/field.ts +++ b/invokeai/frontend/web/src/features/nodes/types/v2/field.ts @@ -203,20 +203,6 @@ const zSDXLMainModelFieldOutputInstance = zFieldOutputInstanceBase.extend({ }); // #endregion -// #region FluxMainModelField -const zFluxMainModelFieldType = zFieldTypeBase.extend({ - name: z.literal('FluxMainModelField'), -}); -const zFluxMainModelFieldValue = zMainModelFieldValue; // TODO: Narrow to SDXL models only. 
-const zFluxMainModelFieldInputInstance = zFieldInputInstanceBase.extend({ - type: zFluxMainModelFieldType, - value: zFluxMainModelFieldValue, -}); -const zFluxMainModelFieldOutputInstance = zFieldOutputInstanceBase.extend({ - type: zFluxMainModelFieldType, -}); -// #endregion - // #region SDXLRefinerModelField const zSDXLRefinerModelFieldType = zFieldTypeBase.extend({ name: z.literal('SDXLRefinerModelField'), @@ -352,7 +338,6 @@ const zStatefulFieldType = z.union([ zBoardFieldType, zMainModelFieldType, zSDXLMainModelFieldType, - zFluxMainModelFieldType, zSDXLRefinerModelFieldType, zVAEModelFieldType, zLoRAModelFieldType, @@ -392,7 +377,6 @@ const zStatefulFieldInputInstance = z.union([ zBoardFieldInputInstance, zMainModelFieldInputInstance, zSDXLMainModelFieldInputInstance, - zFluxMainModelFieldInputInstance, zSDXLRefinerModelFieldInputInstance, zVAEModelFieldInputInstance, zLoRAModelFieldInputInstance, @@ -417,7 +401,6 @@ const zStatefulFieldOutputInstance = z.union([ zBoardFieldOutputInstance, zMainModelFieldOutputInstance, zSDXLMainModelFieldOutputInstance, - zFluxMainModelFieldOutputInstance, zSDXLRefinerModelFieldOutputInstance, zVAEModelFieldOutputInstance, zLoRAModelFieldOutputInstance, From d4ec4341e971892c613ffc5412a3859ae9d560d2 Mon Sep 17 00:00:00 2001 From: maryhipp Date: Wed, 21 Aug 2024 15:46:35 -0400 Subject: [PATCH 072/113] added FLUX dev to starter models --- invokeai/backend/model_manager/starter_models.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 8be46882a06..7d5233d767d 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -96,6 +96,14 @@ class StarterModel(StarterModelWithoutDependencies): type=ModelType.Main, dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder], ), + StarterModel( + name="FLUX Dev (Quantized)", + base=BaseModelType.Flux, + source="InvokeAI/flux_dev::transformer/bnb_nf4/flux1-dev-bnb_nf4.safetensors", + description="FLUX dev transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~16GB", + type=ModelType.Main, + dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder], + ), StarterModel( name="FLUX Schnell", base=BaseModelType.Flux, @@ -104,6 +112,14 @@ class StarterModel(StarterModelWithoutDependencies): type=ModelType.Main, dependencies=[t5_base_encoder, flux_vae, clip_l_encoder], ), + StarterModel( + name="FLUX Dev", + base=BaseModelType.Flux, + source="InvokeAI/flux_dev::transformer/base/flux1-dev.safetensors", + description="FLUX dev transformer in bfloat16. Total size with dependencies: ~34GB", + type=ModelType.Main, + dependencies=[t5_base_encoder, flux_vae, clip_l_encoder], + ), StarterModel( name="CyberRealistic v4.1", base=BaseModelType.StableDiffusion1, From 374dc8298336ca9ebc34f6d4a26c086f88e1deb8 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 15:48:21 -0400 Subject: [PATCH 073/113] Don't install bitsandbytes on macOS --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6b22b45babf..5be3117af30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ dependencies = [ # Core generation dependencies, pinned for reproducible builds. 
"accelerate==0.33.0", - "bitsandbytes==0.43.3", + "bitsandbytes==0.43.3; sys_platform!='darwin'", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", "controlnet-aux==0.0.7", From 80a46d292ee0318bc626d7cdf0c11fddcd9f25ea Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Wed, 21 Aug 2024 15:53:58 -0400 Subject: [PATCH 074/113] Attribute black-forest-labs/flux for much of the flux code --- invokeai/backend/flux/math.py | 2 ++ invokeai/backend/flux/model.py | 2 ++ invokeai/backend/flux/modules/autoencoder.py | 2 ++ invokeai/backend/flux/modules/conditioner.py | 2 ++ invokeai/backend/flux/modules/layers.py | 2 ++ invokeai/backend/flux/sampling.py | 2 ++ 6 files changed, 12 insertions(+) diff --git a/invokeai/backend/flux/math.py b/invokeai/backend/flux/math.py index 0156bb6a205..aa719b7c072 100644 --- a/invokeai/backend/flux/math.py +++ b/invokeai/backend/flux/math.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + import torch from einops import rearrange from torch import Tensor diff --git a/invokeai/backend/flux/model.py b/invokeai/backend/flux/model.py index f7ef25bf4fa..5358ddf0bc0 100644 --- a/invokeai/backend/flux/model.py +++ b/invokeai/backend/flux/model.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + from dataclasses import dataclass import torch diff --git a/invokeai/backend/flux/modules/autoencoder.py b/invokeai/backend/flux/modules/autoencoder.py index 75159f711f6..ae003261e7f 100644 --- a/invokeai/backend/flux/modules/autoencoder.py +++ b/invokeai/backend/flux/modules/autoencoder.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + from dataclasses import dataclass import torch diff --git a/invokeai/backend/flux/modules/conditioner.py b/invokeai/backend/flux/modules/conditioner.py index 974ad64ab3a..de6d8256c4f 100644 --- a/invokeai/backend/flux/modules/conditioner.py +++ b/invokeai/backend/flux/modules/conditioner.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + from torch import Tensor, nn from transformers import PreTrainedModel, PreTrainedTokenizer diff --git a/invokeai/backend/flux/modules/layers.py b/invokeai/backend/flux/modules/layers.py index d93dddba0fc..23dc2448d3c 100644 --- a/invokeai/backend/flux/modules/layers.py +++ b/invokeai/backend/flux/modules/layers.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + import math from dataclasses import dataclass diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 675728a94b0..9917d63a8ba 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -1,3 +1,5 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + import math from typing import Callable From 5406a2f96735b97d264f3d582fa7008352100e38 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 15:29:59 +0000 Subject: [PATCH 075/113] Mark FLUX nodes as prototypes. 
--- invokeai/app/invocations/flux_text_encoder.py | 3 ++- invokeai/app/invocations/flux_text_to_image.py | 3 ++- invokeai/app/invocations/model.py | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 54c6ff2b332..d3585c66a64 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -3,7 +3,7 @@ import torch from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer -from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField from invokeai.app.invocations.model import CLIPField, T5EncoderField from invokeai.app.invocations.primitives import ConditioningOutput @@ -18,6 +18,7 @@ tags=["prompt", "conditioning", "flux"], category="conditioning", version="1.0.0", + classification=Classification.Prototype, ) class FluxTextEncoderInvocation(BaseInvocation): """Encodes and preps a prompt for a flux image.""" diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index d2789b86f04..93d763428bb 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -2,7 +2,7 @@ from einops import rearrange, repeat from PIL import Image -from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import ( ConditioningField, FieldDescriptions, @@ -28,6 +28,7 @@ tags=["image", "flux"], category="image", version="1.0.0", + classification=Classification.Prototype, ) class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index eadf3002d5d..756686b548a 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -152,7 +152,14 @@ class FluxModelLoaderOutput(BaseInvocationOutput): ) -@invocation("flux_model_loader", title="Flux Main Model", tags=["model", "flux"], category="model", version="1.0.3") +@invocation( + "flux_model_loader", + title="Flux Main Model", + tags=["model", "flux"], + category="model", + version="1.0.3", + classification=Classification.Prototype, +) class FluxModelLoaderInvocation(BaseInvocation): """Loads a flux base model, outputting its submodels.""" From c9c4e47a2a37275bf2cc7bc0661529297b0b10bb Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 15:56:30 +0000 Subject: [PATCH 076/113] Make FLUX get_noise(...) consistent across devices/dtypes. --- invokeai/app/invocations/flux_text_to_image.py | 2 -- invokeai/backend/flux/sampling.py | 11 +++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 93d763428bb..8b947b9a8db 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -79,8 +79,6 @@ def _run_diffusion( inference_dtype = torch.bfloat16 # Prepare input noise. - # TODO(ryand): Does the seed behave the same on different devices? 
Should we re-implement this to always use a - # CPU RNG? x = get_noise( num_samples=1, height=self.height, diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 9917d63a8ba..82abc0e561a 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -20,16 +20,19 @@ def get_noise( dtype: torch.dtype, seed: int, ): + # We always generate noise on the same device and dtype then cast to ensure consistency across devices/dtypes. + rand_device = "cpu" + rand_dtype = torch.float16 return torch.randn( num_samples, 16, # allow for packing 2 * math.ceil(height / 16), 2 * math.ceil(width / 16), - device=device, - dtype=dtype, - generator=torch.Generator(device=device).manual_seed(seed), - ) + device=rand_device, + dtype=rand_dtype, + generator=torch.Generator(device=rand_device).manual_seed(seed), + ).to(device=device, dtype=dtype) def prepare(t5: HFEncoder, clip: HFEncoder, img: Tensor, prompt: str | list[str]) -> dict[str, Tensor]: From 22a3b3d99677822c8edff07925f0a902ac8265a5 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 16:03:54 +0000 Subject: [PATCH 077/113] Tidy is_schnell detection logic. --- invokeai/app/invocations/flux_text_to_image.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 8b947b9a8db..f29b3dd3091 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -17,7 +17,6 @@ from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack -from invokeai.backend.model_manager.config import CheckpointConfigBase from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice @@ -90,12 +89,8 @@ def _run_diffusion( img, img_ids = self._prepare_latent_img_patches(x) - # HACK(ryand): Find a better way to determine if this is a schnell model or not. - is_schnell = ( - "schnell" in transformer_info.config.config_path - if transformer_info.config and isinstance(transformer_info.config, CheckpointConfigBase) - else "" - ) + is_schnell = "schnell" in transformer_info.config.config_path + timesteps = get_schedule( num_steps=self.num_steps, image_seq_len=img.shape[1], From 5307a6ff900079f8e8f36ea3c9faf626da5fb2ab Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 16:09:46 +0000 Subject: [PATCH 078/113] Add comment about incorrect T5 Tokenizer size calculation. --- invokeai/backend/model_manager/load/model_util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index 6f93fcbd759..4b8b5a8dded 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -57,6 +57,9 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: T5Tokenizer, ), ): + # HACK(ryand): len(model) just returns the vocabulary size, so this is blatantly wrong. It should be small + # relative to the text encoder that it's used with, so shouldn't matter too much, but we should fix this at some + # point. 
return len(model) else: # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the From 5e9ef4bab332dd5d71166e52e2f7d3c327e156d9 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 16:18:33 +0000 Subject: [PATCH 079/113] Rename field positive_prompt -> prompt. --- invokeai/app/invocations/flux_text_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index d3585c66a64..ae3456be061 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -36,7 +36,7 @@ class FluxTextEncoderInvocation(BaseInvocation): t5_max_seq_len: Literal[256, 512] = InputField( description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models." ) - positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") + prompt: str = InputField(description="Text prompt to encode.") # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not # compatible with other ConditioningOutputs. @@ -73,7 +73,7 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len) - prompt = [self.positive_prompt] + prompt = [self.prompt] prompt_embeds = t5_encoder(prompt) pooled_prompt_embeds = clip_encoder(prompt) From 0e9f6f7373bf7d7fe736c79eb136a6c6a040e1e5 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 17:18:43 +0000 Subject: [PATCH 080/113] Move prepare_latent_image_patches(...) to sampling.py with all of the related FLUX inference code. --- .../app/invocations/flux_text_to_image.py | 30 ++----------------- invokeai/backend/flux/sampling.py | 25 ++++++++++++++++ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index f29b3dd3091..2e80afc1e46 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,5 +1,5 @@ import torch -from einops import rearrange, repeat +from einops import rearrange from PIL import Image from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation @@ -16,7 +16,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder -from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack +from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, prepare_latent_img_patches, unpack from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice @@ -87,7 +87,7 @@ def _run_diffusion( seed=self.seed, ) - img, img_ids = self._prepare_latent_img_patches(x) + img, img_ids = prepare_latent_img_patches(x) is_schnell = "schnell" in transformer_info.config.config_path @@ -123,30 +123,6 @@ def _run_diffusion( return x - def _prepare_latent_img_patches(self, latent_img: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: - """Convert an input image in latent space to patches for diffusion. 
- - This implementation was extracted from: - https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/sampling.py#L32 - - Returns: - tuple[Tensor, Tensor]: (img, img_ids), as defined in the original flux repo. - """ - bs, c, h, w = latent_img.shape - - # Pixel unshuffle with a scale of 2, and flatten the height/width dimensions to get an array of patches. - img = rearrange(latent_img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) - if img.shape[0] == 1 and bs > 1: - img = repeat(img, "1 ... -> bs ...", bs=bs) - - # Generate patch position ids. - img_ids = torch.zeros(h // 2, w // 2, 3, device=img.device) - img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=img.device)[:, None] - img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=img.device)[None, :] - img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) - - return img, img_ids - def _run_vae_decoding( self, context: InvocationContext, diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 82abc0e561a..318a0bcdce9 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -147,3 +147,28 @@ def unpack(x: Tensor, height: int, width: int) -> Tensor: ph=2, pw=2, ) + + +def prepare_latent_img_patches(latent_img: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Convert an input image in latent space to patches for diffusion. + + This implementation was extracted from: + https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/sampling.py#L32 + + Returns: + tuple[Tensor, Tensor]: (img, img_ids), as defined in the original flux repo. + """ + bs, c, h, w = latent_img.shape + + # Pixel unshuffle with a scale of 2, and flatten the height/width dimensions to get an array of patches. + img = rearrange(latent_img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) + if img.shape[0] == 1 and bs > 1: + img = repeat(img, "1 ... -> bs ...", bs=bs) + + # Generate patch position ids. + img_ids = torch.zeros(h // 2, w // 2, 3, device=img.device) + img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=img.device)[:, None] + img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=img.device)[None, :] + img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + + return img, img_ids From b5c937e7908f11848a92df3ad20b103b36969919 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 22 Aug 2024 18:16:43 +0000 Subject: [PATCH 081/113] Run FLUX VAE decoding in the user's preferred dtype rather than float32. Tested, and seems to work well at float16. --- invokeai/app/invocations/flux_text_to_image.py | 5 +---- invokeai/backend/model_manager/load/model_loaders/flux.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 2e80afc1e46..9504abee3e4 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -131,10 +131,7 @@ def _run_vae_decoding( vae_info = context.models.load(self.vae.vae) with vae_info as vae: assert isinstance(vae, AutoEncoder) - # TODO(ryand): Test that this works with both float16 and bfloat16. 
- # with torch.autocast(device_type=latents.device.type, dtype=torch.float32): - vae.to(torch.float32) - latents.to(torch.float32) + latents = latents.to(dtype=TorchDevice.choose_torch_dtype()) img = vae.decode(latents) img = img.clamp(-1, 1) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index bb57e4413c5..e37b12c4f78 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -65,6 +65,7 @@ def _load_model( model = AutoEncoder(params) sd = load_file(model_path) model.load_state_dict(sd, assign=True) + model.to(dtype=self._torch_dtype) return model From f34a9236303a3517c00daa669e7283ab0d37ec85 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Thu, 22 Aug 2024 16:28:09 -0400 Subject: [PATCH 082/113] Update macos test vm to macOS-14 --- .github/workflows/python-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 8805989a69b..f22e29e3e9b 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -60,7 +60,7 @@ jobs: extra-index-url: 'https://download.pytorch.org/whl/cpu' github-env: $GITHUB_ENV - platform: macos-default - os: macOS-12 + os: macOS-14 github-env: $GITHUB_ENV - platform: windows-cpu os: windows-2022 From b8d4630600f6c2c6e1a0e7e54ecab457a1ac27e8 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Fri, 23 Aug 2024 13:28:05 -0400 Subject: [PATCH 083/113] Load and unload clip/t5 encoders and run inference separately in text encoding --- invokeai/app/invocations/flux_text_encoder.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index ae3456be061..80e13c2270e 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -59,23 +59,28 @@ def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torc t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer) t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder) + prompt = [self.prompt] + with ( - clip_text_encoder_info as clip_text_encoder, t5_text_encoder_info as t5_text_encoder, - clip_tokenizer_info as clip_tokenizer, t5_tokenizer_info as t5_tokenizer, ): - assert isinstance(clip_text_encoder, CLIPTextModel) assert isinstance(t5_text_encoder, T5EncoderModel) - assert isinstance(clip_tokenizer, CLIPTokenizer) assert isinstance(t5_tokenizer, T5Tokenizer) - clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len) - prompt = [self.prompt] prompt_embeds = t5_encoder(prompt) + with ( + clip_text_encoder_info as clip_text_encoder, + clip_tokenizer_info as clip_tokenizer, + ): + assert isinstance(clip_text_encoder, CLIPTextModel) + assert isinstance(clip_tokenizer, CLIPTokenizer) + + clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) + pooled_prompt_embeds = clip_encoder(prompt) assert isinstance(prompt_embeds, torch.Tensor) From a31c02bedfcd0ede80e9b665c31a4239bc74a3d7 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Fri, 23 Aug 2024 13:36:14 -0400 Subject: [PATCH 084/113] Only import bnb quantize file if bitsandbytes is installed --- .../backend/model_manager/load/model_loaders/flux.py | 12 +++++++++++- 1 
file changed, 11 insertions(+), 1 deletion(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index e37b12c4f78..58b4843395e 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -33,10 +33,16 @@ ) from invokeai.backend.model_manager.load.load_default import ModelLoader from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry -from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel from invokeai.backend.util.silence_warnings import SilenceWarnings +try: + from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 + + bnb_nf4_available = True +except ImportError: + bnb_nf4_available = False + app_config = get_config() @@ -213,6 +219,10 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainBnbQuantized4bCheckpointConfig) + if not bnb_nf4_available: + raise ImportError( + "The bnb_nf4 module is not available. Please install bitsandbytes if available on your platform." + ) model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} From 9899e42577c4b67e0c9caf0a66734fc5ebbff34b Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Fri, 23 Aug 2024 13:50:01 -0400 Subject: [PATCH 085/113] Switch flux to using its own conditioning field --- invokeai/app/invocations/fields.py | 6 +++ invokeai/app/invocations/flux_text_encoder.py | 8 ++-- .../app/invocations/flux_text_to_image.py | 4 +- invokeai/app/invocations/primitives.py | 12 ++++++ .../web/src/features/nodes/types/constants.ts | 1 + .../frontend/web/src/services/api/schema.ts | 42 +++++++++++++++---- 6 files changed, 58 insertions(+), 15 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 6b7d7bef635..3a4e2cbddb1 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -236,6 +236,12 @@ def tuple(self) -> Tuple[int, int, int, int]: return (self.r, self.g, self.b, self.a) +class FluxConditioningField(BaseModel): + """A conditioning tensor primitive value""" + + conditioning_name: str = Field(description="The name of conditioning tensor") + + class ConditioningField(BaseModel): """A conditioning tensor primitive value""" diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 80e13c2270e..0e7ebd6d69b 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -6,7 +6,7 @@ from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField from invokeai.app.invocations.model import CLIPField, T5EncoderField -from invokeai.app.invocations.primitives import ConditioningOutput +from invokeai.app.invocations.primitives import FluxConditioningOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.flux.modules.conditioner import HFEncoder from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo @@ -38,17 +38,15 @@ class FluxTextEncoderInvocation(BaseInvocation): ) prompt: 
str = InputField(description="Text prompt to encode.") - # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not - # compatible with other ConditioningOutputs. @torch.no_grad() - def invoke(self, context: InvocationContext) -> ConditioningOutput: + def invoke(self, context: InvocationContext) -> FluxConditioningOutput: t5_embeddings, clip_embeddings = self._encode_prompt(context) conditioning_data = ConditioningFieldData( conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)] ) conditioning_name = context.conditioning.save(conditioning_data) - return ConditioningOutput.build(conditioning_name) + return FluxConditioningOutput.build(conditioning_name) def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: # Load CLIP. diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 9504abee3e4..b68bb91513c 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -4,8 +4,8 @@ from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import ( - ConditioningField, FieldDescriptions, + FluxConditioningField, Input, InputField, WithBoard, @@ -41,7 +41,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): description=FieldDescriptions.vae, input=Input.Connection, ) - positive_text_conditioning: ConditioningField = InputField( + positive_text_conditioning: FluxConditioningField = InputField( description=FieldDescriptions.positive_cond, input=Input.Connection ) width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") diff --git a/invokeai/app/invocations/primitives.py b/invokeai/app/invocations/primitives.py index 3655554f3bf..bb136d62fdd 100644 --- a/invokeai/app/invocations/primitives.py +++ b/invokeai/app/invocations/primitives.py @@ -12,6 +12,7 @@ ConditioningField, DenoiseMaskField, FieldDescriptions, + FluxConditioningField, ImageField, Input, InputField, @@ -414,6 +415,17 @@ class MaskOutput(BaseInvocationOutput): height: int = OutputField(description="The height of the mask in pixels.") +@invocation_output("flux_conditioning_output") +class FluxConditioningOutput(BaseInvocationOutput): + """Base class for nodes that output a single conditioning tensor""" + + conditioning: FluxConditioningField = OutputField(description=FieldDescriptions.cond) + + @classmethod + def build(cls, conditioning_name: str) -> "FluxConditioningOutput": + return cls(conditioning=FluxConditioningField(conditioning_name=conditioning_name)) + + @invocation_output("conditioning_output") class ConditioningOutput(BaseInvocationOutput): """Base class for nodes that output a single conditioning tensor""" diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index 19927220f20..100c094c464 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -52,6 +52,7 @@ export const FIELD_COLORS: { [key: string]: string } = { CLIPField: 'green.500', ColorField: 'pink.300', ConditioningField: 'cyan.500', + FluxConditioningField: 'cyan.500', ControlField: 'teal.500', ControlNetModelField: 'teal.500', EnumField: 'blue.500', diff --git a/invokeai/frontend/web/src/services/api/schema.ts 
b/invokeai/frontend/web/src/services/api/schema.ts index 8c3849593ab..2b506759bd8 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -5720,6 +5720,32 @@ export type components = { */ type: "float_to_int"; }; + /** + * FluxConditioningField + * @description A conditioning tensor primitive value + */ + FluxConditioningField: { + /** + * Conditioning Name + * @description The name of conditioning tensor + */ + conditioning_name: string; + }; + /** + * FluxConditioningOutput + * @description Base class for nodes that output a single conditioning tensor + */ + FluxConditioningOutput: { + /** @description Conditioning tensor */ + conditioning: components["schemas"]["FluxConditioningField"]; + /** + * type + * @default flux_conditioning_output + * @constant + * @enum {string} + */ + type: "flux_conditioning_output"; + }; /** * Flux Main Model * @description Loads a flux base model, outputting its submodels. @@ -5781,7 +5807,7 @@ export type components = { vae: components["schemas"]["VAEField"]; /** * Max Seq Length - * @description VAE + * @description The max sequence length to used for the T5 encoder. (256 for schnell transformer, 512 for dev transformer) * @enum {integer} */ max_seq_len: 256 | 512; @@ -5835,11 +5861,11 @@ export type components = { */ t5_max_seq_len?: 256 | 512; /** - * Positive Prompt - * @description Positive prompt for text-to-image generation. + * Prompt + * @description Text prompt to encode. * @default null */ - positive_prompt?: string; + prompt?: string; /** * type * @default flux_text_encoder @@ -5895,7 +5921,7 @@ export type components = { * @description Positive conditioning tensor * @default null */ - positive_text_conditioning?: components["schemas"]["ConditioningField"]; + positive_text_conditioning?: components["schemas"]["FluxConditioningField"]; /** * Width * @description Width of the generated image. 
@@ -6105,7 +6131,7 @@ export type components = { * @description The results of node executions */ results?: { - [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; + [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | 
components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; }; /** * Errors @@ -8500,7 +8526,7 @@ export type components = { * Result * @description The result of the invocation */ - result: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | 
components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; + result: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"]; }; /** * InvocationDenoiseProgressEvent @@ -8675,7 +8701,7 @@ export type components = { float_range: components["schemas"]["FloatCollectionOutput"]; float_to_int: components["schemas"]["IntegerOutput"]; flux_model_loader: components["schemas"]["FluxModelLoaderOutput"]; - flux_text_encoder: components["schemas"]["ConditioningOutput"]; + flux_text_encoder: components["schemas"]["FluxConditioningOutput"]; flux_text_to_image: components["schemas"]["ImageOutput"]; freeu: components["schemas"]["UNetOutput"]; grounding_dino: components["schemas"]["BoundingBoxCollectionOutput"]; From cfcd8601ff0572922ac6d29954c24c7b75c5f818 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 01:25:04 +0000 Subject: [PATCH 086/113] Add script for quantizing a T5 model. 
--- .../quantize_t5_xxl_bnb_llm_int8.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py diff --git a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py new file mode 100644 index 00000000000..a77af4cb241 --- /dev/null +++ b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py @@ -0,0 +1,80 @@ +from pathlib import Path + +import accelerate +from safetensors.torch import load_file, save_model +from transformers import AutoConfig, AutoModelForTextEncoding, T5EncoderModel + +from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 +from invokeai.backend.quantization.load_flux_model_bnb_nf4 import log_time + + +def main(): + # Load the FLUX transformer model onto the meta device. + model_path = Path( + "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/text_encoder_2" + ) + + with log_time("Intialize T5 on meta device"): + model_config = AutoConfig.from_pretrained(model_path) + with accelerate.init_empty_weights(): + model = AutoModelForTextEncoding.from_config(model_config) + + # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate + # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. + modules_to_not_convert: set[str] = set() + + model_int8_path = model_path / "bnb_llm_int8.safetensors" + if model_int8_path.exists(): + # The quantized model already exists, load it and return it. + print(f"A pre-quantized model already exists at '{model_int8_path}'. Attempting to load it...") + + # Replace the linear layers with LLM.int8() quantized linear layers (still on the meta device). + with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): + model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert) + + with log_time("Load state dict into model"): + sd = load_file(model_int8_path) + missing_keys, unexpected_keys = model.load_state_dict(sd, strict=False, assign=True) + assert len(unexpected_keys) == 0 + assert set(missing_keys) == {"shared.weight"} + # load_model(model, model_int8_path) + + with log_time("Move model to cuda"): + model = model.to("cuda") + + print(f"Successfully loaded pre-quantized model from '{model_int8_path}'.") + + else: + # The quantized model does not exist, quantize the model and save it. + print(f"No pre-quantized model found at '{model_int8_path}'. Quantizing the model...") + + with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights(): + model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert) + + with log_time("Load state dict into model"): + # Load sharded state dict. + files = list(model_path.glob("*.safetensors")) + state_dict = {} + for file in files: + sd = load_file(file) + state_dict.update(sd) + # TODO(ryand): Cast the state_dict to the appropriate dtype? + # The state dict is expected to have some extra keys, so we use `strict=False`. 
+ model.load_state_dict(state_dict, strict=True, assign=True) + + with log_time("Move model to cuda and quantize"): + model = model.to("cuda") + + with log_time("Save quantized model"): + model_int8_path.parent.mkdir(parents=True, exist_ok=True) + # save_file(model.state_dict(), model_int8_path) + save_model(model, model_int8_path) + + print(f"Successfully quantized and saved model to '{model_int8_path}'.") + + assert isinstance(model, T5EncoderModel) + return model + + +if __name__ == "__main__": + main() From 4089ff214e0d285ab03186ff052cd905e71bcb4e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 14:06:08 +0000 Subject: [PATCH 087/113] Fixes to the T5XXL quantization script. --- .../quantize_t5_xxl_bnb_llm_int8.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py index a77af4cb241..3a11e6129bf 100644 --- a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py +++ b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py @@ -1,17 +1,29 @@ from pathlib import Path import accelerate -from safetensors.torch import load_file, save_model +from safetensors.torch import load_file, save_file from transformers import AutoConfig, AutoModelForTextEncoding, T5EncoderModel from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 from invokeai.backend.quantization.load_flux_model_bnb_nf4 import log_time +def load_state_dict_into_t5(model: T5EncoderModel, state_dict: dict): + # There is a shared reference to a single weight tensor in the model. + # Both "encoder.embed_tokens.weight" and "shared.weight" refer to the same tensor, so only the latter should + # be present in the state_dict. + missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True) + assert len(unexpected_keys) == 0 + assert set(missing_keys) == {"encoder.embed_tokens.weight"} + # Assert that the layers we expect to be shared are actually shared. + assert model.encoder.embed_tokens.weight is model.shared.weight + + def main(): # Load the FLUX transformer model onto the meta device. model_path = Path( - "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/text_encoder_2" + # "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/text_encoder_2" + "/data/misc/text_encoder_2" ) with log_time("Intialize T5 on meta device"): @@ -34,10 +46,7 @@ def main(): with log_time("Load state dict into model"): sd = load_file(model_int8_path) - missing_keys, unexpected_keys = model.load_state_dict(sd, strict=False, assign=True) - assert len(unexpected_keys) == 0 - assert set(missing_keys) == {"shared.weight"} - # load_model(model, model_int8_path) + load_state_dict_into_t5(model, sd) with log_time("Move model to cuda"): model = model.to("cuda") @@ -58,17 +67,19 @@ def main(): for file in files: sd = load_file(file) state_dict.update(sd) - # TODO(ryand): Cast the state_dict to the appropriate dtype? - # The state dict is expected to have some extra keys, so we use `strict=False`. 
- model.load_state_dict(state_dict, strict=True, assign=True) + load_state_dict_into_t5(model, state_dict) with log_time("Move model to cuda and quantize"): model = model.to("cuda") with log_time("Save quantized model"): model_int8_path.parent.mkdir(parents=True, exist_ok=True) - # save_file(model.state_dict(), model_int8_path) - save_model(model, model_int8_path) + state_dict = model.state_dict() + state_dict.pop("encoder.embed_tokens.weight") + save_file(state_dict, model_int8_path) + # This handling of shared weights could also be achieved with save_model(...), but then we'd lose control + # over which keys are kept. And, the corresponding load_model(...) function does not support assign=True. + # save_model(model, model_int8_path) print(f"Successfully quantized and saved model to '{model_int8_path}'.") From 54c48c30c4f8fd6cd1328b4709ad39daed374e54 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 17:44:03 +0000 Subject: [PATCH 088/113] Update the T5 8-bit quantized starter model to use the BnB LLM.int8() variant. --- .../model_manager/load/model_loaders/flux.py | 28 +++++++++++++++++-- .../backend/model_manager/starter_models.py | 12 ++++---- .../StarterModels/StartModelsResultItem.tsx | 4 +-- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 58b4843395e..f3e44fc2218 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -9,7 +9,7 @@ import torch import yaml from safetensors.torch import load_file -from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer +from transformers import AutoConfig, AutoModelForTextEncoding, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.services.config.config_default import get_config from invokeai.backend.flux.model import Flux, FluxParams @@ -33,7 +33,8 @@ ) from invokeai.backend.model_manager.load.load_default import ModelLoader from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry -from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel +from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 +from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.util.silence_warnings import SilenceWarnings try: @@ -115,12 +116,33 @@ def _load_model( case SubModelType.Tokenizer2: return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) case SubModelType.TextEncoder2: - return FastQuantizedTransformersModel.from_pretrained(Path(config.path) / "text_encoder_2") + te2_model_path = Path(config.path) / "text_encoder_2" + model_config = AutoConfig.from_pretrained(te2_model_path) + with accelerate.init_empty_weights(): + model = AutoModelForTextEncoding.from_config(model_config) + model = quantize_model_llm_int8(model, modules_to_not_convert=set()) + + state_dict_path = te2_model_path / "bnb_llm_int8_model.safetensors" + state_dict = load_file(state_dict_path) + self._load_state_dict_into_t5(model, state_dict) + + return model raise ValueError( f"Only Tokenizer and TextEncoder submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}" ) + @classmethod + def _load_state_dict_into_t5(cls, model: T5EncoderModel, state_dict: dict[str, torch.Tensor]): + # There is a shared reference to a single weight tensor in the model. + # Both "encoder.embed_tokens.weight" and "shared.weight" refer to the same tensor, so only the latter should + # be present in the state_dict. + missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True) + assert len(unexpected_keys) == 0 + assert set(missing_keys) == {"encoder.embed_tokens.weight"} + # Assert that the layers we expect to be shared are actually shared. + assert model.encoder.embed_tokens.weight is model.shared.weight + @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) class T5EncoderCheckpointModel(ModelLoader): diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 7d5233d767d..13a22ee219e 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from invokeai.backend.model_manager.config import BaseModelType, ModelType +from invokeai.backend.model_manager.config import BaseModelType, ModelFormat, ModelType class StarterModelWithoutDependencies(BaseModel): @@ -11,6 +11,7 @@ class StarterModelWithoutDependencies(BaseModel): name: str base: BaseModelType type: ModelType + format: Optional[ModelFormat] = None is_installed: bool = False @@ -54,17 +55,18 @@ class StarterModel(StarterModelWithoutDependencies): t5_base_encoder = StarterModel( name="t5_base_encoder", base=BaseModelType.Any, - source="InvokeAI/flux_schnell::t5_xxl_encoder/base", + source="InvokeAI/t5-v1_1-xxl::bfloat16", description="T5-XXL text encoder (used in FLUX pipelines). ~8GB", type=ModelType.T5Encoder, ) t5_8b_quantized_encoder = StarterModel( - name="t5_8b_quantized_encoder", + name="t5_bnb_int8_quantized_encoder", base=BaseModelType.Any, - source="invokeai/flux_schnell::t5_xxl_encoder/optimum_quanto_qfloat8", - description="T5-XXL text encoder with optimum-quanto qfloat8 quantization (used in FLUX pipelines). ~6GB", + source="InvokeAI/t5-v1_1-xxl::bnb_llm_int8", + description="T5-XXL text encoder with bitsandbytes LLM.int8() quantization (used in FLUX pipelines). 
~5GB", type=ModelType.T5Encoder, + format=ModelFormat.T5Encoder8b, ) clip_l_encoder = StarterModel( diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx index 4fc83908907..bd6a2b42684 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx @@ -15,14 +15,14 @@ export const StarterModelsResultItem = memo(({ result }: Props) => { const _allSources = [ { source: result.source, - config: { name: result.name, description: result.description, type: result.type, base: result.base }, + config: { name: result.name, description: result.description, type: result.type, base: result.base, format: result.format }, }, ]; if (result.dependencies) { for (const d of result.dependencies) { _allSources.push({ source: d.source, - config: { name: d.name, description: d.description, type: d.type, base: d.base }, + config: { name: d.name, description: d.description, type: d.type, base: d.base, format: d.format }, }); } } From 5af214b0bc56837c4c7fce1c462771bb17dec023 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 17:48:29 +0000 Subject: [PATCH 089/113] Remove all references to optimum-quanto and downgrade diffusers. --- .../fast_quantized_diffusion_model.py | 79 ------------------- .../fast_quantized_transformers_model.py | 65 --------------- invokeai/backend/quantization/requantize.py | 56 ------------- pyproject.toml | 4 +- 4 files changed, 1 insertion(+), 203 deletions(-) delete mode 100644 invokeai/backend/quantization/fast_quantized_diffusion_model.py delete mode 100644 invokeai/backend/quantization/fast_quantized_transformers_model.py delete mode 100644 invokeai/backend/quantization/requantize.py diff --git a/invokeai/backend/quantization/fast_quantized_diffusion_model.py b/invokeai/backend/quantization/fast_quantized_diffusion_model.py deleted file mode 100644 index 6ad82b8e9ee..00000000000 --- a/invokeai/backend/quantization/fast_quantized_diffusion_model.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import os -from typing import Union - -from diffusers.models.model_loading_utils import load_state_dict -from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel -from diffusers.utils import ( - CONFIG_NAME, - SAFE_WEIGHTS_INDEX_NAME, - SAFETENSORS_WEIGHTS_NAME, - _get_checkpoint_shard_files, - is_accelerate_available, -) -from optimum.quanto.models import QuantizedDiffusersModel -from optimum.quanto.models.shared_dict import ShardedStateDict - -from invokeai.backend.quantization.requantize import requantize - - -class FastQuantizedDiffusersModel(QuantizedDiffusersModel): - @classmethod - def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike], base_class=FluxTransformer2DModel, **kwargs): - """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" - base_class = base_class or cls.base_class - if base_class is None: - raise ValueError("The `base_class` attribute needs to be configured.") - - if not is_accelerate_available(): - raise ValueError("Reloading a quantized diffusers model requires the accelerate library.") - from accelerate import init_empty_weights - - if os.path.isdir(model_name_or_path): - # Look for a quantization map - 
qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) - if not os.path.exists(qmap_path): - raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") - - # Look for original model config file. - model_config_path = os.path.join(model_name_or_path, CONFIG_NAME) - if not os.path.exists(model_config_path): - raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.") - - with open(qmap_path, "r", encoding="utf-8") as f: - qmap = json.load(f) - - with open(model_config_path, "r", encoding="utf-8") as f: - original_model_cls_name = json.load(f)["_class_name"] - configured_cls_name = base_class.__name__ - if configured_cls_name != original_model_cls_name: - raise ValueError( - f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})." - ) - - # Create an empty model - config = base_class.load_config(model_name_or_path) - with init_empty_weights(): - model = base_class.from_config(config) - - # Look for the index of a sharded checkpoint - checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) - if os.path.exists(checkpoint_file): - # Convert the checkpoint path to a list of shards - _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file) - # Create a mapping for the sharded safetensor files - state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) - else: - # Look for a single checkpoint file - checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME) - if not os.path.exists(checkpoint_file): - raise ValueError(f"No safetensor weights found in {model_name_or_path}.") - # Get state_dict from model checkpoint - state_dict = load_state_dict(checkpoint_file) - - # Requantize and load quantized weights from state_dict - requantize(model, state_dict=state_dict, quantization_map=qmap) - model.eval() - return cls(model)._wrapped - else: - raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") diff --git a/invokeai/backend/quantization/fast_quantized_transformers_model.py b/invokeai/backend/quantization/fast_quantized_transformers_model.py deleted file mode 100644 index b811b598e7c..00000000000 --- a/invokeai/backend/quantization/fast_quantized_transformers_model.py +++ /dev/null @@ -1,65 +0,0 @@ -import json -import os -from typing import Union - -from optimum.quanto.models import QuantizedTransformersModel -from optimum.quanto.models.shared_dict import ShardedStateDict -from transformers import AutoConfig -from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict -from transformers.models.auto import AutoModelForTextEncoding -from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available - -from invokeai.backend.quantization.requantize import requantize - - -class FastQuantizedTransformersModel(QuantizedTransformersModel): - @classmethod - def from_pretrained( - cls, model_name_or_path: Union[str, os.PathLike], auto_class=AutoModelForTextEncoding, **kwargs - ): - """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation.""" - auto_class = auto_class or cls.auto_class - if auto_class is None: - raise ValueError( - "Quantized models cannot be reloaded using {cls}: use a specialized quantized class such as QuantizedModelForCausalLM instead." 
- ) - if not is_accelerate_available(): - raise ValueError("Reloading a quantized transformers model requires the accelerate library.") - from accelerate import init_empty_weights - - if os.path.isdir(model_name_or_path): - # Look for a quantization map - qmap_path = os.path.join(model_name_or_path, cls._qmap_name()) - if not os.path.exists(qmap_path): - raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?") - with open(qmap_path, "r", encoding="utf-8") as f: - qmap = json.load(f) - # Create an empty model - config = AutoConfig.from_pretrained(model_name_or_path) - with init_empty_weights(): - model = auto_class.from_config(config) - # Look for the index of a sharded checkpoint - checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME) - if os.path.exists(checkpoint_file): - # Convert the checkpoint path to a list of shards - checkpoint_file, sharded_metadata = get_checkpoint_shard_files(model_name_or_path, checkpoint_file) - # Create a mapping for the sharded safetensor files - state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"]) - else: - # Look for a single checkpoint file - checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_NAME) - if not os.path.exists(checkpoint_file): - raise ValueError(f"No safetensor weights found in {model_name_or_path}.") - # Get state_dict from model checkpoint - state_dict = load_state_dict(checkpoint_file) - # Requantize and load quantized weights from state_dict - requantize(model, state_dict=state_dict, quantization_map=qmap) - if getattr(model.config, "tie_word_embeddings", True): - # Tie output weight embeddings to input weight embeddings - # Note that if they were quantized they would NOT be tied - model.tie_weights() - # Set model in evaluation mode as it is done in transformers - model.eval() - return cls(model)._wrapped - else: - raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.") diff --git a/invokeai/backend/quantization/requantize.py b/invokeai/backend/quantization/requantize.py deleted file mode 100644 index aae85bed7c9..00000000000 --- a/invokeai/backend/quantization/requantize.py +++ /dev/null @@ -1,56 +0,0 @@ -from typing import Any, Dict - -import torch -from optimum.quanto.quantize import _quantize_submodule - - -def requantize( - model: torch.nn.Module, - state_dict: Dict[str, Any], - quantization_map: Dict[str, Dict[str, str]], - device: torch.device | None = None, -): - """This function was initially copied from: - https://github.com/huggingface/optimum-quanto/blob/832f7f5c3926c91fe4f923aaaf037a780ac3e6c3/optimum/quanto/quantize.py#L101 - - The function was modified to remove the `freeze()` call. The `freeze()` call is very slow and unnecessary when the - weights are about to be loaded from a state_dict. - - TODO(ryand): Unless I'm overlooking something, this should be contributed upstream to the `optimum-quanto` library. 
- """ - if device is None: - device = next(model.parameters()).device - if device.type == "meta": - device = torch.device("cpu") - - # Quantize the model with parameters from the quantization map - for name, m in model.named_modules(): - qconfig = quantization_map.get(name, None) - if qconfig is not None: - weights = qconfig["weights"] - if weights == "none": - weights = None - activations = qconfig["activations"] - if activations == "none": - activations = None - _quantize_submodule(model, name, m, weights=weights, activations=activations) - - # Move model parameters and buffers to CPU before materializing quantized weights - for name, m in model.named_modules(): - - def move_tensor(t, device): - if t.device.type == "meta": - return torch.empty_like(t, device=device) - return t.to(device) - - for name, param in m.named_parameters(recurse=False): - setattr(m, name, torch.nn.Parameter(move_tensor(param, "cpu"))) - for name, param in m.named_buffers(recurse=False): - setattr(m, name, move_tensor(param, "cpu")) - - # Freeze model and move to target device - # freeze(model) - # model.to(device) - - # Load the quantized model weights - model.load_state_dict(state_dict, strict=False) diff --git a/pyproject.toml b/pyproject.toml index 5be3117af30..1537bf7e6a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,7 @@ dependencies = [ "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", "controlnet-aux==0.0.7", - # TODO(ryand): Bump this once the next diffusers release is ready. - "diffusers[torch] @ git+https://github.com/huggingface/diffusers.git@4c6152c2fb0ade468aadb417102605a07a8635d3", + "diffusers[torch]==0.27.2", "flux @ git+https://github.com/black-forest-labs/flux.git@c23ae247225daba30fbd56058d247cc1b1fc20a3", "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model @@ -47,7 +46,6 @@ dependencies = [ "onnx==1.15.0", "onnxruntime==1.16.3", "opencv-python==4.9.0.80", - "optimum-quanto==0.2.4", "pytorch-lightning==2.1.3", "safetensors==0.4.3", # sentencepiece is required to load T5TokenizerFast (used by FLUX). From f4612a93a7c9658af959b6e4e2e27abbc8bd2ffc Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 18:07:14 +0000 Subject: [PATCH 090/113] Update docs for T5 quantization script. --- .../quantization/quantize_t5_xxl_bnb_llm_int8.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py index 3a11e6129bf..d37041248da 100644 --- a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py +++ b/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py @@ -20,11 +20,12 @@ def load_state_dict_into_t5(model: T5EncoderModel, state_dict: dict): def main(): - # Load the FLUX transformer model onto the meta device. - model_path = Path( - # "/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/text_encoder_2" - "/data/misc/text_encoder_2" - ) + """A script for quantizing a T5 text encoder model using the bitsandbytes LLM.int8() quantization method. + + This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert, + etc.) are hardcoded and would need to be modified for other use cases. 
+ """ + model_path = Path("/data/misc/text_encoder_2") with log_time("Intialize T5 on meta device"): model_config = AutoConfig.from_pretrained(model_path) From 18c0ec37b9ef1d782d5b1fd0978d11fa14623180 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 18:08:37 +0000 Subject: [PATCH 091/113] Move quantization scripts to a scripts/ subdir. --- .../quantization/{ => scripts}/load_flux_model_bnb_llm_int8.py | 2 +- .../quantization/{ => scripts}/load_flux_model_bnb_nf4.py | 0 .../quantization/{ => scripts}/quantize_t5_xxl_bnb_llm_int8.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename invokeai/backend/quantization/{ => scripts}/load_flux_model_bnb_llm_int8.py (97%) rename invokeai/backend/quantization/{ => scripts}/load_flux_model_bnb_nf4.py (100%) rename invokeai/backend/quantization/{ => scripts}/quantize_t5_xxl_bnb_llm_int8.py (98%) diff --git a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py similarity index 97% rename from invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py rename to invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py index c01193e6ac2..e8771dca225 100644 --- a/invokeai/backend/quantization/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py @@ -6,7 +6,7 @@ from safetensors.torch import load_file, save_file from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 -from invokeai.backend.quantization.load_flux_model_bnb_nf4 import log_time +from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time def main(): diff --git a/invokeai/backend/quantization/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py similarity index 100% rename from invokeai/backend/quantization/load_flux_model_bnb_nf4.py rename to invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py diff --git a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py similarity index 98% rename from invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py rename to invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py index d37041248da..fc681e8fc57 100644 --- a/invokeai/backend/quantization/quantize_t5_xxl_bnb_llm_int8.py +++ b/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py @@ -5,7 +5,7 @@ from transformers import AutoConfig, AutoModelForTextEncoding, T5EncoderModel from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 -from invokeai.backend.quantization.load_flux_model_bnb_nf4 import log_time +from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time def load_state_dict_into_t5(model: T5EncoderModel, state_dict: dict): From 098db5c1e95615842272a9acf975fcb4beaeae25 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 18:24:58 +0000 Subject: [PATCH 092/113] Downgrade revert torch version after removing optimum-qanto, and other minor version-related fixes. 
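A throwaway check, not part of the patch, that an environment built from these pins resolves to a compatible pair (torchvision 0.17.x is the release series built against torch 2.2.x):

    import torch
    import torchvision

    # Expect the reverted pins from pyproject.toml in this patch.
    assert torch.__version__.startswith("2.2"), torch.__version__
    assert torchvision.__version__.startswith("0.17"), torchvision.__version__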
--- invokeai/backend/util/hotfixes.py | 4 ++-- pyproject.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/backend/util/hotfixes.py b/invokeai/backend/util/hotfixes.py index db9d19cf48c..7e362fe9589 100644 --- a/invokeai/backend/util/hotfixes.py +++ b/invokeai/backend/util/hotfixes.py @@ -3,7 +3,7 @@ import diffusers import torch from diffusers.configuration_utils import ConfigMixin, register_to_config -from diffusers.loaders.single_file_model import FromOriginalModelMixin +from diffusers.loaders import FromOriginalControlNetMixin from diffusers.models.attention_processor import AttentionProcessor, AttnProcessor from diffusers.models.controlnet import ControlNetConditioningEmbedding, ControlNetOutput, zero_module from diffusers.models.embeddings import ( @@ -32,7 +32,7 @@ logger = InvokeAILogger.get_logger(__name__) -class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalModelMixin): +class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin): """ A ControlNet model. diff --git a/pyproject.toml b/pyproject.toml index 1537bf7e6a6..0c9e79b5831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,10 +52,10 @@ dependencies = [ "sentencepiece==0.2.0", "spandrel==0.3.4", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 - "torch==2.4.0", + "torch==2.2.2", "torchmetrics==0.11.4", "torchsde==0.2.6", - "torchvision==0.19.0", + "torchvision==0.17.2", "transformers==4.41.1", # Core application dependencies, pinned for reproducible builds. From dbdd8516176e7d15bded54ce0eec79c65b8f8ad7 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Fri, 23 Aug 2024 15:05:08 -0400 Subject: [PATCH 093/113] Update t5 encoder formats to accurately reflect the quantization strategy and data type --- invokeai/backend/model_manager/config.py | 11 ++++---- .../model_manager/load/model_loaders/flux.py | 26 +++++++++-------- .../backend/model_manager/starter_models.py | 2 +- .../StarterModels/StartModelsResultItem.tsx | 8 +++++- .../T5EncoderModelFieldInputComponent.tsx | 4 +-- .../frontend/web/src/services/api/schema.ts | 28 ++++++++++--------- .../frontend/web/src/services/api/types.ts | 6 ++-- 7 files changed, 47 insertions(+), 38 deletions(-) diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index 34cc993d39c..66e54d82f3a 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -109,8 +109,7 @@ class ModelFormat(str, Enum): EmbeddingFolder = "embedding_folder" InvokeAI = "invokeai" T5Encoder = "t5_encoder" - T5Encoder8b = "t5_encoder_8b" - T5Encoder4b = "t5_encoder_4b" + BnbQuantizedLlmInt8b = "bnb_quantized_int8b" BnbQuantizednf4b = "bnb_quantized_nf4b" @@ -227,12 +226,12 @@ def get_tag() -> Tag: return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder.value}") -class T5Encoder8bConfig(T5EncoderConfigBase): - format: Literal[ModelFormat.T5Encoder8b] = ModelFormat.T5Encoder8b +class T5EncoderBnbQuantizedLlmInt8bConfig(T5EncoderConfigBase): + format: Literal[ModelFormat.BnbQuantizedLlmInt8b] = ModelFormat.BnbQuantizedLlmInt8b @staticmethod def get_tag() -> Tag: - return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder8b.value}") + return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.BnbQuantizedLlmInt8b.value}") class LoRALyCORISConfig(LoRAConfigBase): @@ -470,7 +469,7 @@ def get_model_discriminator_value(v: Any) -> str: Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()], 
Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()], Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()], - Annotated[T5Encoder8bConfig, T5Encoder8bConfig.get_tag()], + Annotated[T5EncoderBnbQuantizedLlmInt8bConfig, T5EncoderBnbQuantizedLlmInt8bConfig.get_tag()], Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()], Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()], Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()], diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index f3e44fc2218..40c77f59829 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -27,22 +27,21 @@ CLIPEmbedDiffusersConfig, MainBnbQuantized4bCheckpointConfig, MainCheckpointConfig, - T5Encoder8bConfig, + T5EncoderBnbQuantizedLlmInt8bConfig, T5EncoderConfig, VAECheckpointConfig, ) from invokeai.backend.model_manager.load.load_default import ModelLoader from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry -from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 -from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 from invokeai.backend.util.silence_warnings import SilenceWarnings try: + from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 - bnb_nf4_available = True + bnb_available = True except ImportError: - bnb_nf4_available = False + bnb_available = False app_config = get_config() @@ -100,8 +99,8 @@ def _load_model( ) -@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder8b) -class T5Encoder8bCheckpointModel(ModelLoader): +@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.BnbQuantizedLlmInt8b) +class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader): """Class to load main models.""" def _load_model( @@ -109,9 +108,12 @@ def _load_model( config: AnyModelConfig, submodel_type: Optional[SubModelType] = None, ) -> AnyModel: - if not isinstance(config, T5Encoder8bConfig): - raise ValueError("Only T5Encoder8bConfig models are currently supported here.") - + if not isinstance(config, T5EncoderBnbQuantizedLlmInt8bConfig): + raise ValueError("Only T5EncoderBnbQuantizedLlmInt8bConfig models are currently supported here.") + if not bnb_available: + raise ImportError( + "The bnb modules are not available. Please install bitsandbytes if available on your platform." + ) match submodel_type: case SubModelType.Tokenizer2: return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) @@ -241,9 +243,9 @@ def _load_from_singlefile( flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainBnbQuantized4bCheckpointConfig) - if not bnb_nf4_available: + if not bnb_available: raise ImportError( - "The bnb_nf4 module is not available. Please install bitsandbytes if available on your platform." + "The bnb modules are not available. Please install bitsandbytes if available on your platform." 
) model_path = Path(config.path) dataclass_fields = {f.name for f in fields(FluxParams)} diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 13a22ee219e..69b9c1bd271 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -66,7 +66,7 @@ class StarterModel(StarterModelWithoutDependencies): source="InvokeAI/t5-v1_1-xxl::bnb_llm_int8", description="T5-XXL text encoder with bitsandbytes LLM.int8() quantization (used in FLUX pipelines). ~5GB", type=ModelType.T5Encoder, - format=ModelFormat.T5Encoder8b, + format=ModelFormat.BnbQuantizedLlmInt8b, ) clip_l_encoder = StarterModel( diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx index bd6a2b42684..f85b124b21d 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx @@ -15,7 +15,13 @@ export const StarterModelsResultItem = memo(({ result }: Props) => { const _allSources = [ { source: result.source, - config: { name: result.name, description: result.description, type: result.type, base: result.base, format: result.format }, + config: { + name: result.name, + description: result.description, + type: result.type, + base: result.base, + format: result.format, + }, }, ]; if (result.dependencies) { diff --git a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx index d92163c9c31..72b60bcee96 100644 --- a/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx +++ b/invokeai/frontend/web/src/features/nodes/components/flow/nodes/Invocation/fields/inputs/T5EncoderModelFieldInputComponent.tsx @@ -6,7 +6,7 @@ import type { T5EncoderModelFieldInputInstance, T5EncoderModelFieldInputTemplate import { memo, useCallback } from 'react'; import { useTranslation } from 'react-i18next'; import { useT5EncoderModels } from 'services/api/hooks/modelsByType'; -import type { T5Encoder8bModelConfig, T5EncoderModelConfig } from 'services/api/types'; +import type { T5EncoderBnbQuantizedLlmInt8bModelConfig, T5EncoderModelConfig } from 'services/api/types'; import type { FieldComponentProps } from './types'; @@ -19,7 +19,7 @@ const T5EncoderModelFieldInputComponent = (props: Props) => { const dispatch = useAppDispatch(); const [modelConfigs, { isLoading }] = useT5EncoderModels(); const _onChange = useCallback( - (value: T5Encoder8bModelConfig | T5EncoderModelConfig | null) => { + (value: T5EncoderBnbQuantizedLlmInt8bModelConfig | T5EncoderModelConfig | null) => { if (!value) { return; } diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 2b506759bd8..88c767d1a70 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -10797,7 +10797,7 @@ export type components = { * @description Storage format of model. 
* @enum {string} */ - ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b" | "bnb_quantized_nf4b"; + ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "bnb_quantized_int8b" | "bnb_quantized_nf4b"; /** ModelIdentifierField */ ModelIdentifierField: { /** @@ -11097,7 +11097,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. */ - config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; + config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -11183,7 +11183,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] 
| components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11204,7 +11204,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; /** * @description The submodel type, if any * @default null @@ -11345,7 +11345,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | 
components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; + models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[]; }; /** * Multiply Integers @@ -13677,6 +13677,7 @@ export type components = { name: string; base: components["schemas"]["BaseModelType"]; type: components["schemas"]["ModelType"]; + format?: components["schemas"]["ModelFormat"] | null; /** * Is Installed * @default false @@ -13695,6 +13696,7 @@ export type components = { name: string; base: components["schemas"]["BaseModelType"]; type: components["schemas"]["ModelType"]; + format?: components["schemas"]["ModelFormat"] | null; /** * Is Installed * @default false @@ -14456,8 +14458,8 @@ export type components = { */ type: "t2i_adapter_output"; }; - /** T5Encoder8bConfig */ - T5Encoder8bConfig: { + /** T5EncoderBnbQuantizedLlmInt8bConfig */ + T5EncoderBnbQuantizedLlmInt8bConfig: { /** * Key * @description A unique key for this model. 
@@ -14511,11 +14513,11 @@ export type components = { type: "t5_encoder"; /** * Format - * @default t5_encoder_8b + * @default bnb_quantized_int8b * @constant * @enum {string} */ - format: "t5_encoder_8b"; + format: "bnb_quantized_int8b"; }; /** T5EncoderConfig */ T5EncoderConfig: { @@ -15807,7 +15809,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Validation Error */ @@ -15839,7 +15841,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | 
components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -15936,7 +15938,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ @@ -16436,7 +16438,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | 
components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5Encoder8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; + "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["T5EncoderBnbQuantizedLlmInt8bConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]; }; }; /** @description Bad request */ diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 9c75b85ab90..d7df8967b83 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -53,7 +53,7 @@ export type IPAdapterModelConfig = S['IPAdapterInvokeAIConfig'] | S['IPAdapterCh export type T2IAdapterModelConfig = S['T2IAdapterConfig']; type ClipEmbedModelConfig = S['CLIPEmbedDiffusersConfig']; export type T5EncoderModelConfig = S['T5EncoderConfig']; -export type T5Encoder8bModelConfig = S['T5Encoder8bConfig']; +export type T5EncoderBnbQuantizedLlmInt8bModelConfig = S['T5EncoderBnbQuantizedLlmInt8bConfig']; export type SpandrelImageToImageModelConfig = S['SpandrelImageToImageConfig']; type TextualInversionModelConfig = S['TextualInversionFileConfig'] | S['TextualInversionFolderConfig']; type DiffusersModelConfig = S['MainDiffusersConfig']; @@ -66,7 +66,7 @@ export type AnyModelConfig = | ControlNetModelConfig | IPAdapterModelConfig | T5EncoderModelConfig - | T5Encoder8bModelConfig + | T5EncoderBnbQuantizedLlmInt8bModelConfig | ClipEmbedModelConfig | T2IAdapterModelConfig | SpandrelImageToImageModelConfig @@ -96,7 +96,7 @@ export const isT2IAdapterModelConfig = (config: AnyModelConfig): config is T2IAd export const isT5EncoderModelConfig = ( config: AnyModelConfig -): config is T5EncoderModelConfig | T5Encoder8bModelConfig => { +): config is T5EncoderModelConfig | T5EncoderBnbQuantizedLlmInt8bModelConfig => { return config.type === 't5_encoder'; }; From 1d6c83b95306f12f04fbfbfbab55e2322b5ef03c Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 23 Aug 2024 19:14:23 +0000 Subject: [PATCH 094/113] Switch the CLIP-L start model to use our hosted version - which is much smaller. 
---
 invokeai/backend/model_manager/load/model_loaders/flux.py | 4 ++--
 invokeai/backend/model_manager/starter_models.py          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py
index 40c77f59829..79613b7602f 100644
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -90,9 +90,9 @@ def _load_model(
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return CLIPTokenizer.from_pretrained(config.path)
+                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
             case SubModelType.TextEncoder:
-                return CLIPTextModel.from_pretrained(config.path)
+                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
 
         raise ValueError(
             f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index 69b9c1bd271..d08fc9fc978 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -72,8 +72,8 @@ class StarterModel(StarterModelWithoutDependencies):
 clip_l_encoder = StarterModel(
     name="clip-vit-large-patch14",
     base=BaseModelType.Any,
-    source="openai/clip-vit-large-patch14",
-    description="CLIP-L text encoder (used in FLUX pipelines). ~3GB",
+    source="InvokeAI/clip-vit-large-patch14-text-encoder::bfloat16",
+    description="CLIP-L text encoder (used in FLUX pipelines). ~250MB",
     type=ModelType.CLIPEmbed,
 )
 

From 1413ff94c441be2d1cabb811d5371cfa86c24d63 Mon Sep 17 00:00:00 2001
From: Ryan Dick 
Date: Fri, 23 Aug 2024 20:28:45 +0000
Subject: [PATCH 095/113] Replace swish() with torch.nn.functional.silu(h).
 They are functionally equivalent, but in my test VAE decoding was ~8% faster
 after the change.
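The equivalence is easy to verify in isolation; a throwaway sketch, separate from this patch:

    import torch

    def swish(x: torch.Tensor) -> torch.Tensor:
        # The hand-rolled activation that this patch removes.
        return x * torch.sigmoid(x)

    x = torch.randn(4, 16, 64, 64)
    # F.silu(x) is defined as x * sigmoid(x); the fused op should agree to
    # floating-point tolerance while being cheaper to evaluate.
    assert torch.allclose(swish(x), torch.nn.functional.silu(x), atol=1e-6)

The speedup presumably comes from silu dispatching to a single fused kernel instead of a separate sigmoid and multiply.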
--- invokeai/backend/flux/modules/autoencoder.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/invokeai/backend/flux/modules/autoencoder.py b/invokeai/backend/flux/modules/autoencoder.py index ae003261e7f..237769aba71 100644 --- a/invokeai/backend/flux/modules/autoencoder.py +++ b/invokeai/backend/flux/modules/autoencoder.py @@ -20,10 +20,6 @@ class AutoEncoderParams: shift_factor: float -def swish(x: Tensor) -> Tensor: - return x * torch.sigmoid(x) - - class AttnBlock(nn.Module): def __init__(self, in_channels: int): super().__init__() @@ -71,11 +67,11 @@ def __init__(self, in_channels: int, out_channels: int): def forward(self, x): h = x h = self.norm1(h) - h = swish(h) + h = torch.nn.functional.silu(h) h = self.conv1(h) h = self.norm2(h) - h = swish(h) + h = torch.nn.functional.silu(h) h = self.conv2(h) if self.in_channels != self.out_channels: @@ -177,7 +173,7 @@ def forward(self, x: Tensor) -> Tensor: h = self.mid.block_2(h) # end h = self.norm_out(h) - h = swish(h) + h = torch.nn.functional.silu(h) h = self.conv_out(h) return h @@ -256,7 +252,7 @@ def forward(self, z: Tensor) -> Tensor: # end h = self.norm_out(h) - h = swish(h) + h = torch.nn.functional.silu(h) h = self.conv_out(h) return h From bd1b37ddbbe1feb831a75255ebbae12aa98e142b Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Sat, 24 Aug 2024 11:01:16 -0400 Subject: [PATCH 096/113] Setup scaffolding for in progress images and add ability to cancel the flux node --- .../app/invocations/flux_text_to_image.py | 34 ++++++++ invokeai/backend/flux/sampling.py | 14 +++ invokeai/backend/flux/util.py | 86 +++++++++++++++++++ .../scripts/load_flux_model_bnb_llm_int8.py | 4 +- .../scripts/load_flux_model_bnb_nf4.py | 4 +- 5 files changed, 138 insertions(+), 4 deletions(-) create mode 100644 invokeai/backend/flux/util.py diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index b68bb91513c..43cf1f9d650 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,3 +1,4 @@ +import numpy as np import torch from einops import rearrange from PIL import Image @@ -13,12 +14,15 @@ ) from invokeai.app.invocations.model import TransformerField, VAEField from invokeai.app.invocations.primitives import ImageOutput +from invokeai.app.services.session_processor.session_processor_common import CanceledException, ProgressImage from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.app.util.step_callback import PipelineIntermediateState from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, prepare_latent_img_patches, unpack from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.util.util import image_to_dataURL @invocation( @@ -108,6 +112,35 @@ def _run_diffusion( with transformer_info as transformer: assert isinstance(transformer, Flux) + def step_callback(img: torch.Tensor, state: PipelineIntermediateState) -> None: + if context.util.is_canceled(): + raise CanceledException + + # TODO: Make this look like the image + latent_image = unpack(img.float(), self.height, self.width) + latent_image = latent_image.squeeze() # Remove unnecessary dimensions + flattened_tensor = latent_image.reshape(-1) # Flatten to shape 
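For reference, denoise() now takes a step_callback that receives the packed latents and a PipelineIntermediateState after every step. A minimal sketch of a callback satisfying the new signature (the invocation below wires in a fuller version that also emits a preview image and raises CanceledException on cancellation):

    import torch

    from invokeai.app.util.step_callback import PipelineIntermediateState

    def step_callback(img: torch.Tensor, state: PipelineIntermediateState) -> None:
        # Only report progress here; img is still in the packed latent layout
        # and would need unpack() plus a VAE decode to become a preview image.
        print(f"FLUX denoise step {state.step + 1}/{state.total_steps}")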
[48*128*128] + + # Create a new tensor of the required shape [255, 255, 3] + latent_image = flattened_tensor[: 255 * 255 * 3].reshape(255, 255, 3) # Reshape to RGB format + + # Convert to a NumPy array and then to a PIL Image + image = Image.fromarray(latent_image.cpu().numpy().astype(np.uint8)) + + (width, height) = image.size + width *= 8 + height *= 8 + + dataURL = image_to_dataURL(image, image_format="JPEG") + + # TODO: move this whole function to invocation context to properly reference these variables + context._services.events.emit_invocation_denoise_progress( + context._data.queue_item, + context._data.invocation, + state, + ProgressImage(dataURL=dataURL, width=width, height=height), + ) + x = denoise( model=transformer, img=img, @@ -116,6 +149,7 @@ def _run_diffusion( txt_ids=txt_ids, vec=clip_embeddings, timesteps=timesteps, + step_callback=step_callback, guidance=self.guidance, ) diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 318a0bcdce9..ab9d41797b8 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -8,6 +8,7 @@ from torch import Tensor from tqdm import tqdm +from invokeai.app.util.step_callback import PipelineIntermediateState from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.conditioner import HFEncoder @@ -108,6 +109,7 @@ def denoise( vec: Tensor, # sampling parameters timesteps: list[float], + step_callback: Callable[[Tensor, PipelineIntermediateState], None], guidance: float = 4.0, ): dtype = model.txt_in.bias.dtype @@ -121,6 +123,7 @@ def denoise( # this is ignored for schnell guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) + step_count = 0 for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) pred = model( @@ -134,6 +137,17 @@ def denoise( ) img = img + (t_prev - t_curr) * pred + step_callback( + img, + PipelineIntermediateState( + step=step_count, + order=0, + total_steps=len(timesteps), + timestep=math.floor(t_curr), + latents=img, + ), + ) + step_count += 1 return img diff --git a/invokeai/backend/flux/util.py b/invokeai/backend/flux/util.py new file mode 100644 index 00000000000..112d7111de2 --- /dev/null +++ b/invokeai/backend/flux/util.py @@ -0,0 +1,86 @@ +# Initially pulled from https://github.com/black-forest-labs/flux + +import os +from dataclasses import dataclass + +from invokeai.backend.flux.model import FluxParams +from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams + + +@dataclass +class ModelSpec: + params: FluxParams + ae_params: AutoEncoderParams + ckpt_path: str | None + ae_path: str | None + repo_id: str | None + repo_flow: str | None + repo_ae: str | None + + +configs = { + "flux-dev": ModelSpec( + repo_id="black-forest-labs/FLUX.1-dev", + repo_flow="flux1-dev.safetensors", + repo_ae="ae.safetensors", + ckpt_path=os.getenv("FLUX_DEV"), + params=FluxParams( + in_channels=64, + vec_in_dim=768, + context_in_dim=4096, + hidden_size=3072, + mlp_ratio=4.0, + num_heads=24, + depth=19, + depth_single_blocks=38, + axes_dim=[16, 56, 56], + theta=10_000, + qkv_bias=True, + guidance_embed=True, + ), + ae_path=os.getenv("AE"), + ae_params=AutoEncoderParams( + resolution=256, + in_channels=3, + ch=128, + out_ch=3, + ch_mult=[1, 2, 4, 4], + num_res_blocks=2, + z_channels=16, + scale_factor=0.3611, + shift_factor=0.1159, + ), + ), + "flux-schnell": ModelSpec( + 
repo_id="black-forest-labs/FLUX.1-schnell", + repo_flow="flux1-schnell.safetensors", + repo_ae="ae.safetensors", + ckpt_path=os.getenv("FLUX_SCHNELL"), + params=FluxParams( + in_channels=64, + vec_in_dim=768, + context_in_dim=4096, + hidden_size=3072, + mlp_ratio=4.0, + num_heads=24, + depth=19, + depth_single_blocks=38, + axes_dim=[16, 56, 56], + theta=10_000, + qkv_bias=True, + guidance_embed=False, + ), + ae_path=os.getenv("AE"), + ae_params=AutoEncoderParams( + resolution=256, + in_channels=3, + ch=128, + out_ch=3, + ch_mult=[1, 2, 4, 4], + num_res_blocks=2, + z_channels=16, + scale_factor=0.3611, + shift_factor=0.1159, + ), + ), +} diff --git a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py index e8771dca225..286c96b5277 100644 --- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py @@ -1,8 +1,8 @@ from pathlib import Path import accelerate -from flux.model import Flux -from flux.util import configs as flux_configs +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.util import configs as flux_configs from safetensors.torch import load_file, save_file from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 diff --git a/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py index fe88b79d328..5415407a2bd 100644 --- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py @@ -4,10 +4,10 @@ import accelerate import torch -from flux.model import Flux -from flux.util import configs as flux_configs from safetensors.torch import load_file, save_file +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.util import configs as flux_configs from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 From d159fe618e9cb3aaf7013e9f2fd0621c8d4554da Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Sun, 25 Aug 2024 02:41:13 -0400 Subject: [PATCH 097/113] Remove dependency on flux config files --- invokeai/app/invocations/model.py | 7 +-- invokeai/backend/flux/util.py | 20 +++++++++ .../model_manager/load/model_loaders/flux.py | 43 ++++--------------- invokeai/backend/model_manager/probe.py | 4 +- invokeai/configs/flux/flux1-dev.yaml | 19 -------- invokeai/configs/flux/flux1-schnell.yaml | 19 -------- invokeai/configs/flux/flux1-vae.yaml | 16 ------- 7 files changed, 33 insertions(+), 95 deletions(-) delete mode 100644 invokeai/configs/flux/flux1-dev.yaml delete mode 100644 invokeai/configs/flux/flux1-schnell.yaml delete mode 100644 invokeai/configs/flux/flux1-vae.yaml diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index 756686b548a..d68f8eaa971 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -11,6 +11,7 @@ invocation, invocation_output, ) +from invokeai.backend.flux.util import max_seq_lengths from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig @@ -188,17 +189,13 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput: vae = self._get_model(context, SubModelType.VAE) transformer_config = 
context.models.get_config(transformer) assert isinstance(transformer_config, CheckpointConfigBase) - legacy_config_path = context.config.get().legacy_conf_path / transformer_config.config_path - config_path = legacy_config_path.as_posix() - with open(config_path, "r") as stream: - flux_conf = yaml.safe_load(stream) return FluxModelLoaderOutput( transformer=TransformerField(transformer=transformer), clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0), t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder), vae=VAEField(vae=vae), - max_seq_len=flux_conf["max_seq_len"], + max_seq_len=max_seq_lengths[transformer_config.config_path], ) def _get_model(self, context: InvocationContext, submodel: SubModelType) -> ModelIdentifierField: diff --git a/invokeai/backend/flux/util.py b/invokeai/backend/flux/util.py index 112d7111de2..40e0554dcd9 100644 --- a/invokeai/backend/flux/util.py +++ b/invokeai/backend/flux/util.py @@ -2,6 +2,7 @@ import os from dataclasses import dataclass +from typing import Dict, Literal from invokeai.backend.flux.model import FluxParams from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams @@ -18,6 +19,25 @@ class ModelSpec: repo_ae: str | None +max_seq_lengths: Dict[str, Literal[256, 512]] = { + "flux-dev": 512, + "flux-schnell": 256, +} + + +ae_params=AutoEncoderParams( + resolution=256, + in_channels=3, + ch=128, + out_ch=3, + ch_mult=[1, 2, 4, 4], + num_res_blocks=2, + z_channels=16, + scale_factor=0.3611, + shift_factor=0.1159, +) + + configs = { "flux-dev": ModelSpec( repo_id="black-forest-labs/FLUX.1-dev", diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 79613b7602f..063367f30d2 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -1,19 +1,18 @@ # Copyright (c) 2024, Brandon W. 
Rising and the InvokeAI Development Team """Class for Flux model loading in InvokeAI.""" -from dataclasses import fields from pathlib import Path -from typing import Any, Optional +from typing import Optional import accelerate import torch -import yaml from safetensors.torch import load_file from transformers import AutoConfig, AutoModelForTextEncoding, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.services.config.config_default import get_config -from invokeai.backend.flux.model import Flux, FluxParams -from invokeai.backend.flux.modules.autoencoder import AutoEncoder, AutoEncoderParams +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.util import configs, ae_params +from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.model_manager import ( AnyModel, AnyModelConfig, @@ -58,17 +57,9 @@ def _load_model( if not isinstance(config, VAECheckpointConfig): raise ValueError("Only VAECheckpointConfig models are currently supported here.") model_path = Path(config.path) - legacy_config_path = app_config.legacy_conf_path / config.config_path - config_path = legacy_config_path.as_posix() - with open(config_path, "r") as stream: - flux_conf = yaml.safe_load(stream) - - dataclass_fields = {f.name for f in fields(AutoEncoderParams)} - filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} - params = AutoEncoderParams(**filtered_data) with SilenceWarnings(): - model = AutoEncoder(params) + model = AutoEncoder(ae_params) sd = load_file(model_path) model.load_state_dict(sd, assign=True) model.to(dtype=self._torch_dtype) @@ -182,14 +173,10 @@ def _load_model( ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): raise ValueError("Only CheckpointConfigBase models are currently supported here.") - legacy_config_path = app_config.legacy_conf_path / config.config_path - config_path = legacy_config_path.as_posix() - with open(config_path, "r") as stream: - flux_conf = yaml.safe_load(stream) match submodel_type: case SubModelType.Transformer: - return self._load_from_singlefile(config, flux_conf) + return self._load_from_singlefile(config) raise ValueError( f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" @@ -198,16 +185,12 @@ def _load_model( def _load_from_singlefile( self, config: AnyModelConfig, - flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainCheckpointConfig) model_path = Path(config.path) - dataclass_fields = {f.name for f in fields(FluxParams)} - filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} - params = FluxParams(**filtered_data) with SilenceWarnings(): - model = Flux(params) + model = Flux(configs[config.config_path].params) sd = load_file(model_path) model.load_state_dict(sd, assign=True) return model @@ -224,14 +207,10 @@ def _load_model( ) -> AnyModel: if not isinstance(config, CheckpointConfigBase): raise ValueError("Only CheckpointConfigBase models are currently supported here.") - legacy_config_path = app_config.legacy_conf_path / config.config_path - config_path = legacy_config_path.as_posix() - with open(config_path, "r") as stream: - flux_conf = yaml.safe_load(stream) match submodel_type: case SubModelType.Transformer: - return self._load_from_singlefile(config, flux_conf) + return self._load_from_singlefile(config) raise ValueError( f"Only Transformer submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}" @@ -240,7 +219,6 @@ def _load_model( def _load_from_singlefile( self, config: AnyModelConfig, - flux_conf: Any, ) -> AnyModel: assert isinstance(config, MainBnbQuantized4bCheckpointConfig) if not bnb_available: @@ -248,13 +226,10 @@ def _load_from_singlefile( "The bnb modules are not available. Please install bitsandbytes if available on your platform." ) model_path = Path(config.path) - dataclass_fields = {f.name for f in fields(FluxParams)} - filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields} - params = FluxParams(**filtered_data) with SilenceWarnings(): with accelerate.init_empty_weights(): - model = Flux(params) + model = Flux(configs[config.config_path].params) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) sd = load_file(model_path) model.load_state_dict(sd, assign=True) diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index e552b1cf1e8..0ad537a5f36 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -329,9 +329,9 @@ def _get_checkpoint_config_path( checkpoint = ModelProbe._scan_and_load_checkpoint(model_path) state_dict = checkpoint.get("state_dict") or checkpoint if "guidance_in.out_layer.weight" in state_dict: - config_file = "flux/flux1-dev.yaml" + config_file = "flux-dev" else: - config_file = "flux/flux1-schnell.yaml" + config_file = "flux-schnell" else: config_file = LEGACY_CONFIGS[base_type][variant_type] if isinstance(config_file, dict): # need another tier for sd-2.x models diff --git a/invokeai/configs/flux/flux1-dev.yaml b/invokeai/configs/flux/flux1-dev.yaml deleted file mode 100644 index 40a5b26a973..00000000000 --- a/invokeai/configs/flux/flux1-dev.yaml +++ /dev/null @@ -1,19 +0,0 @@ -repo_id: "black-forest-labs/FLUX.1-dev" -repo_ae: "ae.safetensors" -max_seq_len: 512 -params: - in_channels: 64 - vec_in_dim: 768 - context_in_dim: 4096 - hidden_size: 3072 - mlp_ratio: 4.0 - num_heads: 24 - depth: 19 - depth_single_blocks: 38 - axes_dim: - - 16 - - 56 - - 56 - theta: 10_000 - qkv_bias: True - guidance_embed: True diff --git a/invokeai/configs/flux/flux1-schnell.yaml b/invokeai/configs/flux/flux1-schnell.yaml deleted file mode 100644 index 2e9208c2c4a..00000000000 --- a/invokeai/configs/flux/flux1-schnell.yaml +++ /dev/null @@ -1,19 +0,0 @@ -repo_id: "black-forest-labs/FLUX.1-schnell" -repo_ae: "ae.safetensors" -max_seq_len: 256 -params: - in_channels: 64 - vec_in_dim: 768 - context_in_dim: 4096 - hidden_size: 3072 - mlp_ratio: 4.0 - num_heads: 24 - depth: 19 - depth_single_blocks: 38 - axes_dim: - - 16 - - 56 - - 56 - theta: 10_000 - qkv_bias: True - guidance_embed: False diff --git a/invokeai/configs/flux/flux1-vae.yaml b/invokeai/configs/flux/flux1-vae.yaml deleted file mode 100644 index 2949378a2ba..00000000000 --- a/invokeai/configs/flux/flux1-vae.yaml +++ /dev/null @@ -1,16 +0,0 @@ -repo_id: "black-forest-labs/FLUX.1-schnell" -repo_path: "ae.safetensors" -params: - resolution: 256 - in_channels: 3 - ch: 128 - out_ch: 3 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - z_channels: 16 - scale_factor: 0.3611 - shift_factor: 0.1159 \ No newline at end of file From 877b88e59e97f0bd34b7e8c59755204984f741d6 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 14:09:02 +0000 Subject: [PATCH 098/113] ruff --- invokeai/app/invocations/model.py | 3 +-- invokeai/backend/model_manager/load/model_loaders/flux.py | 2 +- 
.../quantization/scripts/load_flux_model_bnb_llm_int8.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/invokeai/app/invocations/model.py b/invokeai/app/invocations/model.py index d68f8eaa971..88874f302a7 100644 --- a/invokeai/app/invocations/model.py +++ b/invokeai/app/invocations/model.py @@ -1,7 +1,6 @@ import copy from typing import List, Literal, Optional -import yaml from pydantic import BaseModel, Field from invokeai.app.invocations.baseinvocation import ( @@ -11,10 +10,10 @@ invocation, invocation_output, ) -from invokeai.backend.flux.util import max_seq_lengths from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.shared.models import FreeUConfig +from invokeai.backend.flux.util import max_seq_lengths from invokeai.backend.model_manager.config import ( AnyModelConfig, BaseModelType, diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 063367f30d2..33090ccc10e 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -11,8 +11,8 @@ from invokeai.app.services.config.config_default import get_config from invokeai.backend.flux.model import Flux -from invokeai.backend.flux.util import configs, ae_params from invokeai.backend.flux.modules.autoencoder import AutoEncoder +from invokeai.backend.flux.util import ae_params, configs from invokeai.backend.model_manager import ( AnyModel, AnyModelConfig, diff --git a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py index 286c96b5277..51c787d8ef3 100644 --- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py +++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py @@ -1,10 +1,10 @@ from pathlib import Path import accelerate -from invokeai.backend.flux.model import Flux -from invokeai.backend.flux.util import configs as flux_configs from safetensors.torch import load_file, save_file +from invokeai.backend.flux.model import Flux +from invokeai.backend.flux.util import configs as flux_configs from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8 from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time From ae94e48afe0ab1714a8d3afcfebbcaf05b7dbbed Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 14:30:42 +0000 Subject: [PATCH 099/113] Remove flux repo dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0c9e79b5831..848e67d57f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,6 @@ dependencies = [ "compel==2.0.2", "controlnet-aux==0.0.7", "diffusers[torch]==0.27.2", - "flux @ git+https://github.com/black-forest-labs/flux.git@c23ae247225daba30fbd56058d247cc1b1fc20a3", "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal() From f046a38d1db8c82c5b3118678ba54f0c90c89c7e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 14:47:38 +0000 Subject: [PATCH 100/113] Downgrade accelerate and huggingface-hub deps to original versions. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 848e67d57f8..2cbd8298570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] dependencies = [ # Core generation dependencies, pinned for reproducible builds. - "accelerate==0.33.0", + "accelerate==0.30.1", "bitsandbytes==0.43.3; sys_platform!='darwin'", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", @@ -60,7 +60,7 @@ dependencies = [ # Core application dependencies, pinned for reproducible builds. "fastapi-events==0.11.1", "fastapi==0.111.0", - "huggingface-hub==0.24.5", + "huggingface-hub==0.23.1", "pydantic-settings==2.2.1", "pydantic==2.7.2", "python-socketio==5.11.1", From 9f6f404090849cbe05c814d3530dad22c8df534a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 14:48:15 +0000 Subject: [PATCH 101/113] ruff format --- invokeai/backend/flux/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/backend/flux/util.py b/invokeai/backend/flux/util.py index 40e0554dcd9..748c79e11d8 100644 --- a/invokeai/backend/flux/util.py +++ b/invokeai/backend/flux/util.py @@ -25,7 +25,7 @@ class ModelSpec: } -ae_params=AutoEncoderParams( +ae_params = AutoEncoderParams( resolution=256, in_channels=3, ch=128, From 9a530c7eda565edca729926a7b3e941ed7bdfe0c Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 15:23:35 +0000 Subject: [PATCH 102/113] Remove outdated TODO. --- invokeai/backend/model_manager/load/model_loaders/flux.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 33090ccc10e..53119f6df07 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -153,9 +153,7 @@ def _load_model( case SubModelType.Tokenizer2: return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512) case SubModelType.TextEncoder2: - return T5EncoderModel.from_pretrained( - Path(config.path) / "text_encoder_2" - ) # TODO: Fix hf subfolder install + return T5EncoderModel.from_pretrained(Path(config.path) / "text_encoder_2") raise ValueError( f"Only Tokenizer and TextEncoder submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}" From bb806970bc0035cb14a73a0b307468fbf0910161 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 12:54:28 -0400 Subject: [PATCH 103/113] Only install starter models if not already installed --- .../StarterModels/StartModelsResultItem.tsx | 9 +++++++-- .../StarterModels/StarterModelsForm.tsx | 20 ++++++++++++++++--- .../StarterModels/StarterModelsResults.tsx | 6 ++++-- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx index f85b124b21d..81913f3e8ee 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StartModelsResultItem.tsx @@ -5,11 +5,13 @@ import { memo, useCallback, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; import { PiPlusBold } from 'react-icons/pi'; import type { GetStarterModelsResponse } from 'services/api/endpoints/models'; +import type { AnyModelConfig } from 'services/api/types'; type Props = { result: GetStarterModelsResponse[number]; + modelList: AnyModelConfig[]; }; -export const StarterModelsResultItem = memo(({ result }: Props) => { +export const StarterModelsResultItem = memo(({ result, modelList }: Props) => { const { t } = useTranslation(); const allSources = useMemo(() => { const _allSources = [ @@ -38,9 +40,12 @@ export const StarterModelsResultItem = memo(({ result }: Props) => { const onClick = useCallback(() => { for (const { config, source } of allSources) { + if (modelList.some((mc) => config.base === mc.base && config.name === mc.name && config.type === mc.type)) { + continue; + } installModel({ config, source }); } - }, [allSources, installModel]); + }, [modelList, allSources, installModel]); return ( diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsForm.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsForm.tsx index 837ef5c63b8..eaf2cb534ef 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsForm.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsForm.tsx @@ -1,17 +1,31 @@ import { Flex } from '@invoke-ai/ui-library'; +import { EMPTY_ARRAY } from 'app/store/constants'; import { FetchingModelsLoader } from 'features/modelManagerV2/subpanels/ModelManagerPanel/FetchingModelsLoader'; -import { memo } from 'react'; -import { useGetStarterModelsQuery } from 'services/api/endpoints/models'; +import { memo, useMemo } from 'react'; +import { + modelConfigsAdapterSelectors, + useGetModelConfigsQuery, + useGetStarterModelsQuery, +} from 'services/api/endpoints/models'; import { StarterModelsResults } from './StarterModelsResults'; export const StarterModelsForm = memo(() => { const { isLoading, data } = useGetStarterModelsQuery(); + const { data: modelListRes } = useGetModelConfigsQuery(); + + const modelList = useMemo(() => { + if (!modelListRes) { + return EMPTY_ARRAY; + } + + return modelConfigsAdapterSelectors.selectAll(modelListRes); + }, [modelListRes]); return ( {isLoading && } - {data && } + {data && } ); }); 
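
The change in this commit comes down to one rule: a starter-model source is only queued for install when no already-installed config matches it on base, name, and type. The check itself lives in the TypeScript onClick handler above; the following is a minimal sketch of the same rule in Python, where ModelKey, sources_to_install, and the example source strings are illustrative names rather than identifiers from the codebase.

# Hypothetical sketch of the de-duplication rule added in this commit.
# The real check is the `modelList.some(...)` call in StartModelsResultItem.tsx.
from typing import Iterable, NamedTuple


class ModelKey(NamedTuple):
    base: str
    name: str
    type: str


def sources_to_install(
    starter_sources: Iterable[tuple[ModelKey, str]],
    installed_models: Iterable[ModelKey],
) -> list[str]:
    installed = set(installed_models)
    # Keep only sources whose (base, name, type) is not already installed.
    return [source for key, source in starter_sources if key not in installed]


# Example: an already-installed starter model is skipped; a missing dependency
# is still returned for installation.
installed = [ModelKey("flux", "FLUX Schnell (Quantized)", "main")]
starters = [
    (ModelKey("flux", "FLUX Schnell (Quantized)", "main"), "example-source-a"),
    (ModelKey("any", "t5_bnb_int8_quantized_encoder", "t5_encoder"), "example-source-b"),
]
assert sources_to_install(starters, installed) == ["example-source-b"]
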
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsResults.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsResults.tsx index e593ee5fc3c..c443171060e 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsResults.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/AddModelPanel/StarterModels/StarterModelsResults.tsx @@ -5,14 +5,16 @@ import { memo, useCallback, useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { PiXBold } from 'react-icons/pi'; import type { GetStarterModelsResponse } from 'services/api/endpoints/models'; +import type { AnyModelConfig } from 'services/api/types'; import { StarterModelsResultItem } from './StartModelsResultItem'; type StarterModelsResultsProps = { results: NonNullable; + modelList: AnyModelConfig[]; }; -export const StarterModelsResults = memo(({ results }: StarterModelsResultsProps) => { +export const StarterModelsResults = memo(({ results, modelList }: StarterModelsResultsProps) => { const { t } = useTranslation(); const [searchTerm, setSearchTerm] = useState(''); @@ -72,7 +74,7 @@ export const StarterModelsResults = memo(({ results }: StarterModelsResultsProps {filteredResults.map((result) => ( - + ))} From 642a953df0bf5bd736440ed841cbffed493e799a Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 12:59:36 -0400 Subject: [PATCH 104/113] Remove in progress images until we're able to make the valuable --- .../app/invocations/flux_text_to_image.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 43cf1f9d650..19af5baae63 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -116,30 +116,30 @@ def step_callback(img: torch.Tensor, state: PipelineIntermediateState) -> None: if context.util.is_canceled(): raise CanceledException - # TODO: Make this look like the image - latent_image = unpack(img.float(), self.height, self.width) - latent_image = latent_image.squeeze() # Remove unnecessary dimensions - flattened_tensor = latent_image.reshape(-1) # Flatten to shape [48*128*128] - - # Create a new tensor of the required shape [255, 255, 3] - latent_image = flattened_tensor[: 255 * 255 * 3].reshape(255, 255, 3) # Reshape to RGB format - - # Convert to a NumPy array and then to a PIL Image - image = Image.fromarray(latent_image.cpu().numpy().astype(np.uint8)) - - (width, height) = image.size - width *= 8 - height *= 8 - - dataURL = image_to_dataURL(image, image_format="JPEG") - - # TODO: move this whole function to invocation context to properly reference these variables - context._services.events.emit_invocation_denoise_progress( - context._data.queue_item, - context._data.invocation, - state, - ProgressImage(dataURL=dataURL, width=width, height=height), - ) + # TODO: Make this look like the image before re-enabling + # latent_image = unpack(img.float(), self.height, self.width) + # latent_image = latent_image.squeeze() # Remove unnecessary dimensions + # flattened_tensor = latent_image.reshape(-1) # Flatten to shape [48*128*128] + + # # Create a new tensor of the required shape [255, 255, 3] + # latent_image = flattened_tensor[: 255 * 255 * 3].reshape(255, 255, 3) # Reshape to RGB format + + # # Convert to a NumPy 
array and then to a PIL Image + # image = Image.fromarray(latent_image.cpu().numpy().astype(np.uint8)) + + # (width, height) = image.size + # width *= 8 + # height *= 8 + + # dataURL = image_to_dataURL(image, image_format="JPEG") + + # # TODO: move this whole function to invocation context to properly reference these variables + # context._services.events.emit_invocation_denoise_progress( + # context._data.queue_item, + # context._data.invocation, + # state, + # ProgressImage(dataURL=dataURL, width=width, height=height), + # ) x = denoise( model=transformer, From a90d09886b5292501a3a82e149515e669fbb0ba3 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 13:07:31 -0400 Subject: [PATCH 105/113] Remove no longer used code in the flux denoise function --- invokeai/app/invocations/flux_text_to_image.py | 2 +- invokeai/backend/flux/sampling.py | 15 ++------------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 19af5baae63..33a09da9bfb 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -112,7 +112,7 @@ def _run_diffusion( with transformer_info as transformer: assert isinstance(transformer, Flux) - def step_callback(img: torch.Tensor, state: PipelineIntermediateState) -> None: + def step_callback() -> None: if context.util.is_canceled(): raise CanceledException diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index ab9d41797b8..5001959e505 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -109,7 +109,7 @@ def denoise( vec: Tensor, # sampling parameters timesteps: list[float], - step_callback: Callable[[Tensor, PipelineIntermediateState], None], + step_callback: Callable[[], None], guidance: float = 4.0, ): dtype = model.txt_in.bias.dtype @@ -123,7 +123,6 @@ def denoise( # this is ignored for schnell guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) - step_count = 0 for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) pred = model( @@ -137,17 +136,7 @@ def denoise( ) img = img + (t_prev - t_curr) * pred - step_callback( - img, - PipelineIntermediateState( - step=step_count, - order=0, - total_steps=len(timesteps), - timestep=math.floor(t_curr), - latents=img, - ), - ) - step_count += 1 + step_callback() return img From 40a3fa54fec84fa696f6964a9de084208bd5d194 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 13:12:45 -0400 Subject: [PATCH 106/113] Fix type error in tsc --- .../subpanels/ModelManagerPanel/ModelFormatBadge.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx index 167588ddd38..68cd4556463 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx @@ -14,7 +14,7 @@ const FORMAT_NAME_MAP: Record = { embedding_file: 'embedding', embedding_folder: 'embedding', t5_encoder: 't5_encoder', - t5_encoder_8b: 't5_encoder_8b', + bnb_quantized_int8b: 'bnb_quantized_int8b', 
bnb_quantized_nf4b: 'quantized', }; @@ -26,7 +26,7 @@ const FORMAT_COLOR_MAP: Record = { embedding_file: 'base', embedding_folder: 'base', t5_encoder: 'base', - t5_encoder_8b: 'base', + bnb_quantized_int8b: 'base', bnb_quantized_nf4b: 'base', }; From b9238b6174685a996ce3eab14c9b6423bb05ca79 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 13:14:48 -0400 Subject: [PATCH 107/113] Run ruff --- invokeai/app/invocations/flux_text_to_image.py | 5 +---- invokeai/backend/flux/sampling.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index 33a09da9bfb..b6ff06c67bf 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -1,4 +1,3 @@ -import numpy as np import torch from einops import rearrange from PIL import Image @@ -14,15 +13,13 @@ ) from invokeai.app.invocations.model import TransformerField, VAEField from invokeai.app.invocations.primitives import ImageOutput -from invokeai.app.services.session_processor.session_processor_common import CanceledException, ProgressImage +from invokeai.app.services.session_processor.session_processor_common import CanceledException from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.app.util.step_callback import PipelineIntermediateState from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, prepare_latent_img_patches, unpack from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo from invokeai.backend.util.devices import TorchDevice -from invokeai.backend.util.util import image_to_dataURL @invocation( diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py index 5001959e505..19de48ae81a 100644 --- a/invokeai/backend/flux/sampling.py +++ b/invokeai/backend/flux/sampling.py @@ -8,7 +8,6 @@ from torch import Tensor from tqdm import tqdm -from invokeai.app.util.step_callback import PipelineIntermediateState from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.conditioner import HFEncoder From 5a5ca10ee0166bc4e95422fdbf86f3dd3869b332 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 15:42:42 -0400 Subject: [PATCH 108/113] Rename params for flux and flux vae, add comments explaining use of the config_path in model config --- invokeai/backend/flux/util.py | 114 ++++++------------ .../model_manager/load/model_loaders/flux.py | 8 +- invokeai/backend/model_manager/probe.py | 14 ++- .../scripts/load_flux_model_bnb_llm_int8.py | 6 +- .../scripts/load_flux_model_bnb_nf4.py | 6 +- 5 files changed, 63 insertions(+), 85 deletions(-) diff --git a/invokeai/backend/flux/util.py b/invokeai/backend/flux/util.py index 748c79e11d8..703b032fa32 100644 --- a/invokeai/backend/flux/util.py +++ b/invokeai/backend/flux/util.py @@ -25,82 +25,48 @@ class ModelSpec: } -ae_params = AutoEncoderParams( - resolution=256, - in_channels=3, - ch=128, - out_ch=3, - ch_mult=[1, 2, 4, 4], - num_res_blocks=2, - z_channels=16, - scale_factor=0.3611, - shift_factor=0.1159, -) +ae_params = { + "flux": AutoEncoderParams( + resolution=256, + in_channels=3, + ch=128, + out_ch=3, + ch_mult=[1, 2, 4, 4], + num_res_blocks=2, + z_channels=16, + scale_factor=0.3611, + shift_factor=0.1159, + ) +} -configs = { - "flux-dev": ModelSpec( - 
repo_id="black-forest-labs/FLUX.1-dev", - repo_flow="flux1-dev.safetensors", - repo_ae="ae.safetensors", - ckpt_path=os.getenv("FLUX_DEV"), - params=FluxParams( - in_channels=64, - vec_in_dim=768, - context_in_dim=4096, - hidden_size=3072, - mlp_ratio=4.0, - num_heads=24, - depth=19, - depth_single_blocks=38, - axes_dim=[16, 56, 56], - theta=10_000, - qkv_bias=True, - guidance_embed=True, - ), - ae_path=os.getenv("AE"), - ae_params=AutoEncoderParams( - resolution=256, - in_channels=3, - ch=128, - out_ch=3, - ch_mult=[1, 2, 4, 4], - num_res_blocks=2, - z_channels=16, - scale_factor=0.3611, - shift_factor=0.1159, - ), +params = { + "flux-dev": FluxParams( + in_channels=64, + vec_in_dim=768, + context_in_dim=4096, + hidden_size=3072, + mlp_ratio=4.0, + num_heads=24, + depth=19, + depth_single_blocks=38, + axes_dim=[16, 56, 56], + theta=10_000, + qkv_bias=True, + guidance_embed=True, ), - "flux-schnell": ModelSpec( - repo_id="black-forest-labs/FLUX.1-schnell", - repo_flow="flux1-schnell.safetensors", - repo_ae="ae.safetensors", - ckpt_path=os.getenv("FLUX_SCHNELL"), - params=FluxParams( - in_channels=64, - vec_in_dim=768, - context_in_dim=4096, - hidden_size=3072, - mlp_ratio=4.0, - num_heads=24, - depth=19, - depth_single_blocks=38, - axes_dim=[16, 56, 56], - theta=10_000, - qkv_bias=True, - guidance_embed=False, - ), - ae_path=os.getenv("AE"), - ae_params=AutoEncoderParams( - resolution=256, - in_channels=3, - ch=128, - out_ch=3, - ch_mult=[1, 2, 4, 4], - num_res_blocks=2, - z_channels=16, - scale_factor=0.3611, - shift_factor=0.1159, - ), + "flux-schnell": FluxParams( + in_channels=64, + vec_in_dim=768, + context_in_dim=4096, + hidden_size=3072, + mlp_ratio=4.0, + num_heads=24, + depth=19, + depth_single_blocks=38, + axes_dim=[16, 56, 56], + theta=10_000, + qkv_bias=True, + guidance_embed=False, ), } diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 53119f6df07..0316de60440 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -12,7 +12,7 @@ from invokeai.app.services.config.config_default import get_config from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder -from invokeai.backend.flux.util import ae_params, configs +from invokeai.backend.flux.util import ae_params, params from invokeai.backend.model_manager import ( AnyModel, AnyModelConfig, @@ -59,7 +59,7 @@ def _load_model( model_path = Path(config.path) with SilenceWarnings(): - model = AutoEncoder(ae_params) + model = AutoEncoder(ae_params[config.config_path]) sd = load_file(model_path) model.load_state_dict(sd, assign=True) model.to(dtype=self._torch_dtype) @@ -188,7 +188,7 @@ def _load_from_singlefile( model_path = Path(config.path) with SilenceWarnings(): - model = Flux(configs[config.config_path].params) + model = Flux(params[config.config_path]) sd = load_file(model_path) model.load_state_dict(sd, assign=True) return model @@ -227,7 +227,7 @@ def _load_from_singlefile( with SilenceWarnings(): with accelerate.init_empty_weights(): - model = Flux(configs[config.config_path].params) + model = Flux(params[config.config_path]) model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16) sd = load_file(model_path) model.load_state_dict(sd, assign=True) diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index 0ad537a5f36..029366e3573 
100644
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -329,8 +329,16 @@ def _get_checkpoint_config_path(
                 checkpoint = ModelProbe._scan_and_load_checkpoint(model_path)
                 state_dict = checkpoint.get("state_dict") or checkpoint
                 if "guidance_in.out_layer.weight" in state_dict:
+                    # For flux, this is a key in invokeai.backend.flux.util.params
+                    # Because model type and format are the discriminators for model configs, this
+                    # is used rather than attempting to support flux with separate model types and formats.
+                    # If changed in the future, please fix me
                     config_file = "flux-dev"
                 else:
+                    # For flux, this is a key in invokeai.backend.flux.util.params
+                    # Because model type and format are the discriminators for model configs, this
+                    # is used rather than attempting to support flux with separate model types and formats.
+                    # If changed in the future, please fix me
                     config_file = "flux-schnell"
         else:
             config_file = LEGACY_CONFIGS[base_type][variant_type]
@@ -345,7 +353,11 @@ def _get_checkpoint_config_path(
             )
         elif model_type is ModelType.VAE:
             config_file = (
-                "flux/flux1-vae.yaml"
+                # For flux, this is a key in invokeai.backend.flux.util.ae_params
+                # Because model type and format are the discriminators for model configs, this
+                # is used rather than attempting to support flux with separate model types and formats.
+                # If changed in the future, please fix me
+                "flux"
                 if base_type is BaseModelType.Flux
                 else "stable-diffusion/v1-inference.yaml"
                 if base_type is BaseModelType.StableDiffusion1
diff --git a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py
index 51c787d8ef3..804336e0007 100644
--- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py
+++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py
@@ -4,7 +4,7 @@
 from safetensors.torch import load_file, save_file
 
 from invokeai.backend.flux.model import Flux
-from invokeai.backend.flux.util import configs as flux_configs
+from invokeai.backend.flux.util import params
 from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8
 from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time
 
@@ -22,11 +22,11 @@ def main():
 
     with log_time("Intialize FLUX transformer on meta device"):
         # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config.
-        params = flux_configs["flux-schnell"].params
+        p = params["flux-schnell"]
 
         # Initialize the model on the "meta" device.
         with accelerate.init_empty_weights():
-            model = Flux(params)
+            model = Flux(p)
 
     # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate
     # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize.
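
This script and the load_flux_model_bnb_nf4.py change that follows use the same loading pattern as the NF4 single-file loader earlier in this series: build the Flux module under accelerate.init_empty_weights() so its parameters live on the meta device, swap in quantized layers, then materialize real tensors straight from the checkpoint with load_state_dict(..., assign=True). A condensed sketch of that flow is below; it reuses identifiers from this series (params, quantize_model_nf4), but the helper name load_quantized_flux, the checkpoint path argument, and the hard-coded schnell config are assumptions for illustration only.

# Minimal sketch, not a drop-in replacement for the scripts in this commit.
from pathlib import Path

import accelerate
import torch
from safetensors.torch import load_file

from invokeai.backend.flux.model import Flux
from invokeai.backend.flux.util import params
from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4


def load_quantized_flux(checkpoint_path: Path) -> Flux:
    # Allocate the transformer on the "meta" device: no real memory is used yet.
    with accelerate.init_empty_weights():
        model = Flux(params["flux-schnell"])
    # Replace eligible Linear layers with bitsandbytes NF4 equivalents.
    model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16)
    # assign=True lets the checkpoint tensors replace the meta tensors directly.
    sd = load_file(checkpoint_path)
    model.load_state_dict(sd, assign=True)
    return model

Materializing weights straight from the checkpoint this way avoids ever holding an unquantized copy of the transformer in memory, which is the point of the meta-device step.
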
diff --git a/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py index 5415407a2bd..f1621dbc6dd 100644 --- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py +++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py @@ -7,7 +7,7 @@ from safetensors.torch import load_file, save_file from invokeai.backend.flux.model import Flux -from invokeai.backend.flux.util import configs as flux_configs +from invokeai.backend.flux.util import params from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4 @@ -35,11 +35,11 @@ def main(): # inference_dtype = torch.bfloat16 with log_time("Intialize FLUX transformer on meta device"): # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config. - params = flux_configs["flux-schnell"].params + p = params["flux-schnell"] # Initialize the model on the "meta" device. with accelerate.init_empty_weights(): - model = Flux(params) + model = Flux(p) # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize. From bf59ab3ddf203077cc9280b9be3b85bf7d19cbdc Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Mon, 26 Aug 2024 16:00:32 -0400 Subject: [PATCH 109/113] update default workflow for flux --- .../default_workflows/Flux Text to Image.json | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index ccf5bc7a7b9..ccd33d96cce 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -1,108 +1,108 @@ { "name": "FLUX Text to Image", "author": "InvokeAI", - "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. These can be found in the starter models tab within your Model Manager.", + "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. 
Quantized and un-quantized versions can be found in the starter models tab within your Model Manager.", "version": "1.0.0", "contact": "", "tags": "text2image, flux", "notes": "", "exposedFields": [ { - "nodeId": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "nodeId": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", "fieldName": "model" }, { - "nodeId": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "nodeId": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", "fieldName": "t5_encoder" }, { - "nodeId": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", - "fieldName": "positive_prompt" + "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", + "fieldName": "prompt" } ], "meta": { "version": "3.0.0", "category": "default" }, -"nodes": [ + "nodes": [ { - "id": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "id": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", "type": "invocation", "data": { - "id": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", - "type": "flux_text_encoder", - "version": "1.0.0", + "id": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "type": "flux_model_loader", + "version": "1.0.3", "label": "", "notes": "", "isOpen": true, "isIntermediate": true, - "useCache": true, + "useCache": false, "inputs": { - "clip": { - "name": "clip", - "label": "" + "model": { + "name": "model", + "label": "", + "value": { + "key": "f04a7a2f-c74d-4538-8d5e-879a53501662", + "hash": "random:4875da7a9508444ffa706f61961c260d0c6729f6181a86b31fad06df1277b850", + "name": "FLUX Dev (Quantized)", + "base": "flux", + "type": "main" + } }, "t5_encoder": { "name": "t5_encoder", - "label": "" - }, - "t5_max_seq_len": { - "name": "t5_max_seq_len", "label": "", - "value": 256 - }, - "positive_prompt": { - "name": "positive_prompt", - "label": "", - "value": "" + "value": { + "key": "20dcd9ec-5fbb-4012-8401-049e707da5e5", + "hash": "random:f986be43ff3502169e4adbdcee158afb0e0a65a1edc4cab16ae59963630cfd8f", + "name": "t5_bnb_int8_quantized_encoder", + "base": "any", + "type": "t5_encoder" + } } } }, "position": { - "x": 817.9049777916891, - "y": 137.19130248771572 + "x": 337.09365228062825, + "y": 40.63469521079861 } }, { - "id": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", "type": "invocation", "data": { - "id": "fd1274a3-c56d-4a96-aa21-83d14c920f08", - "type": "flux_model_loader", - "version": "1.0.3", + "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", + "type": "flux_text_encoder", + "version": "1.0.0", "label": "", "notes": "", "isOpen": true, "isIntermediate": true, "useCache": true, "inputs": { - "model": { - "name": "model", - "label": "", - "value": { - "key": "a1deb125-2781-482c-8a71-9a22e76fd956", - "hash": "random:40bd0a5b8b2c6edf8f5611e049000329b952efc6a1a24b4f77ca4ae3dbecaf6a", - "name": "flux1-schnell-bnb_nf4", - "base": "flux", - "type": "main" - } + "clip": { + "name": "clip", + "label": "" }, "t5_encoder": { "name": "t5_encoder", - "label": "T5 Encoder (Model Manager -> Starter Models)", - "value": { - "key": "798baafd-63bd-4799-8600-5db43662f3aa", - "hash": "random:4495f3ac1650e9d5969c365e951a9cffee526573a15f9f4c997e1c63550444fd", - "name": "t5_8b_quantized_encoder", - "base": "any", - "type": "t5_encoder" - } + "label": "" + }, + "t5_max_seq_len": { + "name": "t5_max_seq_len", + "label": "T5 Max Seq Len", + "value": 256 + }, + "prompt": { + "name": "prompt", + "label": "", + "value": "a cat" } } }, "position": { - "x": 343.41871407356723, - "y": -1.0211223664301414 + "x": 824.1970602278849, + "y": 146.98251001061735 } }, { @@ -146,7 +146,7 @@ "notes": "", "isOpen": true, "isIntermediate": false, - "useCache": false, + 
"useCache": true, "inputs": { "board": { "name": "board", @@ -203,52 +203,52 @@ ], "edges": [ { - "id": "reactflow__edge-0fa90f47-79cd-4d84-9445-f0b1065e0c3cconditioning-159bdf1b-79e7-4174-b86e-d40e646964c8positive_text_conditioning", + "id": "reactflow__edge-4f0207c2-ff40-41fd-b047-ad33fbb1c33amax_seq_len-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_max_seq_len", + "type": "default", + "source": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", + "sourceHandle": "max_seq_len", + "targetHandle": "t5_max_seq_len" + }, + { + "id": "reactflow__edge-4f0207c2-ff40-41fd-b047-ad33fbb1c33avae-159bdf1b-79e7-4174-b86e-d40e646964c8vae", "type": "default", - "source": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "source": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "conditioning", - "targetHandle": "positive_text_conditioning" + "sourceHandle": "vae", + "targetHandle": "vae" }, { - "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08max_seq_len-0fa90f47-79cd-4d84-9445-f0b1065e0c3ct5_max_seq_len", + "id": "reactflow__edge-4f0207c2-ff40-41fd-b047-ad33fbb1c33atransformer-159bdf1b-79e7-4174-b86e-d40e646964c8transformer", "type": "default", - "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", - "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", - "sourceHandle": "max_seq_len", - "targetHandle": "t5_max_seq_len" + "source": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "sourceHandle": "transformer", + "targetHandle": "transformer" }, { - "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08t5_encoder-0fa90f47-79cd-4d84-9445-f0b1065e0c3ct5_encoder", + "id": "reactflow__edge-4f0207c2-ff40-41fd-b047-ad33fbb1c33at5_encoder-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_encoder", "type": "default", - "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", - "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "source": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", "sourceHandle": "t5_encoder", "targetHandle": "t5_encoder" }, { - "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08clip-0fa90f47-79cd-4d84-9445-f0b1065e0c3cclip", + "id": "reactflow__edge-4f0207c2-ff40-41fd-b047-ad33fbb1c33aclip-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cclip", "type": "default", - "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", - "target": "0fa90f47-79cd-4d84-9445-f0b1065e0c3c", + "source": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", "sourceHandle": "clip", "targetHandle": "clip" }, { - "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08vae-159bdf1b-79e7-4174-b86e-d40e646964c8vae", + "id": "reactflow__edge-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cconditioning-159bdf1b-79e7-4174-b86e-d40e646964c8positive_text_conditioning", "type": "default", - "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", + "source": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "vae", - "targetHandle": "vae" - }, - { - "id": "reactflow__edge-fd1274a3-c56d-4a96-aa21-83d14c920f08transformer-159bdf1b-79e7-4174-b86e-d40e646964c8transformer", - "type": "default", - "source": "fd1274a3-c56d-4a96-aa21-83d14c920f08", - "target": "159bdf1b-79e7-4174-b86e-d40e646964c8", - "sourceHandle": "transformer", - "targetHandle": "transformer" + "sourceHandle": "conditioning", + "targetHandle": "positive_text_conditioning" }, { "id": 
"reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-159bdf1b-79e7-4174-b86e-d40e646964c8seed", From bd2692be66f204b9cfa10b4801b74936500856f9 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Mon, 26 Aug 2024 16:03:54 -0400 Subject: [PATCH 110/113] remove prompt --- .../workflow_records/default_workflows/Flux Text to Image.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index ccd33d96cce..af27ea94fa0 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -96,7 +96,7 @@ "prompt": { "name": "prompt", "label": "", - "value": "a cat" + "value": "" } } }, From 5d42e67a8b3ee2d6c926b7352ff5741858a14cf8 Mon Sep 17 00:00:00 2001 From: Brandon Rising Date: Mon, 26 Aug 2024 16:45:01 -0400 Subject: [PATCH 111/113] Run ruff --- invokeai/backend/flux/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/invokeai/backend/flux/util.py b/invokeai/backend/flux/util.py index 703b032fa32..c81424f8ce4 100644 --- a/invokeai/backend/flux/util.py +++ b/invokeai/backend/flux/util.py @@ -1,6 +1,5 @@ # Initially pulled from https://github.com/black-forest-labs/flux -import os from dataclasses import dataclass from typing import Dict, Literal From c510234c9a8df81c7895ff2c07d207c9ce24f844 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Mon, 26 Aug 2024 19:12:55 -0400 Subject: [PATCH 112/113] default workflow: add steps to exposed fields, add more notes --- .../default_workflows/Flux Text to Image.json | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json index af27ea94fa0..783fdeed5e3 100644 --- a/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json +++ b/invokeai/app/services/workflow_records/default_workflows/Flux Text to Image.json @@ -1,23 +1,27 @@ { "name": "FLUX Text to Image", "author": "InvokeAI", - "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager.", + "description": "A simple text-to-image workflow using FLUX dev or schnell models. Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend 4 steps for FLUX schnell models and 30 steps for FLUX dev models.", "version": "1.0.0", "contact": "", "tags": "text2image, flux", - "notes": "", + "notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. 
We recommend 4 steps for FLUX schnell models and 30 steps for FLUX dev models.", "exposedFields": [ { "nodeId": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", "fieldName": "model" }, - { - "nodeId": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", - "fieldName": "t5_encoder" - }, { "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c", "fieldName": "prompt" + }, + { + "nodeId": "159bdf1b-79e7-4174-b86e-d40e646964c8", + "fieldName": "num_steps" + }, + { + "nodeId": "4f0207c2-ff40-41fd-b047-ad33fbb1c33a", + "fieldName": "t5_encoder" } ], "meta": { @@ -40,7 +44,7 @@ "inputs": { "model": { "name": "model", - "label": "", + "label": "Model (Starter Models can be found in Model Manager)", "value": { "key": "f04a7a2f-c74d-4538-8d5e-879a53501662", "hash": "random:4875da7a9508444ffa706f61961c260d0c6729f6181a86b31fad06df1277b850", @@ -51,7 +55,7 @@ }, "t5_encoder": { "name": "t5_encoder", - "label": "", + "label": "T 5 Encoder (Starter Models can be found in Model Manager)", "value": { "key": "20dcd9ec-5fbb-4012-8401-049e707da5e5", "hash": "random:f986be43ff3502169e4adbdcee158afb0e0a65a1edc4cab16ae59963630cfd8f", @@ -96,7 +100,7 @@ "prompt": { "name": "prompt", "label": "", - "value": "" + "value": "a cat" } } }, @@ -180,8 +184,8 @@ }, "num_steps": { "name": "num_steps", - "label": "", - "value": 4 + "label": "Steps (Recommend 30 for Dev, 4 for Schnell)", + "value": 30 }, "guidance": { "name": "guidance", From 3b29bad4c7d28caa0fa283922552854a23b5e59a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 26 Aug 2024 23:39:00 +0000 Subject: [PATCH 113/113] Update starter model size estimates. --- invokeai/backend/model_manager/starter_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index d08fc9fc978..76b91f0d34c 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -94,7 +94,7 @@ class StarterModel(StarterModelWithoutDependencies): name="FLUX Schnell (Quantized)", base=BaseModelType.Flux, source="InvokeAI/flux_schnell::transformer/bnb_nf4/flux1-schnell-bnb_nf4.safetensors", - description="FLUX schnell transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~14GB", + description="FLUX schnell transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~12GB", type=ModelType.Main, dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder], ), @@ -102,7 +102,7 @@ class StarterModel(StarterModelWithoutDependencies): name="FLUX Dev (Quantized)", base=BaseModelType.Flux, source="InvokeAI/flux_dev::transformer/bnb_nf4/flux1-dev-bnb_nf4.safetensors", - description="FLUX dev transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~16GB", + description="FLUX dev transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~12GB", type=ModelType.Main, dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder], ), @@ -118,7 +118,7 @@ class StarterModel(StarterModelWithoutDependencies): name="FLUX Dev", base=BaseModelType.Flux, source="InvokeAI/flux_dev::transformer/base/flux1-dev.safetensors", - description="FLUX dev transformer in bfloat16. Total size with dependencies: ~34GB", + description="FLUX dev transformer in bfloat16. Total size with dependencies: ~33GB", type=ModelType.Main, dependencies=[t5_base_encoder, flux_vae, clip_l_encoder], ),
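
Taken together, these last patches complete the move away from the deleted YAML configs: the probe now stores a plain dictionary key ("flux-dev", "flux-schnell", or "flux") in config_path, and the loaders resolve that key against invokeai.backend.flux.util.params and ae_params. A short sketch of that round trip follows; the helper names (flux_config_key, build_flux, build_flux_vae) are illustrative and do not exist in the codebase, and the state dict is assumed to already be loaded.

from typing import Any, Dict

from invokeai.backend.flux.model import Flux
from invokeai.backend.flux.modules.autoencoder import AutoEncoder
from invokeai.backend.flux.util import ae_params, params


def flux_config_key(state_dict: Dict[str, Any]) -> str:
    # Mirrors ModelProbe._get_checkpoint_config_path: dev checkpoints carry the
    # guidance embedding, schnell checkpoints do not.
    return "flux-dev" if "guidance_in.out_layer.weight" in state_dict else "flux-schnell"


def build_flux(config_path: str) -> Flux:
    # config_path is the key the probe stored ("flux-dev" or "flux-schnell").
    return Flux(params[config_path])


def build_flux_vae() -> AutoEncoder:
    # The FLUX VAE config is keyed simply as "flux" in ae_params.
    return AutoEncoder(ae_params["flux"])

Keying the configs by name like this is what lets model type and format remain the discriminators for model configs, as the probe comments added in patch 108 note.
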