From 74dfe5ae48359f1fb69974fc75d153c3a68ab2c5 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Thu, 30 Nov 2023 17:11:16 -0500
Subject: [PATCH 1/2] Add support for LoRAs in generate_images.py.

---
 .../scripts/invoke_generate_images.py | 30 +++++++++++++++++++
 .../training/tools/generate_images.py |  9 ++++++
 2 files changed, 39 insertions(+)

diff --git a/src/invoke_training/scripts/invoke_generate_images.py b/src/invoke_training/scripts/invoke_generate_images.py
index 9d4078fc..a1970fa4 100644
--- a/src/invoke_training/scripts/invoke_generate_images.py
+++ b/src/invoke_training/scripts/invoke_generate_images.py
@@ -1,4 +1,5 @@
 import argparse
+from pathlib import Path
 
 from invoke_training.training.shared.model_loading_utils import PipelineVersionEnum
 from invoke_training.training.tools.generate_images import generate_images
@@ -25,6 +26,14 @@ def parse_args():
         "stable diffusion checkpoint file. (E.g. 'runwayml/stable-diffusion-v1-5', "
         "'stabilityai/stable-diffusion-xl-base-1.0', '/path/to/realisticVisionV51_v51VAE.safetensors', etc. )",
     )
+    parser.add_argument(
+        "-l",
+        "--lora",
+        type=str,
+        nargs="*",
+        help="LoRA models to apply to the base model. The LoRA weight can optionally be provided after a colon "
+        "separator. E.g. `-l path/to/lora.bin:0.5 path/to/lora_2.safetensors`. ",
+    )
     parser.add_argument(
         "--sd-version",
         type=str,
@@ -67,9 +76,29 @@ def parse_args():
     return parser.parse_args()
 
 
+def parse_lora_args(lora_args: list[str] | None) -> list[tuple[Path, float]]:
+    loras: list[tuple[Path, float]] = []
+
+    lora_args = lora_args or []
+    for lora in lora_args:
+        lora_split = lora.split(":")
+
+        if len(lora_split) == 1:
+            # If weight is not specified, assume 1.0.
+            loras.append((Path(lora_split[0]), 1.0))
+        elif len(lora_split) == 2:
+            loras.append((Path(lora_split[0]), float(lora_split[1])))
+        else:
+            raise ValueError(f"Invalid lora argument syntax: '{lora}'.")
+
+    return loras
+
+
 def main():
     args = parse_args()
 
+    loras = parse_lora_args(args.lora)
+
     print(f"Generating {args.num_images} images in '{args.out_dir}'.")
     generate_images(
         out_dir=args.out_dir,
@@ -79,6 +108,7 @@ def main():
         num_images=args.num_images,
         height=args.height,
         width=args.width,
+        loras=loras,
         seed=args.seed,
         enable_cpu_offload=args.enable_cpu_offload,
     )
diff --git a/src/invoke_training/training/tools/generate_images.py b/src/invoke_training/training/tools/generate_images.py
index 80877ffb..2cbe8156 100644
--- a/src/invoke_training/training/tools/generate_images.py
+++ b/src/invoke_training/training/tools/generate_images.py
@@ -1,4 +1,6 @@
 import os
+from pathlib import Path
+from typing import Optional
 
 import torch
 from tqdm import tqdm
@@ -17,6 +19,7 @@ def generate_images(
     num_images: int,
     height: int,
     width: int,
+    loras: Optional[list[tuple[Path, float]]] = None,
     seed: int = 0,
     torch_dtype: torch.dtype = torch.float16,
     torch_device: str = "cuda",
@@ -44,6 +47,12 @@
 
     pipeline = load_pipeline(model, pipeline_version)
 
+    loras = loras or []
+    for lora in loras:
+        lora_path, lora_scale = lora
+        pipeline.load_lora_weights(str(lora_path), weight_name=str(lora_path.name))
+        pipeline.fuse_lora(lora_scale=lora_scale)
+
     pipeline.to(torch_dtype=torch_dtype)
     if enable_cpu_offload:
         pipeline.enable_model_cpu_offload()

From 7ecdc596fe33526ee6fb877a835ca9f077e80d6c Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 5 Dec 2023 14:15:43 -0500
Subject: [PATCH 2/2] Add ability to apply TI embeddings in the
 generate_images.py script.

---
 src/invoke_training/scripts/invoke_generate_images.py | 7 +++++++
 src/invoke_training/training/tools/generate_images.py | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/src/invoke_training/scripts/invoke_generate_images.py b/src/invoke_training/scripts/invoke_generate_images.py
index a1970fa4..f31d21cd 100644
--- a/src/invoke_training/scripts/invoke_generate_images.py
+++ b/src/invoke_training/scripts/invoke_generate_images.py
@@ -34,6 +34,12 @@ def parse_args():
         help="LoRA models to apply to the base model. The LoRA weight can optionally be provided after a colon "
         "separator. E.g. `-l path/to/lora.bin:0.5 path/to/lora_2.safetensors`. ",
     )
+    parser.add_argument(
+        "--ti",
+        type=str,
+        nargs="*",
+        help="Path(s) to Textual Inversion embeddings to apply to the base model.",
+    )
     parser.add_argument(
         "--sd-version",
         type=str,
@@ -109,6 +115,7 @@ def main():
         height=args.height,
         width=args.width,
         loras=loras,
+        ti_embeddings=args.ti,
         seed=args.seed,
         enable_cpu_offload=args.enable_cpu_offload,
     )
diff --git a/src/invoke_training/training/tools/generate_images.py b/src/invoke_training/training/tools/generate_images.py
index 2cbe8156..87cf680c 100644
--- a/src/invoke_training/training/tools/generate_images.py
+++ b/src/invoke_training/training/tools/generate_images.py
@@ -20,6 +20,7 @@ def generate_images(
     height: int,
     width: int,
     loras: Optional[list[tuple[Path, float]]] = None,
+    ti_embeddings: Optional[list[str]] = None,
     seed: int = 0,
     torch_dtype: torch.dtype = torch.float16,
     torch_device: str = "cuda",
@@ -38,6 +39,9 @@ def generate_images(
             with).
         width (int): The output image width in pixels (recommended to match the resolution that the model was trained
             with).
+        loras (list[tuple[Path, float]], optional): Paths to LoRA models to apply to the base model, with associated
+            weights.
+        ti_embeddings (list[str], optional): Paths to TI embeddings to apply to the base model.
        seed (int, optional): A seed for repeatability. Defaults to 0.
         torch_dtype (torch.dtype, optional): The torch dtype. Defaults to torch.float16.
         torch_device (str, optional): The torch device. Defaults to "cuda".
@@ -53,6 +57,10 @@
         pipeline.load_lora_weights(str(lora_path), weight_name=str(lora_path.name))
         pipeline.fuse_lora(lora_scale=lora_scale)
 
+    ti_embeddings = ti_embeddings or []
+    for ti_embedding in ti_embeddings:
+        pipeline.load_textual_inversion(ti_embedding)
+
     pipeline.to(torch_dtype=torch_dtype)
     if enable_cpu_offload:
         pipeline.enable_model_cpu_offload()
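
For reference, a minimal sketch (not part of the patches themselves) of the colon syntax handled by the new parse_lora_args() helper and of the keyword arguments that main() forwards to generate_images(). The file paths below are placeholders, and the import path for the script module is assumed from the repository layout.

from pathlib import Path

from invoke_training.scripts.invoke_generate_images import parse_lora_args

# CLI usage "-l character_lora.safetensors:0.5 style_lora.safetensors" reaches
# parse_lora_args() as a plain list of strings (argparse nargs="*").
loras = parse_lora_args(["character_lora.safetensors:0.5", "style_lora.safetensors"])

# The weight after the ':' is optional and defaults to 1.0.
assert loras == [
    (Path("character_lora.safetensors"), 0.5),
    (Path("style_lora.safetensors"), 1.0),
]

# main() then passes the parsed pairs (and any "--ti" paths) straight through:
#     generate_images(..., loras=loras, ti_embeddings=args.ti, ...)
# where each LoRA is loaded and fused into the pipeline at its weight, and each
# TI embedding is loaded with pipeline.load_textual_inversion().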