Merge pull request #7 from invoke-ai/ryan/lora-training

Add initial LoRA training script
invoke-ai · Aug 8, 2023 · 3973019 · 3973019
2 parents 19f0766 + b33d971
commit 3973019
Show file tree

Hide file tree

Showing 23 changed files with 1,218 additions and 23 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+output/
+
 # pyenv
 .python-version
 

diff --git a/README.md b/README.md
@@ -1,13 +1,43 @@
-# InvokeTraining
+# invoke-training
 
 A library for training custom Stable Diffusion models (fine-tuning, LoRA training, textual inversion, etc.) that can be used in [InvokeAI](https://github.com/invoke-ai/InvokeAI).
 
 **WARNING:**  This repo is currently under construction. More details coming soon.
 
 ## Developer Quick Start
 
+### Setup Development Environment
 1. (Optional) Create a python virtual environment.
-2. Install dependencies: `pip install -e .[test]`.
-3. Run tests: `pytest tests`.
-4. (Optional) Install the pre-commit hooks: `pre-commit install`. This will run static analysis tools (black, ruff, isort) on `git commit`.
-5. (Optional) Set up `black`, `isort`, and `ruff` in your IDE of choice.
+1. Install dependencies: `pip install -e .[test]`.
+1. (Optional) Install the pre-commit hooks: `pre-commit install`. This will run static analysis tools (black, ruff, isort) on `git commit`.
+1. (Optional) Set up `black`, `isort`, and `ruff` in your IDE of choice.
+
+### Unit Tests
+Run all unit tests with:
+```bash
+pytest tests/
+```
+
+There are some test 'markers' defined in [pyproject.toml](/pyproject.toml) that can be used to skip some tests. For example, the following command skips tests that require a GPU or require downloading model weights:
+```bash
+pytest tests/ -m "not cuda and not loads_model"
+```
+
+### Train a LoRA
+The following steps explain how to train a basic Pokemon Style LoRA using the [lambdalabs/pokemon-blip-captions](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions) dataset, and how to use it in [InvokeAI](https://github.com/invoke-ai/InvokeAI).
+
+This training process has been tested on an Nvidia GPU with 8GB of VRAM.
+
+1. For this example, we will use the [lora_training_example.yaml](/configs/lora_training_example.yaml) config file. See [lora_training_config.py](/src/invoke_training/training/lora/lora_training_config.py) for the full list of supported LoRA training configs.
+2. Start training with `invoke-train-lora --cfg-file configs/lora_training_example.yaml`.
+3. Monitor the training process with Tensorboard by running `tensorboard --logdir output/` and visiting [localhost:6006](http://localhost:6006) in your browser. Here you can see generated images for fixed prompts throughout the training process.
+4. Select a checkpoint based on the quality of the generated images. As an example, we'll use the **Epoch 19** checkpoint.
+5. If you haven't already, set up [InvokeAI](https://github.com/invoke-ai/InvokeAI) by following its documentation.
+6. Copy your selected LoRA checkpoint into your `${INVOKEAI_ROOT}/autoimport/lora` directory. For example:
+```bash
+cp output/1691088769.5694647/checkpoint_epoch-00000019.safetensors ${INVOKEAI_ROOT}/autoimport/lora/pokemon_epoch-00000019.safetensors
+```
+7. You can now use your trained Pokemon LoRA in the InvokeAI UI! 🎉
+
+![Screenshot of the InvokeAI UI with an example of a Yoda pokemon generated using a Pokemon LoRA.](images/invokeai_yoda_pokemon_lora.png)
+*Example image generated with the prompt "yoda" and Pokemon LoRA.*
diff --git a/configs/lora_training_example.yaml b/configs/lora_training_example.yaml
@@ -0,0 +1,27 @@
+# This is a sample config for training a Pokemon LoRA model.
+
+output:
+  base_output_dir: output/
+
+optimizer:
+  learning_rate: 1.0e-3
+
+dataset:
+  name: lambdalabs/pokemon-blip-captions
+
+# General
+seed: 1
+gradient_accumulation_steps: 1
+mixed_precision: fp16
+xformers: True
+gradient_checkpointing: True
+max_train_steps: 4000
+save_every_n_epochs: 1
+save_every_n_steps: null
+max_checkpoints: 100
+validation_prompts:
+  - yoda
+  - astronaut
+  - yoda in a space suit
+validate_every_n_epochs: 1
+train_batch_size: 4
diff --git a/images/invokeai_yoda_pokemon_lora.png b/images/invokeai_yoda_pokemon_lora.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,21 +5,31 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "invoke-training"
 version = "0.0.1"
-authors = [
-    { name="The Invoke AI Team", email="[email protected]" },
-]
+authors = [{ name = "The Invoke AI Team", email = "[email protected]" }]
 description = "A library for Stable Diffusion model training."
 readme = "README.md"
 requires-python = ">=3.9"
-license = {text = "Apache-2.0"}
+license = { text = "Apache-2.0" }
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
 ]
 dependencies = [
     "accelerate~=0.21.0",
+    "datasets~=2.14.3",
     "diffusers~=0.19.3",
-    "torch~=2.0.1",
+    "numpy",
+    "pydantic",
+    "pyyaml",
+    "safetensors",
+    "tensorboard",
+    "torch>=2.0.1",
+    "torchvision~=0.15.2",
+    "tqdm",
+    "transformers~=4.31.0",
+    # Known issue with xformers 0.0.16 on some GPUs:
+    # https://github.com/huggingface/diffusers/issues/2234#issuecomment-1416931212
+    "xformers>=0.0.17",
 ]
 
 [project.optional-dependencies]

diff --git a/src/invoke_training/lora/injection/stable_diffusion_v1.py b/src/invoke_training/lora/injection/stable_diffusion_v1.py
@@ -25,6 +25,7 @@ def inject_lora_into_unet_sd1(unet: UNet2DConditionModel) -> LoRALayerCollection
         include_descendants_of={Transformer2DModel},
         exclude_descendants_of=None,
         prefix="lora_unet",
+        dtype=torch.float32,
     )
 
     return lora_layers

diff --git a/src/invoke_training/lora/injection/utils.py b/src/invoke_training/lora/injection/utils.py
@@ -81,6 +81,7 @@ def inject_lora_layers(
     include_descendants_of: typing.Optional[typing.Set[typing.Type[torch.nn.Module]]] = None,
     exclude_descendants_of: typing.Optional[typing.Set[typing.Type[torch.nn.Module]]] = None,
     prefix: str = "",
+    dtype: torch.dtype = None,
 ) -> LoRALayerCollection:
     """Iterates over all of the modules in 'module' and if they are present in 'replace_map' then replaces them with the
     mapped LoRA layer type.
@@ -95,6 +96,7 @@ def inject_lora_layers(
         include_descendants_of (typing.Set[typing.Type[torch.nn.Module]], optional): Forwarded to find_modules(...).
         exclude_descendants_of (typing.Set[typing.Type[torch.nn.Module]], optional): Forwarded to find_modules(...).
         prefix (str, optional): A prefix that will be added to the names of all of the LoRA layers.
+        dtype (torch.dtype, optional): The dtype to construct the new layer with.
     Returns:
         LoRALayerCollection: A ModuleDict of all of the LoRA layers that were injected into the module.
     """
@@ -111,7 +113,7 @@ def inject_lora_layers(
         lora_layer_cls = lora_map[type(module)]
 
         # Initialize the LoRA layer with the correct dimensions.
-        lora_layer = lora_layer_cls.from_layer(module)
+        lora_layer = lora_layer_cls.from_layer(module, dtype=dtype)
 
         # Join the LoRA layer and the original layer in a block.
         lora_block = LoRABlock(original_module=module, lora_layer=lora_layer)

diff --git a/src/invoke_training/lora/layers/base_lora_layer.py b/src/invoke_training/lora/layers/base_lora_layer.py
@@ -8,11 +8,15 @@ class BaseLoRALayer(torch.nn.Module):
     def from_layer(
         cls,
         layer: torch.nn.Module,
+        device: torch.device = None,
+        dtype: torch.dtype = None,
         **kwargs,
     ):
         """Initialize a LoRA layer with dimensions that are compatible with 'layer'.
         Args:
             layer (torch.nn.Module): The existing layer whose in/out dimensions will be matched.
+            device (torch.device, optional): The device to construct the new layer on.
+            dtype (torch.dtype, optional): The dtype to construct the new layer with.
         Raises:
             TypeError: If layer has an unsupported type.
         Returns:

diff --git a/src/invoke_training/lora/lora_training.py b/src/invoke_training/lora/lora_training.py
diff --git a/src/invoke_training/scripts/invoke_train_lora.py b/src/invoke_training/scripts/invoke_train_lora.py
@@ -1,8 +1,33 @@
-from invoke_training.lora.lora_training import run_lora_training
+import argparse
+from pathlib import Path
+
+import yaml
+
+from invoke_training.training.lora.lora_training import run_lora_training
+from invoke_training.training.lora.lora_training_config import LoRATrainingConfig
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="LoRA model training.")
+    parser.add_argument(
+        "--cfg-file",
+        type=Path,
+        required=True,
+        help="Path to the YAML training config file. See `LoRATrainingConfig` for the supported fields.",
+    )
+    return parser.parse_args()
 
 
 def main():
-    run_lora_training()
+    args = parse_args()
+
+    # Load YAML config file.
+    with open(args.cfg_file, "r") as f:
+        cfg = yaml.safe_load(f)
+
+    train_config = LoRATrainingConfig(**cfg)
+
+    run_lora_training(train_config)
 
 
 if __name__ == "__main__":

diff --git a/src/invoke_training/training/__init__.py b/src/invoke_training/training/__init__.py
diff --git a/src/invoke_training/training/lora/__init__.py b/src/invoke_training/training/lora/__init__.py