diff --git a/docs/api.md b/docs/api.md
index db25dc3..00ab930 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -6,6 +6,15 @@
 Pipe classes implement different ways to generate or process images using diffusion models.
+
+All Pipe classes have two methods: *setup* and *gen*.
+
+The purpose of *setup* is to define the parameters of the pipeline for image generation. Pipelines that take an image as an
+input perform image preprocessing in *setup*. *setup*'s arguments are stored in the pipeline and used whenever the *gen* method is called.
+
+*gen* takes as input the parameters that are not stored inside the pipeline. All the pipelines take a dictionary as input.
+Expected parameters are prompt (str), generator (torch.Generator) and negative_prompt (str).
+
 **Prompt2ImPipe** is a pipe that generates an image from a text prompt.
@@ -16,6 +25,20 @@
 image = pipe.gen({'prompt': 'bio-tech lab with computers and exotic flowers, art style'})
 image.save('bio.png')
 ```
+*setup* parameters
+
+width - width of the image to generate
+height - height of the image to generate
+guidance_scale - strength of the prompt's influence on the generation process
+steps - the number of denoising steps. More denoising steps usually lead to a higher-quality image at the expense of slower inference. The default value is 50.
+clip_skip - the number of layers to be skipped from CLIP while computing the prompt embeddings. Skipping some layers gives a less precise representation of the prompt. The default value is 0.
+
+Optimal values of guidance_scale and steps vary a lot between different checkpoints.
+
+*gen* parameters
+
+prompt, negative_prompt, generator - the common parameters described above
+
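+For example, a pipeline could be configured like this (an illustrative sketch; the parameter values are placeholders, and optimal settings depend on the checkpoint):
+
+```
+pipe = Prompt2ImPipe(model_id)
+pipe.setup(width=768, height=768, guidance_scale=7.5, steps=30, clip_skip=2)
+image = pipe.gen({'prompt': 'bio-tech lab with computers and exotic flowers, art style',
+                  'negative_prompt': 'blurry, low quality'})
+```
+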
 **Im2ImPipe** is a pipe that generates an image from another image.
 
 ```
@@ -25,20 +48,39 @@
 img = pipe.gen({'prompt': 'biolab, anime style drawing, green colors'})
 img.save('bio1.png')
 ```
+*setup* parameters
+
+fimage - file path to the input image
+image - the input image; can be used instead of fimage
+strength - strength of image modification. Defaults to 0.75. Lower strength values keep the result close to the input image; a value of 1 means the input image is more or less ignored.
+scale - the scale factor for resizing the input image. The output image will have dimensions (height * scale, width * scale). Defaults to None.
+guidance_scale, steps, clip_skip - same as in Prompt2ImPipe
+
+
 **Cond2ImPipe** is a pipe that generates
-an image from another image plus conditioning
-image e.g. image after canny edge detection etc.
+an image from a special conditioning
+image, e.g. an image after Canny edge detection.
 The conditioning image is processed internally
 with ControlNet and uses StableDiffusion(XL)ControlNetPipeline.
 Models are expected to be in ./models-cn/
 for StableDiffusion and in ./models-cn-xl/
 for StableDiffusionXL.
 
-**CIm2ImPipe** is similiar to Cond2ImPipe.
+
+**CIm2ImPipe** is a subclass of Cond2ImPipe.
+
 The difference is that the conditional image is
 not taken as input but is obtained from the input image, which
-should be processed, and the image processor
-depends on the conditioning type.
+is processed internally by the image processor. The image processor
+depends on the conditioning type specified in the constructor.
+
+
+*setup* parameters
+
+fimage, image - same as in Im2ImPipe
+cscales - strength of the control image influence
+width, height, steps, clip_skip, guidance_scale - same as in Prompt2ImPipe
 
 ```
 model_id = 'runwayml/stable-diffusion-v1-5'
@@ -81,6 +123,15 @@
 img = pipe.gen({'prompt': prompt, 'seed':84958344})
 img.save('inpaint1.png')
 ```
+*setup* parameters
+
+original_image - the unmodified input image
+image_painted - a modified version of original_image; this parameter should be skipped if mask is passed
+mask - the mask. Defaults to None. If None, it is computed from the difference
+between original_image and image_painted; it should be skipped if image_painted is passed
+blur - the blur radius applied to the mask during the generation process
+blur_compose - the blur radius used when composing the original and generated images
+scale - the scale factor for resizing the input image. The output image will have dimensions (height * scale, width * scale)
 
 ## metafusion service
 
diff --git a/docs/generated/multigen.html b/docs/generated/multigen.html
index eb9d0de..5ed7b98 100644
--- a/docs/generated/multigen.html
+++ b/docs/generated/multigen.html
@@ -19,12 +19,24 @@
hypernet
loader
+log
+lpw_stable_diffusion
+
lpw_stable_diffusion_xl
+lpwxl
pipelines (package)
-
pipes
+pipes
+
prompt_helper
prompting
sessions
-
util
-worker
+util
+
worker
worker_base
-
+

+ + + + + +
 
+Data
       hints = {'author_style': ['Jean-Baptiste Monge style', 'styled in Art Nouveau', 'painting by Ed Blinkey', 'by Atey Ghailan', 'by Jeremy Mann', 'by Greg Manchess', 'by Antonio Moro', 'painting art by greg rutkowski', 'in the style of kandinsky', 'in the style of guillem h. pongiluppi', 'by Abigail Larson', 'by John Sloane', 'in the style of John Berkey', 'by Russ Mills', 'by George Callaghan', 'by Ian Miller', 'by Luke Fildes', 'inspired by Chris Achilleos', 'by James Jean', 'style of Wayne Barlowe', ...], 'colors': ['pastel colors', 'bioluminescens color', 'vibrant palette', 'colorful', 'dreamlike colors', 'neon lights', 'black and white', 'monochrome', 'unusual colors', 'complementary colors', 'flowing colors', 'dark', 'bright', '24bit colors'], 'effects': ['strong environmental light', ['cinema lighting', 'cinematic light'], 'haze lighting', 'reflections', 'light leaks', 'detailed shading', 'soft shadows', 'sheen', 'glowing', 'dynamic motion'], 'quality': [['ultra resolution', 'ultra high res', 'highest resolution', 'absurdres'], ['hyperrealistic', 'detailed hyperrealism'], 'perfectly balanced', 'score_9', '35mm photography', ['8k RAW photo', '8k uhd', 'cg unity 8k wallpaper', '16k hdri', '32k UHD'], ['masterpiece', 'famous artwork'], ['trending on ArtStation', 'trending on CGSociety'], ['super quality', 'best quality', 'extreme quality', 'top quality'], ['highly detailed', 'high detail', 'ultra-detailed']], 'shooting': ['double exposure', 'extremely sharp focus', 'large depth of field', 'deep depth of field', ['artistic blur', 'bokeh'], 'soft focus', 'cinematic shot', ['35mm photo', '50mm photo'], 'dynamic angle', 'film grain'], 'style': ['surrealism', ['symbolism', 'cubism'], 'psychedelic art', ['impressionism', 'expressionism', 'neo-expressionist'], 'neoclassicism', ['cyberpunk', 'urbanpunk', 'cyberpunk mood', 'cyberpunk rave'], ['snthwve style', 'nvinkpunk style', 'ink punk style'], ['art photography', 'artistic photorealism'], ['professional photo', 'iphone photo'], 'poster art', 'edgy street art', ['afrofuturism style', 'in the style of chinapunk'], ['sci-fi concept art', 'scifi style', 'futuristic'], 'painterly style', 'pixar style', 'quilling', ['pencil sketch', 'pencil painting'], 'watercolor painting'], 'style_modifiers': ['geometric patterns', ['fusion of art styles', 'concept art', 'fine art'], 'grafitti', 'splash', 'modern ink', ['multi-layered collages', 'abstract multilayer texture'], 'iconic album covers', 'fragmented icons', 'stylized photo', 'cinematic illustration', 'dripping paint', 'splatter art', 'fractal art', 'oil on canvas', ['digital painting', 'digital dreamscape'], 'holographic style'], 'vibe': ['energy-filled illustration', 'intricate', 'vibrant', 'stunning', 'aesthetic', 'sensual', 'radiant vibe']}
\ No newline at end of file diff --git a/docs/generated/multigen.loader.html b/docs/generated/multigen.loader.html index f80c73e..136baa2 100644 --- a/docs/generated/multigen.loader.html +++ b/docs/generated/multigen.loader.html @@ -17,9 +17,15 @@ Modules         -
logging
-
torch
-

+
copy
+diffusers
+
logging
+psutil
+
random
+sys
+
threading
+torch
+

-
 
@@ -46,11 +52,17 @@
Methods defined here:
__init__(self)
Initialize self.  See help(type(self)) for accurate signature.
-
get_pipeline(self, model_id)
+
cache_pipeline(self, pipe: diffusers.pipelines.pipeline_utils.DiffusionPipeline, model_id)
+ +
clear_cache(self, device)
+ +
from_pipe(self, cls, pipe, additional_args)
+ +
get_gpu(self, model_id) -> List[int]
return list of gpus with loaded model
-
load_pipeline(self, cls: Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline], path, torch_dtype=torch.float16, device=device(type='cuda'), **additional_args)
+
get_pipeline(self, model_id, device=None)
-
register_pipeline(self, pipe: diffusers.pipelines.pipeline_utils.DiffusionPipeline, model_id)
+
load_pipeline(self, cls: Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline], path, torch_dtype=torch.float16, device=None, **additional_args)
remove_pipeline(self, model_id)
@@ -69,7 +81,11 @@ Functions
       
copy_pipe(pipe)
+
awailable_ram()
+
copy_pipe(pipe)
+
count_params(model)
+
get_model_size(pipeline)
+
get_size(obj)

@@ -77,6 +93,7 @@ Data -
       Type = typing.Type
+
List = typing.List
+Type = typing.Type
logger = <Logger /home/imgen/projects/metafusion/multigen/loader.py (WARNING)>
\ No newline at end of file diff --git a/docs/generated/multigen.pipelines.masked_stable_diffusion_img2img.html b/docs/generated/multigen.pipelines.masked_stable_diffusion_img2img.html index 48be425..78a7242 100644 --- a/docs/generated/multigen.pipelines.masked_stable_diffusion_img2img.html +++ b/docs/generated/multigen.pipelines.masked_stable_diffusion_img2img.html @@ -28,7 +28,7 @@        

-
diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline(diffusers.pipelines.pipeline_utils.DiffusionPipeline, diffusers.pipelines.pipeline_utils.StableDiffusionMixin, diffusers.loaders.textual_inversion.TextualInversionLoaderMixin, diffusers.loaders.ip_adapter.IPAdapterMixin, diffusers.loaders.lora.LoraLoaderMixin, diffusers.loaders.single_file.FromSingleFileMixin) +
diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline(diffusers.pipelines.pipeline_utils.DiffusionPipeline, diffusers.pipelines.pipeline_utils.StableDiffusionMixin, diffusers.loaders.textual_inversion.TextualInversionLoaderMixin, diffusers.loaders.ip_adapter.IPAdapterMixin, diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin, diffusers.loaders.single_file.FromSingleFileMixin)
MaskedStableDiffusionImg2ImgPipeline @@ -55,13 +55,14 @@
diffusers.pipelines.pipeline_utils.StableDiffusionMixin
diffusers.loaders.textual_inversion.TextualInversionLoaderMixin
diffusers.loaders.ip_adapter.IPAdapterMixin
-
diffusers.loaders.lora.LoraLoaderMixin
+
diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin
+
diffusers.loaders.lora_base.LoraBaseMixin
diffusers.loaders.single_file.FromSingleFileMixin
builtins.object

Methods defined here:
-
__call__(self, prompt: Union[str, List[str]] = None, image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor]] = None, original_image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor]] = None, strength: float = 0.8, num_inference_steps: Optional[int] = 50, timesteps: List[int] = None, guidance_scale: Optional[float] = 7.5, negative_prompt: Union[str, List[str], NoneType] = None, num_images_per_prompt: Optional[int] = 1, eta: Optional[float] = 0.0, generator: Union[torch._C.Generator, List[torch._C.Generator], NoneType] = None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, ip_adapter_image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor], NoneType] = None, ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None, output_type: Optional[str] = 'pil', return_dict: bool = True, cross_attention_kwargs: Optional[Dict[str, Any]] = None, clip_skip: int = None, callback_on_step_end: Optional[Callable[[int, int, Dict], NoneType]] = None, callback_on_step_end_tensor_inputs: List[str] = ['latents'], mask: Union[torch.FloatTensor, PIL.Image.Image, numpy.ndarray, List[torch.FloatTensor], List[PIL.Image.Image], List[numpy.ndarray]] = None, sample_mode='sample', **kwargs)
The call function to the pipeline for generation.
+
__call__(self, prompt: Union[str, List[str]] = None, image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor]] = None, original_image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor]] = None, strength: float = 0.8, num_inference_steps: Optional[int] = 50, timesteps: List[int] = None, guidance_scale: Optional[float] = 7.5, negative_prompt: Union[str, List[str], NoneType] = None, num_images_per_prompt: Optional[int] = 1, eta: Optional[float] = 0.0, generator: Union[torch._C.Generator, List[torch._C.Generator], NoneType] = None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, ip_adapter_image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor], NoneType] = None, ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None, output_type: Optional[str] = 'pil', return_dict: bool = True, cross_attention_kwargs: Optional[Dict[str, Any]] = None, clip_skip: int = None, callback_on_step_end: Optional[Callable[[int, int, Dict], NoneType]] = None, callback_on_step_end_tensor_inputs: List[str] = ['latents'], mask: Union[torch.FloatTensor, PIL.Image.Image, numpy.ndarray, List[torch.FloatTensor], List[PIL.Image.Image], List[numpy.ndarray]] = None, sample_mode='sample', **kwargs)
The call function to the pipeline for generation.
 
Args:
    prompt (`str` or `List[str]`, *optional*):
@@ -168,7 +169,7 @@
encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
-
encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt=None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, lora_scale: Optional[float] = None, clip_skip: Optional[int] = None)
Encodes the prompt into text encoder hidden states.
+
encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt=None, prompt_embeds: Optional[torch.Tensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None, lora_scale: Optional[float] = None, clip_skip: Optional[int] = None)
Encodes the prompt into text encoder hidden states.
 
Args:
    prompt (`str` or `List[str]`, *optional*):
@@ -183,10 +184,10 @@         The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
-    prompt_embeds (`torch.FloatTensor`, *optional*):
+    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
-    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
@@ -196,7 +197,7 @@         Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
-
get_guidance_scale_embedding(self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32) -> torch.FloatTensor
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
+
get_guidance_scale_embedding(self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32) -> torch.Tensor
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
 
Args:
    w (`torch.Tensor`):
@@ -207,7 +208,7 @@         Data type of the generated embeddings.
 
Returns:
-    `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
+    `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
get_timesteps(self, num_inference_steps, strength, device)
@@ -443,9 +444,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -570,9 +569,7 @@     cache_dir (`Union[str, os.PathLike]`, *optional*):
        Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
        is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -796,9 +793,6 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -979,9 +973,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1139,9 +1131,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1160,12 +1150,32 @@         Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
        argument to `True` will raise an error.
-
set_ip_adapter_scale(self, scale)
Sets the conditioning scale between text and image.
+
set_ip_adapter_scale(self, scale)
Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for
+granular control over each IP-Adapter behavior. A config can be a float or a dictionary.
 
Example:
 
```py
-pipeline.set_ip_adapter_scale(0.5)
+# To use original IP-Adapter
+scale = 1.0
+pipeline.set_ip_adapter_scale(scale)

+# To use style block only
+scale = {
+    "up": {"block_0": [0.0, 1.0, 0.0]},
+}
+pipeline.set_ip_adapter_scale(scale)

+# To use style+layout blocks
+scale = {
+    "down": {"block_2": [0.0, 1.0]},
+    "up": {"block_0": [0.0, 1.0, 0.0]},
+}
+pipeline.set_ip_adapter_scale(scale)

+# To use style and layout from 2 reference images
+scales = [{"down": {"block_2": [0.0, 1.0]}}, {"up": {"block_0": [0.0, 1.0, 0.0]}}]
+pipeline.set_ip_adapter_scale(scales)
```
unload_ip_adapter(self)
Unloads the IP Adapter weights
@@ -1179,31 +1189,8 @@ ```

-Methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
-
delete_adapters(self, adapter_names: Union[List[str], str])
Args:
-Deletes the LoRA layers of `adapter_name` for the unet and text-encoder(s).
-    adapter_names (`Union[List[str], str]`):
-        The names of the adapter to delete. Can be a single string or a list of strings
- -
disable_lora(self)
- -
disable_lora_for_text_encoder(self, text_encoder: Optional[ForwardRef('PreTrainedModel')] = None)
Disables the LoRA layers for the text encoder.

-Args:
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to disable the LoRA layers for. If `None`, it will try to get the
-        `text_encoder` attribute.
- -
enable_lora(self)
- -
enable_lora_for_text_encoder(self, text_encoder: Optional[ForwardRef('PreTrainedModel')] = None)
Enables the LoRA layers for the text encoder.

-Args:
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to enable the LoRA layers for. If `None`, it will try to get the `text_encoder`
-        attribute.
- -
fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True, lora_scale: float = 1.0, safe_fusing: bool = False, adapter_names: Optional[List[str]] = None)
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
+Methods inherited from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin:
+
fuse_lora(self, components: List[str] = ['unet', 'text_encoder'], lora_scale: float = 1.0, safe_fusing: bool = False, adapter_names: Optional[List[str]] = None, **kwargs)
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
 
<Tip warning={true}>
 
@@ -1212,10 +1199,7 @@ </Tip>
 
Args:
-    fuse_unet (`bool`, defaults to `True`): Whether to fuse the UNet LoRA parameters.
-    fuse_text_encoder (`bool`, defaults to `True`):
-        Whether to fuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
-        LoRA parameters then it won't have any effect.
+    components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
    lora_scale (`float`, defaults to 1.0):
        Controls how much to influence the outputs with the LoRA parameters.
    safe_fusing (`bool`, defaults to `False`):
@@ -1236,68 +1220,31 @@ pipeline.fuse_lora(lora_scale=0.7)
```
-
get_active_adapters(self) -> List[str]
Gets the list of the current active adapters.

-Example:

-```python
-from diffusers import DiffusionPipeline

-pipeline = DiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-).to("cuda")
-pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
-pipeline.get_active_adapters()
-```
- -
get_list_adapters(self) -> Dict[str, List[str]]
Gets the current list of all available adapters in the pipeline.
-
load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs)
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and
`self.text_encoder`.
 
All kwargs are forwarded to `self.lora_state_dict`.
 
-See [`~loaders.LoraLoaderMixin.lora_state_dict`] for more details on how the state dict is loaded.
+See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is
+loaded.
 
-See [`~loaders.LoraLoaderMixin.load_lora_into_unet`] for more details on how the state dict is loaded into
-`self.unet`.
+See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details on how the state dict is
+loaded into `self.unet`.
 
-See [`~loaders.LoraLoaderMixin.load_lora_into_text_encoder`] for more details on how the state dict is loaded
-into `self.text_encoder`.
+See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder`] for more details on how the state
+dict is loaded into `self.text_encoder`.
 
Parameters:
    pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
-        See [`~loaders.LoraLoaderMixin.lora_state_dict`].
+        See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
    kwargs (`dict`, *optional*):
-        See [`~loaders.LoraLoaderMixin.lora_state_dict`].
+        See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
    adapter_name (`str`, *optional*):
        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
        `default_{i}` where i is the total number of adapters being loaded.
-
set_adapters(self, adapter_names: Union[List[str], str], adapter_weights: Union[float, Dict, List[float], List[Dict], NoneType] = None)
- -
set_adapters_for_text_encoder(self, adapter_names: Union[List[str], str], text_encoder: Optional[ForwardRef('PreTrainedModel')] = None, text_encoder_weights: Union[float, List[float], List[NoneType], NoneType] = None)
Sets the adapter layers for the text encoder.

-Args:
-    adapter_names (`List[str]` or `str`):
-        The names of the adapters to use.
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to set the adapter layers for. If `None`, it will try to get the `text_encoder`
-        attribute.
-    text_encoder_weights (`List[float]`, *optional*):
-        The weights to use for the text encoder. If `None`, the weights are set to `1.0` for all the adapters.
- -
set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None
Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case
-you want to load multiple adapters and free some GPU memory.

-Args:
-    adapter_names (`List[str]`):
-        List of adapters to send device to.
-    device (`Union[torch.device, str, int]`):
-        Device to send the adapters to. Can be either a torch device, a str or an integer.
- -
unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True)
Reverses the effect of
-[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.fuse_lora).
+
unfuse_lora(self, components: List[str] = ['unet', 'text_encoder'], **kwargs)
Reverses the effect of
+[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora).
 
<Tip warning={true}>
 
@@ -1306,24 +1253,15 @@ </Tip>
 
Args:
+    components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
    unfuse_unet (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters.
    unfuse_text_encoder (`bool`, defaults to `True`):
        Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
        LoRA parameters then it won't have any effect.
-
unload_lora_weights(self)
Unloads the LoRA parameters.

-Examples:

-```python
->>> # Assuming `pipeline` is already loaded with the LoRA parameters.
->>> pipeline.unload_lora_weights()
->>> ...
-```
-
-Class methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
-
load_lora_into_text_encoder(state_dict, network_alphas, text_encoder, prefix=None, lora_scale=1.0, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `text_encoder`
+Class methods inherited from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin:
+
load_lora_into_text_encoder(state_dict, network_alphas, text_encoder, prefix=None, lora_scale=1.0, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `text_encoder`
 
Parameters:
    state_dict (`dict`):
@@ -1338,36 +1276,11 @@     lora_scale (`float`):
        How much to scale the output of the lora linear layer before it is added with the output of the regular
        lora layer.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
-    adapter_name (`str`, *optional*):
-        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-        `default_{i}` where i is the total number of adapters being loaded.
- -
load_lora_into_transformer(state_dict, network_alphas, transformer, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `transformer`.

-Parameters:
-    state_dict (`dict`):
-        A standard state dict containing the lora layer parameters. The keys can either be indexed directly
-        into the unet or prefixed with an additional `unet` which can be used to distinguish between text
-        encoder lora layers.
-    network_alphas (`Dict[str, float]`):
-        See `LoRALinearLayer` for more details.
-    unet (`UNet2DConditionModel`):
-        The UNet model to load the LoRA layers into.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
    adapter_name (`str`, *optional*):
        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
        `default_{i}` where i is the total number of adapters being loaded.
-
load_lora_into_unet(state_dict, network_alphas, unet, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `unet`.
+
load_lora_into_unet(state_dict, network_alphas, unet, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `unet`.
 
Parameters:
    state_dict (`dict`):
@@ -1375,14 +1288,11 @@         into the unet or prefixed with an additional `unet` which can be used to distinguish between text
        encoder lora layers.
    network_alphas (`Dict[str, float]`):
-        See `LoRALinearLayer` for more details.
+        The value of the network alpha used for stable learning and preventing underflow. This value has the
+        same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this
+        link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning).
    unet (`UNet2DConditionModel`):
        The UNet model to load the LoRA layers into.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
    adapter_name (`str`, *optional*):
        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
        `default_{i}` where i is the total number of adapters being loaded.
@@ -1414,9 +1324,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1431,17 +1339,10 @@         allowed by Git.
    subfolder (`str`, *optional*, defaults to `""`):
        The subfolder location of a model file within a larger model repository on the Hub or locally.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
-    mirror (`str`, *optional*):
-        Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
-        guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
-        information.
+    weight_name (`str`, *optional*, defaults to None):
+        Name of the serialized state dict file.
-
save_lora_weights(save_directory: Union[str, os.PathLike], unet_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_lora_layers: Dict[str, torch.nn.modules.module.Module] = None, transformer_lora_layers: Dict[str, torch.nn.modules.module.Module] = None, is_main_process: bool = True, weight_name: str = None, save_function: Callable = None, safe_serialization: bool = True) from builtins.type
Save the LoRA parameters corresponding to the UNet and text encoder.
+
save_lora_weights(save_directory: Union[str, os.PathLike], unet_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_lora_layers: Dict[str, torch.nn.modules.module.Module] = None, is_main_process: bool = True, weight_name: str = None, save_function: Callable = None, safe_serialization: bool = True) from builtins.type
Save the LoRA parameters corresponding to the UNet and text encoder.
 
Arguments:
    save_directory (`str` or `os.PathLike`):
@@ -1463,23 +1364,73 @@         Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.

-Static methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Data and other attributes inherited from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin:
+
text_encoder_name = 'text_encoder'
+ +
unet_name = 'unet'
+ +
+Methods inherited from diffusers.loaders.lora_base.LoraBaseMixin:
+
delete_adapters(self, adapter_names: Union[List[str], str])
Args:
+Deletes the LoRA layers of `adapter_name` for the unet and text-encoder(s).
+    adapter_names (`Union[List[str], str]`):
+        The names of the adapter to delete. Can be a single string or a list of strings
+ +
disable_lora(self)
+ +
enable_lora(self)
+ +
get_active_adapters(self) -> List[str]
Gets the list of the current active adapters.

+Example:

+```python
+from diffusers import DiffusionPipeline

+pipeline = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+).to("cuda")
+pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
+pipeline.get_active_adapters()
+```
+ +
get_list_adapters(self) -> Dict[str, List[str]]
Gets the current list of all available adapters in the pipeline.
+ +
set_adapters(self, adapter_names: Union[List[str], str], adapter_weights: Union[float, Dict, List[float], List[Dict], NoneType] = None)
+ +
set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None
Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case
+you want to load multiple adapters and free some GPU memory.

+Args:
+    adapter_names (`List[str]`):
+        List of adapters to send device to.
+    device (`Union[torch.device, str, int]`):
+        Device to send the adapters to. Can be either a torch device, a str or an integer.
+ +
unload_lora_weights(self)
Unloads the LoRA parameters.

+Examples:

+```python
+>>> # Assuming `pipeline` is already loaded with the LoRA parameters.
+>>> pipeline.unload_lora_weights()
+>>> ...
+```
+ +
+Static methods inherited from diffusers.loaders.lora_base.LoraBaseMixin:
+
pack_weights(layers, prefix)
+
write_lora_layers(state_dict: Dict[str, torch.Tensor], save_directory: str, is_main_process: bool, weight_name: str, save_function: Callable, safe_serialization: bool)

-Readonly properties inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Readonly properties inherited from diffusers.loaders.lora_base.LoraBaseMixin:
lora_scale

-Data and other attributes inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Data and other attributes inherited from diffusers.loaders.lora_base.LoraBaseMixin:
num_fused_loras = 0
-
text_encoder_name = 'text_encoder'
- -
transformer_name = 'transformer'
- -
unet_name = 'unet'
-
Class methods inherited from diffusers.loaders.single_file.FromSingleFileMixin:
from_single_file(pretrained_model_link_or_path, **kwargs) from builtins.type
Instantiate a [`DiffusionPipeline`] from pretrained pipeline weights saved in the `.ckpt` or `.safetensors`
@@ -1499,9 +1450,7 @@     cache_dir (`Union[str, os.PathLike]`, *optional*):
        Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
        is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1517,27 +1466,12 @@     original_config_file (`str`, *optional*):
        The path to the original config file that was used to train the model. If not provided, the config file
        will be inferred from the checkpoint file.
-    model_type (`str`, *optional*):
-        The type of model to load. If not provided, the model type will be inferred from the checkpoint file.
-    image_size (`int`, *optional*):
-        The size of the image output. It's used to configure the `sample_size` parameter of the UNet and VAE
-        model.
-    load_safety_checker (`bool`, *optional*, defaults to `False`):
-        Whether to load the safety checker model or not. By default, the safety checker is not loaded unless a
-        `safety_checker` component is passed to the `kwargs`.
-    num_in_channels (`int`, *optional*):
-        Specify the number of input channels for the UNet model. Read more about how to configure UNet model
-        with this parameter
-        [here](https://huggingface.co/docs/diffusers/training/adapt_a_model#configure-unet2dconditionmodel-parameters).
-    scaling_factor (`float`, *optional*):
-        The scaling factor to use for the VAE model. If not provided, it is inferred from the config file
-        first. If the scaling factor is not found in the config file, the default value 0.18215 is used.
-    scheduler_type (`str`, *optional*):
-        The type of scheduler to load. If not provided, the scheduler type will be inferred from the checkpoint
-        file.
-    prediction_type (`str`, *optional*):
-        The type of prediction to load. If not provided, the prediction type will be inferred from the
-        checkpoint file.
+    config (`str`, *optional*):
+        Can be either:
+            - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline
+              hosted on the Hub.
+            - A path to a *directory* (for example `./my_pipeline_directory/`) containing the pipeline
+              component configs in Diffusers format.
    kwargs (remaining dictionary of keyword arguments, *optional*):
        Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline
        class). The overwritten components are passed directly to the pipelines `__init__` method. See example
@@ -1555,7 +1489,7 @@  
>>> # Download pipeline from local file
>>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt
->>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly")
+>>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.ckpt")
 
>>> # Enable float16 and move to GPU
>>> pipeline = StableDiffusionPipeline.from_single_file(
@@ -1577,6 +1511,6 @@ Dict = typing.Dict
List = typing.List
Optional = typing.Optional
-PipelineImageInput = typing.Union[PIL.Image.Image, numpy.ndarray, tor...t[numpy.ndarray], typing.List[torch.FloatTensor]]
+PipelineImageInput = typing.Union[PIL.Image.Image, numpy.ndarray, tor...g.List[numpy.ndarray], typing.List[torch.Tensor]]
Union = typing.Union \ No newline at end of file diff --git a/docs/generated/multigen.pipelines.masked_stable_diffusion_xl_img2img.html b/docs/generated/multigen.pipelines.masked_stable_diffusion_xl_img2img.html index 926d49a..261636f 100644 --- a/docs/generated/multigen.pipelines.masked_stable_diffusion_xl_img2img.html +++ b/docs/generated/multigen.pipelines.masked_stable_diffusion_xl_img2img.html @@ -30,7 +30,7 @@        
-
diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline(diffusers.pipelines.pipeline_utils.DiffusionPipeline, diffusers.pipelines.pipeline_utils.StableDiffusionMixin, diffusers.loaders.textual_inversion.TextualInversionLoaderMixin, diffusers.loaders.single_file.FromSingleFileMixin, diffusers.loaders.lora.StableDiffusionXLLoraLoaderMixin, diffusers.loaders.ip_adapter.IPAdapterMixin) +
diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline(diffusers.pipelines.pipeline_utils.DiffusionPipeline, diffusers.pipelines.pipeline_utils.StableDiffusionMixin, diffusers.loaders.textual_inversion.TextualInversionLoaderMixin, diffusers.loaders.single_file.FromSingleFileMixin, diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin, diffusers.loaders.ip_adapter.IPAdapterMixin)
MaskedStableDiffusionXLImg2ImgPipeline @@ -57,14 +57,14 @@
diffusers.pipelines.pipeline_utils.StableDiffusionMixin
diffusers.loaders.textual_inversion.TextualInversionLoaderMixin
diffusers.loaders.single_file.FromSingleFileMixin
-
diffusers.loaders.lora.StableDiffusionXLLoraLoaderMixin
-
diffusers.loaders.lora.LoraLoaderMixin
+
diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin
+
diffusers.loaders.lora_base.LoraBaseMixin
diffusers.loaders.ip_adapter.IPAdapterMixin
builtins.object

Methods defined here:
-
__call__(self, prompt: Union[str, List[str]] = None, prompt_2: Union[str, List[str], NoneType] = None, image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor]] = None, original_image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor]] = None, strength: float = 0.3, num_inference_steps: Optional[int] = 50, timesteps: List[int] = None, denoising_start: Optional[float] = None, denoising_end: Optional[float] = None, guidance_scale: Optional[float] = 5.0, negative_prompt: Union[str, List[str], NoneType] = None, negative_prompt_2: Union[str, List[str], NoneType] = None, num_images_per_prompt: Optional[int] = 1, eta: Optional[float] = 0.0, generator: Union[torch._C.Generator, List[torch._C.Generator], NoneType] = None, latents: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, pooled_prompt_embeds: Optional[torch.FloatTensor] = None, negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, ip_adapter_image: Union[PIL.Image.Image, numpy.ndarray, torch.FloatTensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.FloatTensor], NoneType] = None, ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None, output_type: Optional[str] = 'pil', return_dict: bool = True, cross_attention_kwargs: Optional[Dict[str, Any]] = None, guidance_rescale: float = 0.0, original_size: Tuple[int, int] = None, crops_coords_top_left: Tuple[int, int] = (0, 0), target_size: Tuple[int, int] = None, negative_original_size: Optional[Tuple[int, int]] = None, negative_crops_coords_top_left: Tuple[int, int] = (0, 0), negative_target_size: Optional[Tuple[int, int]] = None, aesthetic_score: float = 6.0, negative_aesthetic_score: float = 2.5, clip_skip: Optional[int] = None, callback_on_step_end: Optional[Callable[[int, int, Dict], NoneType]] = None, callback_on_step_end_tensor_inputs: List[str] = ['latents'], mask: Union[torch.FloatTensor, PIL.Image.Image, numpy.ndarray, List[torch.FloatTensor], List[PIL.Image.Image], List[numpy.ndarray]] = None, sample_mode='sample', **kwargs)
The call function to the pipeline for generation.
+
__call__(self, prompt: Union[str, List[str]] = None, prompt_2: Union[str, List[str], NoneType] = None, image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor]] = None, original_image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor]] = None, strength: float = 0.3, num_inference_steps: Optional[int] = 50, timesteps: List[int] = None, denoising_start: Optional[float] = None, denoising_end: Optional[float] = None, guidance_scale: Optional[float] = 5.0, negative_prompt: Union[str, List[str], NoneType] = None, negative_prompt_2: Union[str, List[str], NoneType] = None, num_images_per_prompt: Optional[int] = 1, eta: Optional[float] = 0.0, generator: Union[torch._C.Generator, List[torch._C.Generator], NoneType] = None, latents: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, pooled_prompt_embeds: Optional[torch.FloatTensor] = None, negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, ip_adapter_image: Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, List[PIL.Image.Image], List[numpy.ndarray], List[torch.Tensor], NoneType] = None, ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None, output_type: Optional[str] = 'pil', return_dict: bool = True, cross_attention_kwargs: Optional[Dict[str, Any]] = None, guidance_rescale: float = 0.0, original_size: Tuple[int, int] = None, crops_coords_top_left: Tuple[int, int] = (0, 0), target_size: Tuple[int, int] = None, negative_original_size: Optional[Tuple[int, int]] = None, negative_crops_coords_top_left: Tuple[int, int] = (0, 0), negative_target_size: Optional[Tuple[int, int]] = None, aesthetic_score: float = 6.0, negative_aesthetic_score: float = 2.5, clip_skip: Optional[int] = None, callback_on_step_end: Optional[Callable[[int, int, Dict], NoneType]] = None, callback_on_step_end_tensor_inputs: List[str] = ['latents'], mask: Union[torch.FloatTensor, PIL.Image.Image, numpy.ndarray, List[torch.FloatTensor], List[PIL.Image.Image], List[numpy.ndarray]] = None, sample_mode='sample', **kwargs)
The call function to the pipeline for generation.
 
Args:
    prompt (`str` or `List[str]`, *optional*):
@@ -148,7 +148,7 @@
encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
-
encode_prompt(self, prompt: str, prompt_2: Optional[str] = None, device: Optional[torch.device] = None, num_images_per_prompt: int = 1, do_classifier_free_guidance: bool = True, negative_prompt: Optional[str] = None, negative_prompt_2: Optional[str] = None, prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.FloatTensor] = None, pooled_prompt_embeds: Optional[torch.FloatTensor] = None, negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, lora_scale: Optional[float] = None, clip_skip: Optional[int] = None)
Encodes the prompt into text encoder hidden states.
+
encode_prompt(self, prompt: str, prompt_2: Optional[str] = None, device: Optional[torch.device] = None, num_images_per_prompt: int = 1, do_classifier_free_guidance: bool = True, negative_prompt: Optional[str] = None, negative_prompt_2: Optional[str] = None, prompt_embeds: Optional[torch.Tensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None, pooled_prompt_embeds: Optional[torch.Tensor] = None, negative_pooled_prompt_embeds: Optional[torch.Tensor] = None, lora_scale: Optional[float] = None, clip_skip: Optional[int] = None)
Encodes the prompt into text encoder hidden states.
 
Args:
    prompt (`str` or `List[str]`, *optional*):
@@ -169,17 +169,17 @@     negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
-    prompt_embeds (`torch.FloatTensor`, *optional*):
+    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
-    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
-    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
-    negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
@@ -189,7 +189,7 @@         Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
-
get_guidance_scale_embedding(self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32) -> torch.FloatTensor
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
+
get_guidance_scale_embedding(self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32) -> torch.Tensor
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
 
Args:
    w (`torch.Tensor`):
@@ -200,7 +200,7 @@         Data type of the generated embeddings.
 
Returns:
-    `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
+    `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
get_timesteps(self, num_inference_steps, strength, device, denoising_start=None)
@@ -442,9 +442,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -569,9 +567,7 @@     cache_dir (`Union[str, os.PathLike]`, *optional*):
        Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
        is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -795,9 +791,6 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.
    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -978,9 +971,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1126,9 +1117,7 @@     cache_dir (`Union[str, os.PathLike]`, *optional*):
        Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
        is not used.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1144,27 +1133,12 @@     original_config_file (`str`, *optional*):
        The path to the original config file that was used to train the model. If not provided, the config file
        will be inferred from the checkpoint file.
-    model_type (`str`, *optional*):
-        The type of model to load. If not provided, the model type will be inferred from the checkpoint file.
-    image_size (`int`, *optional*):
-        The size of the image output. It's used to configure the `sample_size` parameter of the UNet and VAE
-        model.
-    load_safety_checker (`bool`, *optional*, defaults to `False`):
-        Whether to load the safety checker model or not. By default, the safety checker is not loaded unless a
-        `safety_checker` component is passed to the `kwargs`.
-    num_in_channels (`int`, *optional*):
-        Specify the number of input channels for the UNet model. Read more about how to configure UNet model
-        with this parameter
-        [here](https://huggingface.co/docs/diffusers/training/adapt_a_model#configure-unet2dconditionmodel-parameters).
-    scaling_factor (`float`, *optional*):
-        The scaling factor to use for the VAE model. If not provided, it is inferred from the config file
-        first. If the scaling factor is not found in the config file, the default value 0.18215 is used.
-    scheduler_type (`str`, *optional*):
-        The type of scheduler to load. If not provided, the scheduler type will be inferred from the checkpoint
-        file.
-    prediction_type (`str`, *optional*):
-        The type of prediction to load. If not provided, the prediction type will be inferred from the
-        checkpoint file.
+    config (`str`, *optional*):
+        Can be either:
+            - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline
+              hosted on the Hub.
+            - A path to a *directory* (for example `./my_pipeline_directory/`) containing the pipeline
+              component configs in Diffusers format.
    kwargs (remaining dictionary of keyword arguments, *optional*):
        Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline
        class). The overwritten components are passed directly to the pipelines `__init__` method. See example
@@ -1182,7 +1156,7 @@  
>>> # Download pipeline from local file
>>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt
->>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly")
+>>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.ckpt")
 
>>> # Enable float16 and move to GPU
>>> pipeline = StableDiffusionPipeline.from_single_file(
@@ -1193,78 +1167,8 @@ ```

-Methods inherited from diffusers.loaders.lora.StableDiffusionXLLoraLoaderMixin:
-
load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name: Optional[str] = None, **kwargs)
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and
-`self.text_encoder`.

-All kwargs are forwarded to `self.lora_state_dict`.

-See [`~loaders.LoraLoaderMixin.lora_state_dict`] for more details on how the state dict is loaded.

-See [`~loaders.LoraLoaderMixin.load_lora_into_unet`] for more details on how the state dict is loaded into
-`self.unet`.

-See [`~loaders.LoraLoaderMixin.load_lora_into_text_encoder`] for more details on how the state dict is loaded
-into `self.text_encoder`.

-Parameters:
-    pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
-        See [`~loaders.LoraLoaderMixin.lora_state_dict`].
-    adapter_name (`str`, *optional*):
-        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-        `default_{i}` where i is the total number of adapters being loaded.
-    kwargs (`dict`, *optional*):
-        See [`~loaders.LoraLoaderMixin.lora_state_dict`].
- -
-Class methods inherited from diffusers.loaders.lora.StableDiffusionXLLoraLoaderMixin:
-
save_lora_weights(save_directory: Union[str, os.PathLike], unet_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_2_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, is_main_process: bool = True, weight_name: str = None, save_function: Callable = None, safe_serialization: bool = True) from builtins.type
Save the LoRA parameters corresponding to the UNet and text encoder.

-Arguments:
-    save_directory (`str` or `os.PathLike`):
-        Directory to save LoRA parameters to. Will be created if it doesn't exist.
-    unet_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
-        State dict of the LoRA layers corresponding to the `unet`.
-    text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
-        State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
-        encoder LoRA state dict because it comes from 🤗 Transformers.
-    is_main_process (`bool`, *optional*, defaults to `True`):
-        Whether the process calling this is the main process or not. Useful during distributed training and you
-        need to call this function on all processes. In this case, set `is_main_process=True` only on the main
-        process to avoid race conditions.
-    save_function (`Callable`):
-        The function to use to save the state dictionary. Useful during distributed training when you need to
-        replace `torch.save` with another method. Can be configured with the environment variable
-        `DIFFUSERS_SAVE_MODE`.
-    safe_serialization (`bool`, *optional*, defaults to `True`):
-        Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
- -
-Methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
-
delete_adapters(self, adapter_names: Union[List[str], str])
Args:
-Deletes the LoRA layers of `adapter_name` for the unet and text-encoder(s).
-    adapter_names (`Union[List[str], str]`):
-        The names of the adapter to delete. Can be a single string or a list of strings
- -
disable_lora(self)
- -
disable_lora_for_text_encoder(self, text_encoder: Optional[ForwardRef('PreTrainedModel')] = None)
Disables the LoRA layers for the text encoder.

-Args:
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to disable the LoRA layers for. If `None`, it will try to get the
-        `text_encoder` attribute.
- -
enable_lora(self)
- -
enable_lora_for_text_encoder(self, text_encoder: Optional[ForwardRef('PreTrainedModel')] = None)
Enables the LoRA layers for the text encoder.

-Args:
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to enable the LoRA layers for. If `None`, it will try to get the `text_encoder`
-        attribute.
- -
fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True, lora_scale: float = 1.0, safe_fusing: bool = False, adapter_names: Optional[List[str]] = None)
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
+Methods inherited from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin:
+
fuse_lora(self, components: List[str] = ['unet', 'text_encoder', 'text_encoder_2'], lora_scale: float = 1.0, safe_fusing: bool = False, adapter_names: Optional[List[str]] = None, **kwargs)
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
 
<Tip warning={true}>
 
@@ -1273,10 +1177,7 @@ </Tip>
 
Args:
-    fuse_unet (`bool`, defaults to `True`): Whether to fuse the UNet LoRA parameters.
-    fuse_text_encoder (`bool`, defaults to `True`):
-        Whether to fuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
-        LoRA parameters then it won't have any effect.
+    components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
    lora_scale (`float`, defaults to 1.0):
        Controls how much to influence the outputs with the LoRA parameters.
    safe_fusing (`bool`, defaults to `False`):
@@ -1297,46 +1198,31 @@ pipeline.fuse_lora(lora_scale=0.7)
```
-
get_active_adapters(self) -> List[str]
Gets the list of the current active adapters.
+
load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name: Optional[str] = None, **kwargs)
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and
+`self.text_encoder`.
 
-Example:
+All kwargs are forwarded to `self.lora_state_dict`.
 
-```python
-from diffusers import DiffusionPipeline
+See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is
+loaded.
 
-pipeline = DiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-).to("cuda")
-pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
-pipeline.get_active_adapters()
-```
- -
get_list_adapters(self) -> Dict[str, List[str]]
Gets the current list of all available adapters in the pipeline.
- -
set_adapters(self, adapter_names: Union[List[str], str], adapter_weights: Union[float, Dict, List[float], List[Dict], NoneType] = None)
- -
set_adapters_for_text_encoder(self, adapter_names: Union[List[str], str], text_encoder: Optional[ForwardRef('PreTrainedModel')] = None, text_encoder_weights: Union[float, List[float], List[NoneType], NoneType] = None)
Sets the adapter layers for the text encoder.
+See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details on how the state dict is
+loaded into `self.unet`.
 
-Args:
-    adapter_names (`List[str]` or `str`):
-        The names of the adapters to use.
-    text_encoder (`torch.nn.Module`, *optional*):
-        The text encoder module to set the adapter layers for. If `None`, it will try to get the `text_encoder`
-        attribute.
-    text_encoder_weights (`List[float]`, *optional*):
-        The weights to use for the text encoder. If `None`, the weights are set to `1.0` for all the adapters.
- -
set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None
Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case
-you want to load multiple adapters and free some GPU memory.
+See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder`] for more details on how the state
+dict is loaded into `self.text_encoder`.
 
-Args:
-    adapter_names (`List[str]`):
-        List of adapters to send device to.
-    device (`Union[torch.device, str, int]`):
-        Device to send the adapters to. Can be either a torch device, a str or an integer.
+Parameters:
+    pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
+        See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
+    adapter_name (`str`, *optional*):
+        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
+        `default_{i}` where i is the total number of adapters being loaded.
+    kwargs (`dict`, *optional*):
+        See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
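
For orientation, here is a minimal usage sketch of `load_lora_weights`; it reuses the `CiroN2022/toy-face` LoRA that appears in the `get_active_adapters` example later in this file, and the base checkpoint id is assumed:

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
# Register the LoRA under an explicit adapter name so it can later be
# enabled, disabled, fused, or deleted by name.
pipeline.load_lora_weights(
    "CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy"
)
```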
-
unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True)
Reverses the effect of
-[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.fuse_lora).
+
unfuse_lora(self, components: List[str] = ['unet', 'text_encoder', 'text_encoder_2'], **kwargs)
Reverses the effect of
+[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora).
 
<Tip warning={true}>
 
@@ -1345,24 +1231,15 @@ </Tip>
 
Args:
+    components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
    unfuse_unet (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters.
    unfuse_text_encoder (`bool`, defaults to `True`):
        Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the
        LoRA parameters then it won't have any effect.
-
unload_lora_weights(self)
Unloads the LoRA parameters.

-Examples:

-```python
->>> # Assuming `pipeline` is already loaded with the LoRA parameters.
->>> pipeline.unload_lora_weights()
->>> ...
-```
-
-Class methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
-
load_lora_into_text_encoder(state_dict, network_alphas, text_encoder, prefix=None, lora_scale=1.0, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `text_encoder`
+Class methods inherited from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin:
+
load_lora_into_text_encoder(state_dict, network_alphas, text_encoder, prefix=None, lora_scale=1.0, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `text_encoder`
 
Parameters:
    state_dict (`dict`):
@@ -1377,16 +1254,11 @@     lora_scale (`float`):
        How much to scale the output of the lora linear layer before it is added with the output of the regular
        lora layer.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
    adapter_name (`str`, *optional*):
        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
        `default_{i}` where i is the total number of adapters being loaded.
-
load_lora_into_transformer(state_dict, network_alphas, transformer, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `transformer`.
+
load_lora_into_unet(state_dict, network_alphas, unet, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `unet`.
 
Parameters:
    state_dict (`dict`):
@@ -1394,34 +1266,11 @@         into the unet or prefixed with an additional `unet` which can be used to distinguish between text
        encoder lora layers.
    network_alphas (`Dict[str, float]`):
-        See `LoRALinearLayer` for more details.
+        The value of the network alpha used for stable learning and preventing underflow. This value has the
+        same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this
+        link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning).
    unet (`UNet2DConditionModel`):
        The UNet model to load the LoRA layers into.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
-    adapter_name (`str`, *optional*):
-        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-        `default_{i}` where i is the total number of adapters being loaded.
- -
load_lora_into_unet(state_dict, network_alphas, unet, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None) from builtins.type
This will load the LoRA layers specified in `state_dict` into `unet`.

-Parameters:
-    state_dict (`dict`):
-        A standard state dict containing the lora layer parameters. The keys can either be indexed directly
-        into the unet or prefixed with an additional `unet` which can be used to distinguish between text
-        encoder lora layers.
-    network_alphas (`Dict[str, float]`):
-        See `LoRALinearLayer` for more details.
-    unet (`UNet2DConditionModel`):
-        The UNet model to load the LoRA layers into.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
    adapter_name (`str`, *optional*):
        Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
        `default_{i}` where i is the total number of adapters being loaded.
@@ -1453,9 +1302,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1470,34 +1317,101 @@         allowed by Git.
    subfolder (`str`, *optional*, defaults to `""`):
        The subfolder location of a model file within a larger model repository on the Hub or locally.
-    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-        Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-        tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-        Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-        argument to `True` will raise an error.
-    mirror (`str`, *optional*):
-        Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
-        guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
-        information.
+    weight_name (`str`, *optional*, defaults to None):
+        Name of the serialized state dict file.
+ +
save_lora_weights(save_directory: Union[str, os.PathLike], unet_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, text_encoder_2_lora_layers: Dict[str, Union[torch.nn.modules.module.Module, torch.Tensor]] = None, is_main_process: bool = True, weight_name: str = None, save_function: Callable = None, safe_serialization: bool = True) from builtins.type
Save the LoRA parameters corresponding to the UNet and text encoder.

+Arguments:
+    save_directory (`str` or `os.PathLike`):
+        Directory to save LoRA parameters to. Will be created if it doesn't exist.
+    unet_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
+        State dict of the LoRA layers corresponding to the `unet`.
+    text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
+        State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
+        encoder LoRA state dict because it comes from 🤗 Transformers.
+    text_encoder_2_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
+        State dict of the LoRA layers corresponding to the `text_encoder_2`. Must explicitly pass the text
+        encoder LoRA state dict because it comes from 🤗 Transformers.
+    is_main_process (`bool`, *optional*, defaults to `True`):
+        Whether the process calling this is the main process or not. Useful during distributed training and you
+        need to call this function on all processes. In this case, set `is_main_process=True` only on the main
+        process to avoid race conditions.
+    save_function (`Callable`):
+        The function to use to save the state dictionary. Useful during distributed training when you need to
+        replace `torch.save` with another method. Can be configured with the environment variable
+        `DIFFUSERS_SAVE_MODE`.
+    safe_serialization (`bool`, *optional*, defaults to `True`):
+        Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
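
A minimal sketch of the classmethod call; `unet_lora_state_dict` and `text_encoder_lora_state_dict` are hypothetical names standing in for LoRA state dicts produced by a training run, not objects defined in this repository:

```python
from diffusers import StableDiffusionXLPipeline

# The two state dicts are placeholders for LoRA layers obtained elsewhere
# (e.g. from a training loop).
StableDiffusionXLPipeline.save_lora_weights(
    save_directory="./my_lora",  # created if it doesn't exist
    unet_lora_layers=unet_lora_state_dict,
    text_encoder_lora_layers=text_encoder_lora_state_dict,
    safe_serialization=True,
)
```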
+ +
+Data and other attributes inherited from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin:
+
text_encoder_name = 'text_encoder'
+ +
unet_name = 'unet'
+ +
+Methods inherited from diffusers.loaders.lora_base.LoraBaseMixin:
+
delete_adapters(self, adapter_names: Union[List[str], str])
Args:
+Deletes the LoRA layers of `adapter_name` for the unet and text-encoder(s).
+    adapter_names (`Union[List[str], str]`):
+        The names of the adapter to delete. Can be a single string or a list of strings
+ +
disable_lora(self)
+ +
enable_lora(self)
+ +
get_active_adapters(self) -> List[str]
Gets the list of the current active adapters.

+Example:

+```python
+from diffusers import DiffusionPipeline

+pipeline = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+).to("cuda")
+pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
+pipeline.get_active_adapters()
+```
+ +
get_list_adapters(self) -> Dict[str, List[str]]
Gets the current list of all available adapters in the pipeline.
+ +
set_adapters(self, adapter_names: Union[List[str], str], adapter_weights: Union[float, Dict, List[float], List[Dict], NoneType] = None)
+ +
set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None
Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case
+you want to load multiple adapters and free some GPU memory.

+Args:
+    adapter_names (`List[str]`):
+        List of adapters to send device to.
+    device (`Union[torch.device, str, int]`):
+        Device to send the adapters to. Can be either a torch device, a str or an integer.
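
Continuing the `pipeline` and "toy" adapter from the `load_lora_weights` sketch above, offloading and restoring an adapter looks like this:

```python
# Park the adapter on the CPU to free GPU memory while it is not in use...
pipeline.set_lora_device(["toy"], "cpu")
# ...and move it back before the next generation that needs it.
pipeline.set_lora_device(["toy"], "cuda")
```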
+ +
unload_lora_weights(self)
Unloads the LoRA parameters.

+Examples:

+```python
+>>> # Assuming `pipeline` is already loaded with the LoRA parameters.
+>>> pipeline.unload_lora_weights()
+>>> ...
+```

-Static methods inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Static methods inherited from diffusers.loaders.lora_base.LoraBaseMixin:
+
pack_weights(layers, prefix)
+
write_lora_layers(state_dict: Dict[str, torch.Tensor], save_directory: str, is_main_process: bool, weight_name: str, save_function: Callable, safe_serialization: bool)

-Readonly properties inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Readonly properties inherited from diffusers.loaders.lora_base.LoraBaseMixin:
lora_scale

-Data and other attributes inherited from diffusers.loaders.lora.LoraLoaderMixin:
+Data and other attributes inherited from diffusers.loaders.lora_base.LoraBaseMixin:
num_fused_loras = 0
-
text_encoder_name = 'text_encoder'
- -
transformer_name = 'transformer'
- -
unet_name = 'unet'
-
Methods inherited from diffusers.loaders.ip_adapter.IPAdapterMixin:
load_ip_adapter(self, pretrained_model_name_or_path_or_dict: Union[str, List[str], Dict[str, torch.Tensor]], subfolder: Union[str, List[str]], weight_name: Union[str, List[str]], image_encoder_folder: Optional[str] = 'image_encoder', **kwargs)
Parameters:
@@ -1529,9 +1443,7 @@     force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
-    resume_download (`bool`, *optional*, defaults to `False`):
-        Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
-        incompletely downloaded files are deleted.

    proxies (`Dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -1550,12 +1462,32 @@         Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
        argument to `True` will raise an error.
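
A minimal sketch of `load_ip_adapter` on an already loaded Stable Diffusion pipeline; the repo id, subfolder, and weight name are the ones published with the reference IP-Adapter release, cited here as an assumption rather than taken from this document:

```python
# Attach the image-prompt adapter to the pipeline.
pipeline.load_ip_adapter(
    "h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin"
)
# Balance text vs. image conditioning (see set_ip_adapter_scale below).
pipeline.set_ip_adapter_scale(0.6)
```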
-
set_ip_adapter_scale(self, scale)
Sets the conditioning scale between text and image.
+
set_ip_adapter_scale(self, scale)
Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for
+granular control over each IP-Adapter behavior. A config can be a float or a dictionary.
 
Example:
 
```py
-pipeline.set_ip_adapter_scale(0.5)
+# To use original IP-Adapter
+scale = 1.0
+pipeline.set_ip_adapter_scale(scale)

+# To use style block only
+scale = {
+    "up": {"block_0": [0.0, 1.0, 0.0]},
+}
+pipeline.set_ip_adapter_scale(scale)

+# To use style+layout blocks
+scale = {
+    "down": {"block_2": [0.0, 1.0]},
+    "up": {"block_0": [0.0, 1.0, 0.0]},
+}
+pipeline.set_ip_adapter_scale(scale)

+# To use style and layout from 2 reference images
+scales = [{"down": {"block_2": [0.0, 1.0]}}, {"up": {"block_0": [0.0, 1.0, 0.0]}}]
+pipeline.set_ip_adapter_scale(scales)
```
unload_ip_adapter(self)
Unloads the IP Adapter weights
@@ -1580,7 +1512,7 @@ Dict = typing.Dict
List = typing.List
Optional = typing.Optional
-PipelineImageInput = typing.Union[PIL.Image.Image, numpy.ndarray, tor...t[numpy.ndarray], typing.List[torch.FloatTensor]]
+PipelineImageInput = typing.Union[PIL.Image.Image, numpy.ndarray, tor...g.List[numpy.ndarray], typing.List[torch.Tensor]]
Tuple = typing.Tuple
Union = typing.Union
XLA_AVAILABLE = False
diff --git a/docs/generated/multigen.pipes.html b/docs/generated/multigen.pipes.html index e07d80f..3c6aa62 100644 --- a/docs/generated/multigen.pipes.html +++ b/docs/generated/multigen.pipes.html @@ -61,7 +61,7 @@
enum.Enum(builtins.object)
-
ControlnetType +
ModelType
@@ -72,7 +72,7 @@ class BasePipe(builtins.object)     -BasePipe(model_id: str, sd_pipe_class: Optional[Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline]] = None, pipe: Optional[diffusers.pipelines.pipeline_utils.DiffusionPipeline] = None, **args)
+BasePipe(model_id: str, sd_pipe_class: Optional[Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline]] = None, pipe: Optional[diffusers.pipelines.pipeline_utils.DiffusionPipeline] = None, model_type: Optional[multigen.pipes.ModelType] = None, device=None, lpw=False, **args)
 
Base class for all pipelines.
 
@@ -80,7 +80,7 @@ as well as pipeline configuration
 
  Methods defined here:
-
__init__(self, model_id: str, sd_pipe_class: Optional[Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline]] = None, pipe: Optional[diffusers.pipelines.pipeline_utils.DiffusionPipeline] = None, **args)
Constructor
+
__init__(self, model_id: str, sd_pipe_class: Optional[Type[diffusers.pipelines.pipeline_utils.DiffusionPipeline]] = None, pipe: Optional[diffusers.pipelines.pipeline_utils.DiffusionPipeline] = None, model_type: Optional[multigen.pipes.ModelType] = None, device=None, lpw=False, **args)
Constructor
 
Args:
    model_id (str):
@@ -90,6 +90,10 @@     pipe (DiffusionPipeline, *optional*):
        an instance of the pipeline to use,
        if provided the model_id won't be used for loading.
+    model_type (ModelType, *optional*):
+        A flag to select between SD and SDXL when neither sd_pipe_class nor pipe is given
+    lpw (bool, *optional*):
+        A flag to enable or disable long-prompt weighting
    **args:
        additional arguments passed to sd_pipe_class constructor
@@ -97,14 +101,16 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
-
get_config(self)
Return parameters for this model.
 
:return: dict
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
+
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
setup(self, steps=50, clip_skip=0, loras=[], **args)
Setup pipeline for generation.
 
Args:
@@ -148,7 +154,7 @@ class CIm2ImPipe(Cond2ImPipe)     -CIm2ImPipe(model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=&lt;ControlnetType.SD: 1&gt;, **args)
+CIm2ImPipe(model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=&lt;ModelType.SD: 1&gt;, **args)
 
A pipeline for conditional image-to-image generation
where the conditional image is derived from the input image.
@@ -162,7 +168,7 @@

Methods defined here:
-
__init__(self, model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=<ControlnetType.SD: 1>, **args)
Initialize the CIm2ImPipe.
+
__init__(self, model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=<ModelType.SD: 1>, **args)
Initialize the CIm2ImPipe.
 
Args:
    model_id (str):
@@ -172,8 +178,6 @@     ctypes (list of str, optional):
        The types of conditioning to apply to the input image. Defaults to ["soft"].
        can be one of canny, pose, soft, soft-sobel, depth, None
-    model_type (ControlnetType, optional):
-        The type of ControlNet model to use(SD or SDXL). Defaults to ControlnetType.SD.
    **args:
        Additional arguments passed to the Cond2ImPipe constructor.
@@ -215,17 +219,15 @@
get_default_cond_scales(self)
-
get_sd_class(self)
-
Data and other attributes inherited from Cond2ImPipe:
cmodels = {'canny': 'sd-controlnet-canny', 'depth': 'control_v11f1p_sd15_depth', 'inpaint': 'control_v11p_sd15_inpaint', 'ip2p': 'control_v11e_sd15_ip2p', 'pose': 'control_v11p_sd15_openpose', 'qr': 'controlnet_qrcode-control_v1p_sd15', 'scribble': 'control_v11p_sd15_scribble', 'soft': 'control_v11p_sd15_softedge', 'soft-sobel': 'control_v11p_sd15_softedge'}
-
cmodelsxl = {'qr': 'controlnet-qr-pattern-sdxl'}
+
cmodelsxl = {'canny': 'controlnet-canny-sdxl', 'depth': 'controlnet-depth-sdxl', 'inpaint': 'controlnet-inpaint-sdxl', 'pose': 'controlnet-openpose-sdxl', 'qr': 'controlnet-qr-pattern-sdxl', 'scribble': 'controlnet-scribble-sdxl', 'soft': 'controlnet-softedge-sdxl'}
cond_scales_defaults = {'canny': 0.75, 'depth': 0.5, 'inpaint': 1.0, 'ip2p': 0.5, 'pose': 1.0, 'qr': 1.5, 'scribble': 0.95, 'soft': 0.95, 'soft-sobel': 0.3}
-
cond_scales_defaults_xl = {'qr': 0.5}
+
cond_scales_defaults_xl = {'canny': 0.75, 'depth': 0.5, 'inpaint': 1.0, 'pose': 1.0, 'qr': 0.5, 'scribble': 0.95, 'soft': 0.95}
cpath = './models-cn/'
@@ -237,10 +239,12 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

@@ -268,7 +272,7 @@ class Cond2ImPipe(BasePipe)     -Cond2ImPipe(model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=&lt;ControlnetType.SD: 1&gt;, **args)
+Cond2ImPipe(model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=None, **args)
 

 
  @@ -279,15 +283,13 @@

Methods defined here:
-
__init__(self, model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=<ControlnetType.SD: 1>, **args)
Constructor
+
__init__(self, model_id, pipe: Optional[diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline] = None, ctypes=['soft'], model_type=None, **args)
Constructor
 
Args:
    model_id (str):
        Path or identifier of the model to load.
    pipe (StableDiffusion(XL)ControlNetPipeline, *optional*):
        An instance of the pipeline to use. If provided, `model_id` won't be used for loading.
-    model_type (ControlnetType, *optional*):
-        determines whether it's SD or SDXL model, defaults to ControlnetType.SD
    **args:
        Additional arguments passed to the `BasePipe` constructor.
@@ -309,8 +311,6 @@
get_default_cond_scales(self)
-
get_sd_class(self)
-
setup(self, fimage, width=None, height=None, image=None, cscales=None, guess_mode=False, **args)
Set up the pipeline with the given parameters.
 
Args:
@@ -333,11 +333,11 @@ Data and other attributes defined here:
cmodels = {'canny': 'sd-controlnet-canny', 'depth': 'control_v11f1p_sd15_depth', 'inpaint': 'control_v11p_sd15_inpaint', 'ip2p': 'control_v11e_sd15_ip2p', 'pose': 'control_v11p_sd15_openpose', 'qr': 'controlnet_qrcode-control_v1p_sd15', 'scribble': 'control_v11p_sd15_scribble', 'soft': 'control_v11p_sd15_softedge', 'soft-sobel': 'control_v11p_sd15_softedge'}
-
cmodelsxl = {'qr': 'controlnet-qr-pattern-sdxl'}
+
cmodelsxl = {'canny': 'controlnet-canny-sdxl', 'depth': 'controlnet-depth-sdxl', 'inpaint': 'controlnet-inpaint-sdxl', 'pose': 'controlnet-openpose-sdxl', 'qr': 'controlnet-qr-pattern-sdxl', 'scribble': 'controlnet-scribble-sdxl', 'soft': 'controlnet-softedge-sdxl'}
cond_scales_defaults = {'canny': 0.75, 'depth': 0.5, 'inpaint': 1.0, 'ip2p': 0.5, 'pose': 1.0, 'qr': 1.5, 'scribble': 0.95, 'soft': 0.95, 'soft-sobel': 0.3}
-
cond_scales_defaults_xl = {'qr': 0.5}
+
cond_scales_defaults_xl = {'canny': 0.75, 'depth': 0.5, 'inpaint': 1.0, 'pose': 1.0, 'qr': 0.5, 'scribble': 0.95, 'soft': 0.95}
cpath = './models-cn/'
@@ -349,10 +349,12 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

@@ -377,44 +379,6 @@ - - - - -
 
-class ControlnetType(enum.Enum)
   ControlnetType(value, names=None, *, module=None, qualname=None, type=None, start=1)

-An enumeration.
 
 
Method resolution order:
-
ControlnetType
-
enum.Enum
-
builtins.object
-
-
-Data and other attributes defined here:
-
SD = <ControlnetType.SD: 1>
- -
SDXL = <ControlnetType.SDXL: 2>
- -
-Data descriptors inherited from enum.Enum:
-
name
-
The name of the Enum member.
-
-
value
-
The value of the Enum member.
-
-
-Readonly properties inherited from enum.EnumMeta:
-
__members__
-
Returns a mapping of member name->value.

-This mapping lists all enum members, including aliases. Note that this
-is a read-only view of the internal mapping.
-
-

- - - @@ -439,6 +403,10 @@     pipe (DiffusionPipeline, *optional*):
        an instance of the pipeline to use,
        if provided the model_id won't be used for loading.
+    model_type (ModelType, *optional*):
+        A flag to select between SD and SDXL when neither sd_pipe_class nor pipe is given
+    lpw (bool, *optional*):
+        A flag to enable or disable long-prompt weighting
    **args:
        additional arguments passed to sd_pipe_class constructor @@ -482,10 +450,12 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

@@ -562,14 +532,16 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
-
get_config(self)
Return parameters for this model.
 
:return: dict
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
+
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

@@ -597,7 +569,7 @@ class MaskedIm2ImPipe(Im2ImPipe) -
 
class Im2ImPipe(BasePipe)
   
   MaskedIm2ImPipe(*args, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline] = None, **kwargs)
+
MaskedIm2ImPipe(*args, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline] = None, lpw=False, **kwargs)
 
A pipeline for image-to-image translation with masking.
 
@@ -614,7 +586,7 @@
Methods defined here:
-
__init__(self, *args, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline] = None, **kwargs)
Initialize a MaskedIm2ImPipe instance.
+
__init__(self, *args, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline] = None, lpw=False, **kwargs)
Initialize a MaskedIm2ImPipe instance.
 
Args:
    *args: arguments passed to Im2ImPipe.
@@ -637,12 +609,12 @@    original_image (str or Image.Image, *optional*):
        The original image. Defaults to None.
   image_painted (str or Image.Image, *optional*):
-        The painted image. Defaults to None.
+        A modified version of original_image; skip this parameter if mask is passed. Defaults to None.
   mask (array-like or Image.Image, *optional*):
-       The mask. Defaults to None. If None tt will be computed from the difference
+       The mask. Defaults to None. If None it will be computed from the difference
       between original_image and image_painted
   blur (int, *optional*):
-        The blur radius for the mask. Defaults to 4.
+        The blur radius applied to the mask during generation. Defaults to 4.
   blur_compose (int, *optional*):
        The blur radius for composing the original and generated images. Defaults to 4.
   sample_mode (str, *optional*):
@@ -673,10 +645,12 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

@@ -701,10 +675,48 @@ + + + + +
 
+class ModelType(enum.Enum)
   ModelType(value, names=None, *, module=None, qualname=None, type=None, start=1)

+An enumeration.
 
 
Method resolution order:
+
ModelType
+
enum.Enum
+
builtins.object
+
+
+Data and other attributes defined here:
+
SD = <ModelType.SD: 1>
+ +
SDXL = <ModelType.SDXL: 2>
+ +
+Data descriptors inherited from enum.Enum:
+
name
+
The name of the Enum member.
+
+
value
+
The value of the Enum member.
+
+
+Readonly properties inherited from enum.EnumMeta:
+
__members__
+
Returns a mapping of member name->value.

+This mapping lists all enum members, including aliases. Note that this
+is a read-only view of the internal mapping.
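
To show where `ModelType` fits, here is a sketch of forcing the SDXL code path through `BasePipe`'s `model_type` flag; the checkpoint id and prompt are illustrative, and `lpw` reaching `BasePipe` through `**args` is an assumption based on the signatures above:

```python
from multigen.pipes import Prompt2ImPipe, ModelType

# model_type selects SD vs. SDXL when neither sd_pipe_class nor pipe is given.
pipe = Prompt2ImPipe('stabilityai/stable-diffusion-xl-base-1.0',
                     model_type=ModelType.SDXL, lpw=True)
pipe.setup(width=1024, height=1024, steps=30)
image = pipe.gen({'prompt': 'a lighthouse on a cliff at dawn, detailed painting'})
image.save('lighthouse.png')
```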
+
+

+ + + - @@ -715,7 +727,7 @@
Methods defined here:
-
__init__(self, model_id: str, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline] = None, lpw=False, **args)
Constructor
+
__init__(self, model_id: str, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline] = None, **args)
Constructor
 
Args:
    model_id (str):
@@ -725,6 +737,10 @@     pipe (DiffusionPipeline, *optional*):
        an instance of the pipeline to use,
        if provided the model_id won't be used for loading.
+    model_type (ModelType, *optional*):
+        A flag to select between SD and SDXL when neither sd_pipe_class nor pipe is given
+    lpw (bool, *optional*):
+        A flag to enable or disable long-prompt weighting
    **args:
        additional arguments passed to sd_pipe_class constructor
@@ -756,14 +772,16 @@
clear_hypernets(self)
-
from_pipe(self, pipe, **args)
-
get_config(self)
Return parameters for this model.
 
:return: dict
+
get_prompt_embeds(self, prompt, negative_prompt, clip_skip: Optional[int] = None, lora_scale: Optional[int] = None)
+
load_lora(self, path, multiplier=1.0)
+
prepare_inputs(self, inputs)
+
try_set_scheduler(self, inputs)

diff --git a/docs/generated/multigen.prompting.html b/docs/generated/multigen.prompting.html index 494cf6d..2abde6d 100644 --- a/docs/generated/multigen.prompting.html +++ b/docs/generated/multigen.prompting.html @@ -67,5 +67,12 @@
 
class Prompt2ImPipe(BasePipe)
   Prompt2ImPipe(model_id: str, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline] = None, lpw=False, **args)
+
Prompt2ImPipe(model_id: str, pipe: Optional[diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline] = None, **args)
 
Base class for all pipelines that take a prompt and return an image.
 
 
       
get_prompt(prompt_desc)
-
+

+ + + + + +
 
+Data
       thread_data = <_thread._local object>
\ No newline at end of file diff --git a/docs/generated/multigen.worker.html b/docs/generated/multigen.worker.html index b33deef..96de9c1 100644 --- a/docs/generated/multigen.worker.html +++ b/docs/generated/multigen.worker.html @@ -17,9 +17,11 @@ Modules         -
time
+
concurrent
+
random
+
time
torch
-

+

- @@ -53,18 +55,7 @@
Methods defined here:
-
get_pipeline(self, pipe_name, model_id, cnet=None, xl=False)
- -
run(self)
Method representing the thread's activity.

-You may override this method in a subclass. The standard run() method
-invokes the callable object passed to the object's constructor as the
-target argument, if any, with sequential and keyword arguments taken
-from the args and kwargs arguments, respectively.
- -
-Methods inherited from multigen.worker_base.ServiceThreadBase:
-
__init__(self, cfg_file)
This constructor should always be called with keyword arguments. Arguments are:
+
__init__(self, *args, **kwargs)
This constructor should always be called with keyword arguments. Arguments are:
 
*group* should be None; reserved for future extension when a ThreadGroup
class is implemented.
@@ -84,6 +75,19 @@ the base class constructor (Thread.__init__()) before doing anything
else to the thread.
+
get_pipeline(self, pipe_name, model_id, cnet=None, xl=False)
+ +
run(self)
Method representing the thread's activity.

+You may override this method in a subclass. The standard run() method
+invokes the callable object passed to the object's constructor as the
+target argument, if any, with sequential and keyword arguments taken
+from the args and kwargs arguments, respectively.
+ +
worker(self, data)
+ +
+Methods inherited from multigen.worker_base.ServiceThreadBase:
close_session(self, session_id)
get_image_count(self, session_id)
diff --git a/multigen/pipes.py b/multigen/pipes.py
index e2f47c0..2bec70b 100755
--- a/multigen/pipes.py
+++ b/multigen/pipes.py
@@ -493,12 +493,12 @@ def setup(self, original_image=None, image_painted=None, mask=None, blur=4,
         original_image (str or Image.Image, *optional*):
             The original image. Defaults to None.
         image_painted (str or Image.Image, *optional*):
-            The painted image. Defaults to None.
+            A modified version of original_image; skip this parameter if mask is passed. Defaults to None.
         mask (array-like or Image.Image, *optional*):
-            The mask. Defaults to None. If None tt will be computed from the difference
+            The mask. Defaults to None. If None it will be computed from the difference
             between original_image and image_painted
         blur (int, *optional*):
-            The blur radius for the mask. Defaults to 4.
+            The blur radius applied to the mask during generation. Defaults to 4.
         blur_compose (int, *optional*):
             The blur radius for composing the original and generated images. Defaults to 4.
         sample_mode (str, *optional*):
diff --git a/util/gen.py b/util/gen.py
new file mode 100644
index 0000000..a632316
--- /dev/null
+++ b/util/gen.py
@@ -0,0 +1,23 @@
+import os
+import pydoc
+
+def list_modules(package):
+    package_dir = package.__path__[0]
+    modules = []
+    for dirpath, _, filenames in os.walk(package_dir):
+        for filename in filenames:
+            if filename.endswith('.py') and filename != '__init__.py':
+                module = os.path.relpath(os.path.join(dirpath, filename), package_dir)
+                module = module[:-3].replace(os.path.sep, '.')
+                modules.append(f"{package.__name__}.{module}")
+    return modules
+
+if __name__ == "__main__":
+    import multigen
+    modules = list_modules(multigen)
+    modules.insert(0, 'multigen')  # Ensure the package itself is documented first
+
+    for module in modules:
+        print(f"Generating documentation for {module}...")
+        pydoc.writedoc(module)
+
 
@@ -41,7 +43,7 @@ class ServiceThread(multigen.worker_base.ServiceThreadBase)
   ServiceThread(cfg_file)
+
ServiceThread(*args, **kwargs)