-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
FLUX XLabs IP-Adapter Support (#7157)
## Summary This PR adds support for the XLabs IP-Adapter (https://huggingface.co/XLabs-AI/flux-ip-adapter) in workflows. Linear UI integration is coming in a follow-up PR. The XLabs IP-Adapter can be installed in the Starter Models tab. Usage tips: - Use a `cfg_scale` value of 2.0 to 4.0 - Start with an IP-Adapter weight of ~0.6 and adjust from there. - Set `cfg_scale_start_step = 1` - Set `cfg_scale_end_step` to roughly the halfway point (it's unnecessary to apply CFG to all steps, and this will improve processing time). Sample workflow: <img width="976" alt="image" src="https://github.com/user-attachments/assets/4627b459-7e5a-4703-80e7-f7575c5fce19"> Result: ![image](https://github.com/user-attachments/assets/220b6a4c-69c6-447f-8df6-8aa6a56f3b3f) ## Related Issues / Discussions Prerequisite: #7152 ## Remaining TODO: - [ ] Update default workflows. ## QA Instructions - [x] Test basic happy path - [x] Test with multiple IP-Adapters (it runs, but results aren't great) - [ ] ~Test with multiple images to a single IP-Adapter~ (this is not supported for now) - [ ] Test automatic runtime installation of CLIP-L, CLIP-H, and CLIP-G image encoder models if they are not already installed. - [ ] Test starter model installation of the XLabs FLUX IP-Adapter - [ ] Test SD and SDXL IP-Adapters for regression. - [ ] Check peak memory utilization. ## Merge Plan - [ ] Merge #7152 - [ ] Change target branch to main ## Checklist - [x] _The PR has a short but descriptive title, suitable for a changelog_ - [x] _Tests added / updated (if applicable)_ - [ ] _Documentation added / updated (if applicable)_
- Loading branch information
Showing
30 changed files
with
1,136 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from builtins import float | ||
from typing import List, Literal, Union | ||
|
||
from pydantic import field_validator, model_validator | ||
from typing_extensions import Self | ||
|
||
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation | ||
from invokeai.app.invocations.fields import InputField, UIType | ||
from invokeai.app.invocations.ip_adapter import ( | ||
CLIP_VISION_MODEL_MAP, | ||
IPAdapterField, | ||
IPAdapterInvocation, | ||
IPAdapterOutput, | ||
) | ||
from invokeai.app.invocations.model import ModelIdentifierField | ||
from invokeai.app.invocations.primitives import ImageField | ||
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights | ||
from invokeai.app.services.shared.invocation_context import InvocationContext | ||
from invokeai.backend.model_manager.config import ( | ||
IPAdapterCheckpointConfig, | ||
IPAdapterInvokeAIConfig, | ||
) | ||
|
||
|
||
@invocation(
    "flux_ip_adapter",
    title="FLUX IP-Adapter",
    tags=["ip_adapter", "control"],
    category="ip_adapter",
    version="1.0.0",
    classification=Classification.Prototype,
)
class FluxIPAdapterInvocation(BaseInvocation):
    """Collects FLUX IP-Adapter info to pass to other nodes."""

    # FluxIPAdapterInvocation is based closely on IPAdapterInvocation, but with some unsupported
    # features removed (multiple images per adapter, per-adapter masks, target blocks).

    # The IP-Adapter image prompt(s).
    image: ImageField = InputField(description="The IP-Adapter image prompt(s).")
    # Identifier of the IP-Adapter model to load.
    ip_adapter_model: ModelIdentifierField = InputField(
        description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel
    )
    # Currently, the only known ViT model used by FLUX IP-Adapters is ViT-L.
    clip_vision_model: Literal["ViT-L"] = InputField(description="CLIP Vision model to use.", default="ViT-L")
    # A single weight, or a per-step list of weights, applied to the IP-Adapter's influence.
    weight: Union[float, List[float]] = InputField(
        default=1, description="The weight given to the IP-Adapter", title="Weight"
    )
    begin_step_percent: float = InputField(
        default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)"
    )
    end_step_percent: float = InputField(
        default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
    )

    @field_validator("weight")
    @classmethod
    def validate_ip_adapter_weight(cls, v: Union[float, List[float]]) -> Union[float, List[float]]:
        """Validate the weight field via the shared validate_weights helper.

        Note: the annotation was previously `float`, but the `weight` field is declared
        `Union[float, List[float]]`, so a list of floats is a legal input here.
        """
        validate_weights(v)
        return v

    @model_validator(mode="after")
    def validate_begin_end_step_percent(self) -> Self:
        """Cross-field validation of begin/end step percentages (delegated to validate_begin_end_step)."""
        validate_begin_end_step(self.begin_step_percent, self.end_step_percent)
        return self

    def invoke(self, context: InvocationContext) -> IPAdapterOutput:
        """Resolve the CLIP Vision image encoder for this IP-Adapter and emit an IPAdapterField."""
        # Lookup the model config to confirm the selected model is a supported IP-Adapter type.
        # `ip_adapter_info` is only used for this sanity check.
        ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
        assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))

        # Note: There is an IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy,
        # so the encoder is resolved from the static CLIP_VISION_MODEL_MAP instead.
        image_encoder_starter_model = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
        image_encoder_model_id = image_encoder_starter_model.source
        image_encoder_model_name = image_encoder_starter_model.name
        image_encoder_model = IPAdapterInvocation.get_clip_image_encoder(
            context, image_encoder_model_id, image_encoder_model_name
        )

        return IPAdapterOutput(
            ip_adapter=IPAdapterField(
                image=self.image,
                ip_adapter_model=self.ip_adapter_model,
                image_encoder_model=ModelIdentifierField.from_config(image_encoder_model),
                weight=self.weight,
                target_blocks=[],  # target_blocks is currently unused for FLUX IP-Adapters.
                begin_step_percent=self.begin_step_percent,
                end_step_percent=self.end_step_percent,
                mask=None,  # mask is currently unused for FLUX IP-Adapters.
            ),
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.