diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py
index 21ff2841700d..2150b70192a3 100644
--- a/src/diffusers/loaders/single_file_utils.py
+++ b/src/diffusers/loaders/single_file_utils.py
@@ -100,6 +100,7 @@
     ],
     "autoencoder-dc": "decoder.stages.1.op_list.0.main.conv.conv.bias",
     "autoencoder-dc-sana": "encoder.project_in.conv.bias",
+    "kolors": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight",
 }
 
 DIFFUSERS_DEFAULT_PIPELINE_PATHS = {
@@ -151,6 +152,7 @@
     "autoencoder-dc-f64c128": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f64c128-mix-1.0-diffusers"},
     "autoencoder-dc-f32c32": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f32c32-mix-1.0-diffusers"},
     "autoencoder-dc-f32c32-sana": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers"},
+    "kolors": {"pretrained_model_name_or_path": "Kwai-Kolors/Kolors-diffusers"},
 }
 
 # Use to configure model sample size when original config is provided
@@ -597,6 +599,9 @@ def infer_diffusers_model_type(checkpoint):
         else:
             model_type = "autoencoder-dc-f128c512"
 
+    elif CHECKPOINT_KEY_NAMES["kolors"] in checkpoint and checkpoint[CHECKPOINT_KEY_NAMES["kolors"]].shape[-1] == 2048:
+        model_type = "kolors"
+
     else:
         model_type = "v1"
 
@@ -961,6 +966,9 @@ def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs):
     """
     Takes a state dict and a config, and returns a converted checkpoint.
    """
+    is_diffusers = "time_embedding.linear_1.weight" in checkpoint
+    if is_diffusers:
+        return checkpoint
     # extract state_dict for UNet
     unet_state_dict = {}
     keys = list(checkpoint.keys())
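
Example (not part of the diff): a minimal sketch of how the new detection path would be exercised when loading a Kolors UNet from a single file. The checkpoint filename is hypothetical; the comments restate what the diff itself does.

```python
import torch
from diffusers import UNet2DConditionModel

# Hypothetical local checkpoint. Per the diff, a state dict containing
# "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight" with a
# last dimension of 2048 is inferred as "kolors", and its default config is
# fetched from Kwai-Kolors/Kolors-diffusers.
unet = UNet2DConditionModel.from_single_file(
    "kolors_unet.safetensors",
    torch_dtype=torch.float16,
)

# Checkpoints already in diffusers layout (i.e. containing
# "time_embedding.linear_1.weight") are now returned unchanged by
# convert_ldm_unet_checkpoint instead of going through LDM key remapping.
```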