Skip to content

Commit af7d305

Browse files
authored
Wan video (#338)
1 parent 427232c commit af7d305

18 files changed

+3892
-5
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ Until now, DiffSynth Studio has supported the following models:
3636

3737
## News
3838

39-
- **February 17, 2024** We support [StepVideo](https://modelscope.cn/models/stepfun-ai/stepvideo-t2v/summary)! State-of-the-art video synthesis model! See [./examples/stepvideo](./examples/stepvideo/).
39+
- **February 25, 2025** We support Wan-Video, a collection of video synthesis models open-sourced by Alibaba. See [./examples/wanvideo/](./examples/wanvideo/).
40+
41+
- **February 17, 2025** We support [StepVideo](https://modelscope.cn/models/stepfun-ai/stepvideo-t2v/summary)! State-of-the-art video synthesis model! See [./examples/stepvideo](./examples/stepvideo/).
4042

4143
- **December 31, 2024** We propose EliGen, a novel framework for precise entity-level controlled text-to-image generation, complemented by an inpainting fusion pipeline to extend its capabilities to image inpainting tasks. EliGen seamlessly integrates with existing community models, such as IP-Adapter and In-Context LoRA, enhancing its versatility. For more details, see [./examples/EntityControl](./examples/EntityControl/).
4244
- Paper: [EliGen: Entity-Level Controlled Image Generation with Regional Attention](https://arxiv.org/abs/2501.01097)
@@ -118,7 +120,7 @@ cd DiffSynth-Studio
118120
pip install -e .
119121
```
120122

121-
Or install from pypi:
123+
Or install from PyPI (note: PyPI releases lag behind the repository; if you want the latest features, do not use this installation method):
122124

123125
```
124126
pip install diffsynth

diffsynth/configs/model_config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@
5454
from ..models.stepvideo_vae import StepVideoVAE
5555
from ..models.stepvideo_dit import StepVideoModel
5656

57+
from ..models.wan_video_dit import WanModel
58+
from ..models.wan_video_text_encoder import WanTextEncoder
59+
from ..models.wan_video_image_encoder import WanImageEncoder
60+
from ..models.wan_video_vae import WanVideoVAE
61+
5762

5863
model_loader_configs = [
5964
# These configs are provided for detecting model type automatically.
@@ -108,6 +113,13 @@
108113
(None, "84ef4bd4757f60e906b54aa6a7815dc6", ["hunyuan_video_dit"], [HunyuanVideoDiT], "civitai"),
109114
(None, "68beaf8429b7c11aa8ca05b1bd0058bd", ["stepvideo_vae"], [StepVideoVAE], "civitai"),
110115
(None, "5c0216a2132b082c10cb7a0e0377e681", ["stepvideo_dit"], [StepVideoModel], "civitai"),
116+
(None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
117+
(None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
118+
(None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
119+
(None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
120+
(None, "5941c53e207d62f20f9025686193c40b", ["wan_video_image_encoder"], [WanImageEncoder], "civitai"),
121+
(None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
122+
(None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
111123
]
112124
huggingface_model_loader_configs = [
113125
# These configs are provided for detecting model type automatically.

diffsynth/models/kolors_text_encoder.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ def __init__(self, code: bytes, function_names: List[str]):
7373
)
7474
except Exception as exception:
7575
kernels = None
76-
logger.warning("Failed to load cpm_kernels:" + str(exception))
7776

7877

7978
class W8A16Linear(torch.autograd.Function):

diffsynth/models/lora.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .hunyuan_dit import HunyuanDiT
99
from .cog_dit import CogDiT
1010
from .hunyuan_video_dit import HunyuanVideoDiT
11+
from .wan_video_dit import WanModel
1112

1213

1314

@@ -197,7 +198,7 @@ def __init__(self):
197198

198199
class GeneralLoRAFromPeft:
199200
def __init__(self):
200-
self.supported_model_classes = [SDUNet, SDXLUNet, SD3DiT, HunyuanDiT, FluxDiT, CogDiT]
201+
self.supported_model_classes = [SDUNet, SDXLUNet, SD3DiT, HunyuanDiT, FluxDiT, CogDiT, WanModel]
201202

202203

203204
def fetch_device_dtype_from_state_dict(self, state_dict):

diffsynth/models/model_manager.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ def load_model_from_single_file(state_dict, model_names, model_classes, model_re
6969
model_state_dict, extra_kwargs = state_dict_results, {}
7070
torch_dtype = torch.float32 if extra_kwargs.get("upcast_to_float32", False) else torch_dtype
7171
with init_weights_on_device():
72-
model= model_class(**extra_kwargs)
72+
model = model_class(**extra_kwargs)
73+
if hasattr(model, "eval"):
74+
model = model.eval()
7375
model.load_state_dict(model_state_dict, assign=True)
7476
model = model.to(dtype=torch_dtype, device=device)
7577
loaded_model_names.append(model_name)

0 commit comments

Comments (0)