TypeError: memory_efficient_attention() got an unexpected keyword argument 'scale' #638
scotchbirdy started this conversation in General
Getting stuck here when training a LoRA. Is the scale variable something I should just take out of that function? Here's the whole error:
import network module: networks.lora
[Dataset 0]
caching latents.
0%| | 0/53 [00:01<?, ?it/s]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\train_network.py:974 in │
│ │
│ 971 │ args = train_util.read_config_from_file(args, parser) │
│ 972 │ │
│ 973 │ trainer = NetworkTrainer() │
│ ❱ 974 │ trainer.train(args) │
│ 975 │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\train_network.py:250 in train │
│ │
│ 247 │ │ │ vae.requires_grad_(False) │
│ 248 │ │ │ vae.eval() │
│ 249 │ │ │ with torch.no_grad(): │
│ ❱ 250 │ │ │ │ train_dataset_group.cache_latents(vae, args.vae_batch_size, args.cache_l │
│ 251 │ │ │ vae.to("cpu") │
│ 252 │ │ │ if torch.cuda.is_available(): │
│ 253 │ │ │ │ torch.cuda.empty_cache() │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\library\train_util.py:1728 in cache_latents │
│ │
│ 1725 │ def cache_latents(self, vae, vae_batch_size=1, cache_to_disk=False, is_main_process= │
│ 1726 │ │ for i, dataset in enumerate(self.datasets): │
│ 1727 │ │ │ print(f"[Dataset {i}]") │
│ ❱ 1728 │ │ │ dataset.cache_latents(vae, vae_batch_size, cache_to_disk, is_main_process) │
│ 1729 │ │
│ 1730 │ def is_latent_cacheable(self) -> bool: │
│ 1731 │ │ return all([dataset.is_latent_cacheable() for dataset in self.datasets]) │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\library\train_util.py:911 in cache_latents │
│ │
│ 908 │ │ │ img_tensors = torch.stack(images, dim=0) │
│ 909 │ │ │ img_tensors = img_tensors.to(device=vae.device, dtype=vae.dtype) │
│ 910 │ │ │ │
│ ❱ 911 │ │ │ latents = vae.encode(img_tensors).latent_dist.sample().to("cpu") │
│ 912 │ │ │ │
│ 913 │ │ │ for info, latent in zip(batch, latents): │
│ 914 │ │ │ │ # check NaN │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\utils\accelera │
│ te_utils.py:46 in wrapper │
│ │
│ 43 │ def wrapper(self, *args, **kwargs): │
│ 44 │ │ if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"): │
│ 45 │ │ │ self._hf_hook.pre_forward(self) │
│ ❱ 46 │ │ return method(self, *args, **kwargs) │
│ 47 │ │
│ 48 │ return wrapper │
│ 49 │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\models\autoenc │
│ oder_kl.py:164 in encode │
│ │
│ 161 │ │ if self.use_tiling and (x.shape[-1] > self.tile_sample_min_size or x.shape[-2] > │
│ 162 │ │ │ return self.tiled_encode(x, return_dict=return_dict) │
│ 163 │ │ │
│ ❱ 164 │ │ h = self.encoder(x) │
│ 165 │ │ moments = self.quant_conv(h) │
│ 166 │ │ posterior = DiagonalGaussianDistribution(moments) │
│ 167 │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\torch\nn\modules\module. │
│ py:1130 in _call_impl │
│ │
│ 1127 │ │ # this function, and just call forward. │
│ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1130 │ │ │ return forward_call(*input, **kwargs) │
│ 1131 │ │ # Do not call functions when jit is used │
│ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\models\vae.py: │
│ 142 in forward │
│ │
│ 139 │ │ │ │ sample = down_block(sample) │
│ 140 │ │ │ │
│ 141 │ │ │ # middle │
│ ❱ 142 │ │ │ sample = self.mid_block(sample) │
│ 143 │ │ │
│ 144 │ │ # post-process │
│ 145 │ │ sample = self.conv_norm_out(sample) │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\torch\nn\modules\module. │
│ py:1130 in _call_impl │
│ │
│ 1127 │ │ # this function, and just call forward. │
│ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1130 │ │ │ return forward_call(*input, **kwargs) │
│ 1131 │ │ # Do not call functions when jit is used │
│ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\models\unet_2d │
│ _blocks.py:472 in forward │
│ │
│ 469 │ │ hidden_states = self.resnets[0](hidden_states, temb) │
│ 470 │ │ for attn, resnet in zip(self.attentions, self.resnets[1:]): │
│ 471 │ │ │ if attn is not None: │
│ ❱ 472 │ │ │ │ hidden_states = attn(hidden_states, temb=temb) │
│ 473 │ │ │ hidden_states = resnet(hidden_states, temb) │
│ 474 │ │ │
│ 475 │ │ return hidden_states │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\torch\nn\modules\module. │
│ py:1130 in _call_impl │
│ │
│ 1127 │ │ # this function, and just call forward. │
│ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1130 │ │ │ return forward_call(*input, **kwargs) │
│ 1131 │ │ # Do not call functions when jit is used │
│ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\models\attenti │
│ on_processor.py:320 in forward │
│ │
│ 317 │ │ # The `Attention` class can call different attention processors / attention func │
│ 318 │ │ # here we simply pass along all tensors to the selected processor class │
│ 319 │ │ # For standard processors that are defined here, `**cross_attention_kwargs` is e │
│ ❱ 320 │ │ return self.processor( │
│ 321 │ │ │ self, │
│ 322 │ │ │ hidden_states, │
│ 323 │ │ │ encoder_hidden_states=encoder_hidden_states, │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\diffusers\models\attenti │
│ on_processor.py:1045 in __call__ │
│ │
│ 1042 │ │ key = attn.head_to_batch_dim(key).contiguous() │
│ 1043 │ │ value = attn.head_to_batch_dim(value).contiguous() │
│ 1044 │ │ │
│ ❱ 1045 │ │ hidden_states = xformers.ops.memory_efficient_attention( │
│ 1046 │ │ │ query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=att │
│ 1047 │ │ ) │
│ 1048 │ │ hidden_states = hidden_states.to(query.dtype) │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: memory_efficient_attention() got an unexpected keyword argument 'scale'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\meand\AppData\Local\Programs\Python\Python310\lib\runpy.py:196 in _run_module_as_main │
│ │
│ 193 │ main_globals = sys.modules["__main__"].__dict__ │
│ 194 │ if alter_argv: │
│ 195 │ │ sys.argv[0] = mod_spec.origin │
│ ❱ 196 │ return _run_code(code, main_globals, None, │
│ 197 │ │ │ │ │ "__main__", mod_spec) │
│ 198 │
│ 199 def run_module(mod_name, init_globals=None, │
│ │
│ C:\Users\meand\AppData\Local\Programs\Python\Python310\lib\runpy.py:86 in _run_code │
│ │
│ 83 │ │ │ │ │ loader = loader, │
│ 84 │ │ │ │ │ package = pkg_name, │
│ 85 │ │ │ │ │ spec = mod_spec) │
│ ❱ 86 │ exec(code, run_globals) │
│ 87 │ return run_globals │
│ 88 │
│ 89 def _run_module_code(code, init_globals=None, │
│ │
│ in <module>:7 │
│ │
│ 4 from accelerate.commands.accelerate_cli import main │
│ 5 if __name__ == '__main__': │
│ 6 │ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) │
│ ❱ 7 │ sys.exit(main()) │
│ 8 │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\accelerate\commands\acce │
│ lerate_cli.py:45 in main │
│ │
│ 42 │ │ exit(1) │
│ 43 │ │
│ 44 │ # Run │
│ ❱ 45 │ args.func(args) │
│ 46 │
│ 47 │
│ 48 if __name__ == "__main__": │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\accelerate\commands\laun │
│ ch.py:918 in launch_command │
│ │
│ 915 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │
│ 916 │ │ sagemaker_launcher(defaults, args) │
│ 917 │ else: │
│ ❱ 918 │ │ simple_launcher(args) │
│ 919 │
│ 920 │
│ 921 def main(): │
│ │
│ C:\Users\meand\Desktop_AI\LoraTraining\kohya_ss\venv\lib\site-packages\accelerate\commands\laun │
│ ch.py:580 in simple_launcher │
│ │
│ 577 │ process.wait() │
│ 578 │ if process.returncode != 0: │
│ 579 │ │ if not args.quiet: │
│ ❱ 580 │ │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │
│ 581 │ │ else: │
│ 582 │ │ │ sys.exit(1) │
│ 583 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['C:\Users\meand\Desktop\_AI\LoraTraining\kohya_ss\venv\Scripts\python.exe', './train_network.py', '--enable_bucket',
'--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5', '--train_data_dir=C:/Users/meand/Desktop/deepblack/images/img', '--resolution=512,512',
'--output_dir=C:/Users/meand/Desktop/deepblack/model', '--logging_dir=C:/Users/meand/Desktop/deepblack/log', '--network_alpha=128', '--save_model_as=safetensors',
'--network_module=networks.lora', '--text_encoder_lr=5e-05', '--unet_lr=0.0001', '--network_dim=128', '--output_name=My_LoRA_Model', '--lr_scheduler_num_cycles=1', '--learning_rate=0.0001',
'--lr_scheduler=constant', '--train_batch_size=2', '--max_train_steps=530', '--save_every_n_epochs=1', '--mixed_precision=fp16', '--save_precision=fp16', '--seed=1234',
'--caption_extension=.txt', '--cache_latents', '--optimizer_type=AdamW8bit', '--max_data_loader_n_workers=1', '--clip_skip=2', '--bucket_reso_steps=64', '--xformers', '--bucket_no_upscale']'
returned non-zero exit status 1.
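
For context: the TypeError is raised from diffusers' XFormersAttnProcessor (attention_processor.py:1045), which passes a scale= keyword to xformers.ops.memory_efficient_attention. Older xformers builds don't accept that keyword, so this looks like a version mismatch between the installed diffusers and xformers packages rather than a bug in train_network.py. Instead of deleting the argument by hand, a guard like the minimal sketch below could forward scale only when the installed xformers actually supports it; the helper name attention_compat and the tensor shapes are made up purely for illustration.

```python
# Not kohya-ss code: a minimal compatibility sketch. The helper name
# attention_compat and the tensor shapes below are illustrative only.
import inspect

import torch
import xformers.ops


def attention_compat(query, key, value, attn_bias=None, scale=None):
    """Call memory_efficient_attention, forwarding `scale` only when the
    installed xformers build accepts that keyword (older releases don't)."""
    kwargs = {"attn_bias": attn_bias}
    params = inspect.signature(xformers.ops.memory_efficient_attention).parameters
    if scale is not None and "scale" in params:
        kwargs["scale"] = scale
    return xformers.ops.memory_efficient_attention(query, key, value, **kwargs)


# Illustrative shapes only: (batch*heads, sequence length, head dim), fp16 on GPU.
q = torch.randn(2, 64, 40, device="cuda", dtype=torch.float16)
print(attention_compat(q, q, q).shape)
```

That said, reinstalling or upgrading xformers inside the venv so its version matches what the installed diffusers expects is probably a cleaner fix than patching attention_processor.py by hand.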