We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ key                       ┃ value                          ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ train_path                │ data/mixed_train_dataset.jsonl │
│ dev_path                  │ data/mixed_dev_dataset.jsonl   │
│ save_dir                  │ checkpoints/finetune           │
│ max_source_seq_len        │ 8                              │
│ max_target_seq_len        │ 8                              │
│ batch_size                │ 1                              │
│ learning_rate             │ 3e-05                          │
│ weight_decay              │ 0.0                            │
│ num_train_epochs          │ 2                              │
│ warmup_ratio              │ 0.0                            │
│ save_freq                 │ 1000                           │
│ logging_steps             │ 100                            │
│ device                    │ cuda:1                         │
│ img_log_dir               │ log/fintune_log                │
│ img_log_name              │ ChatGLM Fine-Tune              │
│ use_lora                  │ True                           │
│ use_ptuning               │ False                          │
│ lora_rank                 │ 4                              │
│ pre_seq_len               │ 128                            │
│ prefix_projection         │ False                          │
│ preprocessing_num_workers │ 1                              │
│ quantization_bit          │ 4                              │
└───────────────────────────┴────────────────────────────────┘
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ /ml/szs/Project/LLM_szs/transformers_tasks/LLM/finetune/train.py:352 in │ │ │ │ 349 │ │ 350 │ │ 351 if name == "main": │ │ ❱ 352 │ main() │ │ 353 │ │ │ │ /ml/szs/Project/LLM_szs/transformers_tasks/LLM/finetune/train.py:295 in main │ │ │ │ 292 │ │ for batch in train_dataloader: │ │ 293 │ │ │ if args.use_lora: │ │ 294 │ │ │ │ with autocast(): │ │ ❱ 295 │ │ │ │ │ loss = model( │ │ 296 │ │ │ │ │ │ input_ids=batch['input_ids'].to(dtype=torch.long, device=args.de │ │ 297 │ │ │ │ │ │ labels=batch['labels'].to(dtype=torch.long, device=args.device) │ │ 298 │ │ │ │ │ ).loss │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/peft/peft_model.py:678 in forward │ │ │ │ 675 │ ): │ │ 676 │ │ peft_config = self.active_peft_config │ │ 677 │ │ if not isinstance(peft_config, PromptLearningConfig): │ │ ❱ 678 │ │ │ return self.base_model( │ │ 679 │ │ │ │ input_ids=input_ids, │ │ 680 │ │ │ │ attention_mask=attention_mask, │ │ 681 │ │ │ │ inputs_embeds=inputs_embeds, │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return 
forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:1160 in forward │ │ │ │ 1157 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │ │ 1158 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │ │ 1159 │ │ │ │ ❱ 1160 │ │ transformer_outputs = self.transformer( │ │ 1161 │ │ │ input_ids=input_ids, │ │ 1162 │ │ │ position_ids=position_ids, │ │ 1163 │ │ │ attention_mask=attention_mask, │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:973 in forward │ │ │ │ 970 │ │ │ │ │ output_attentions │ │ 971 │ │ │ │ ) │ │ 972 │ │ │ else: │ │ ❱ 973 │ │ │ │ layer_ret = layer( │ │ 974 │ │ │ │ │ hidden_states, │ │ 975 │ │ │ │ │ position_ids=position_ids, │ │ 976 │ │ │ │ │ attention_mask=attention_mask, │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not 
call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:614 in forward │ │ │ │ 611 │ │ attention_input = self.input_layernorm(hidden_states) │ │ 612 │ │ │ │ 613 │ │ # Self attention. │ │ ❱ 614 │ │ attention_outputs = self.attention( │ │ 615 │ │ │ attention_input, │ │ 616 │ │ │ position_ids, │ │ 617 │ │ │ attention_mask=attention_mask, │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:439 in forward │ │ │ │ 436 │ │ """ │ │ 437 │ │ │ │ 438 │ │ # [seq_len, batch, 3 * hidden_size] │ │ ❱ 439 │ │ mixed_raw_layer = self.query_key_value(hidden_states) │ │ 440 │ │ │ │ 441 │ │ # [seq_len, batch, 3 * hidden_size] --> [seq_len, batch, num_attention_heads, 3 │ │ 442 │ │ new_tensor_shape = mixed_raw_layer.size()[:-1] + ( │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │ │ │ │ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │ │ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │ │ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │ │ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │ │ 1502 │ │ # Do not call functions when jit is used │ │ 1503 │ │ full_backward_hooks, non_full_backward_hooks = 
[], [] │ │ 1504 │ │ backward_pre_hooks = [] │ │ │ │ /ml/temp/envs/llm_env/lib/python3.8/site-packages/peft/tuners/lora.py:565 in forward │ │ │ │ 562 │ │ │ │ self.unmerge() │ │ 563 │ │ │ result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self. │ │ 564 │ │ elif self.r[self.active_adapter] > 0 and not self.merged: │ │ ❱ 565 │ │ │ result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self. │ │ 566 │ │ │ │ │ 567 │ │ │ x = x.to(self.lora_A[self.active_adapter].weight.dtype) │ │ 568 │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ RuntimeError: self and mat2 must have the same dtype 量化后的模型,训练的时候会提示类型不匹配,多卡也是同样的问题,训练时带参数--quantization_bit 4 @HarderThenHarder 麻烦问下如何解决
The text was updated successfully, but these errors were encountered:
@HarderThenHarder 求解答
Sorry, something went wrong.
@shangzhensen 请问下解决了么,碰到相同的问题
同求
No branches or pull requests
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /ml/szs/Project/LLM_szs/transformers_tasks/LLM/finetune/train.py:352 in │
│ │
│ 349 │
│ 350 │
│ 351 if __name__ == "__main__": │
│ ❱ 352 │ main() │
│ 353 │
│ │
│ /ml/szs/Project/LLM_szs/transformers_tasks/LLM/finetune/train.py:295 in main │
│ │
│ 292 │ │ for batch in train_dataloader: │
│ 293 │ │ │ if args.use_lora: │
│ 294 │ │ │ │ with autocast(): │
│ ❱ 295 │ │ │ │ │ loss = model( │
│ 296 │ │ │ │ │ │ input_ids=batch['input_ids'].to(dtype=torch.long, device=args.de │
│ 297 │ │ │ │ │ │ labels=batch['labels'].to(dtype=torch.long, device=args.device) │
│ 298 │ │ │ │ │ ).loss │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/peft/peft_model.py:678 in forward │
│ │
│ 675 │ ): │
│ 676 │ │ peft_config = self.active_peft_config │
│ 677 │ │ if not isinstance(peft_config, PromptLearningConfig): │
│ ❱ 678 │ │ │ return self.base_model( │
│ 679 │ │ │ │ input_ids=input_ids, │
│ 680 │ │ │ │ attention_mask=attention_mask, │
│ 681 │ │ │ │ inputs_embeds=inputs_embeds, │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:1160 in forward │
│ │
│ 1157 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │
│ 1158 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 1159 │ │ │
│ ❱ 1160 │ │ transformer_outputs = self.transformer( │
│ 1161 │ │ │ input_ids=input_ids, │
│ 1162 │ │ │ position_ids=position_ids, │
│ 1163 │ │ │ attention_mask=attention_mask, │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:973 in forward │
│ │
│ 970 │ │ │ │ │ output_attentions │
│ 971 │ │ │ │ ) │
│ 972 │ │ │ else: │
│ ❱ 973 │ │ │ │ layer_ret = layer( │
│ 974 │ │ │ │ │ hidden_states, │
│ 975 │ │ │ │ │ position_ids=position_ids, │
│ 976 │ │ │ │ │ attention_mask=attention_mask, │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:614 in forward │
│ │
│ 611 │ │ attention_input = self.input_layernorm(hidden_states) │
│ 612 │ │ │
│ 613 │ │ # Self attention. │
│ ❱ 614 │ │ attention_outputs = self.attention( │
│ 615 │ │ │ attention_input, │
│ 616 │ │ │ position_ids, │
│ 617 │ │ │ attention_mask=attention_mask, │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/yrobot/.cache/huggingface/modules/transformers_modules/modeling_chatglm.py:439 in forward │
│ │
│ 436 │ │ """ │
│ 437 │ │ │
│ 438 │ │ # [seq_len, batch, 3 * hidden_size] │
│ ❱ 439 │ │ mixed_raw_layer = self.query_key_value(hidden_states) │
│ 440 │ │ │
│ 441 │ │ # [seq_len, batch, 3 * hidden_size] --> [seq_len, batch, num_attention_heads, 3 │
│ 442 │ │ new_tensor_shape = mixed_raw_layer.size()[:-1] + ( │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /ml/temp/envs/llm_env/lib/python3.8/site-packages/peft/tuners/lora.py:565 in forward │
│ │
│ 562 │ │ │ │ self.unmerge() │
│ 563 │ │ │ result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self. │
│ 564 │ │ elif self.r[self.active_adapter] > 0 and not self.merged: │
│ ❱ 565 │ │ │ result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self. │
│ 566 │ │ │ │
│ 567 │ │ │ x = x.to(self.lora_A[self.active_adapter].weight.dtype) │
│ 568 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: self and mat2 must have the same dtype
使用量化后的模型训练时会报 dtype 类型不匹配的错误(见上方 RuntimeError),多卡训练也有同样的问题。训练时使用的参数为 `--quantization_bit 4`。
@HarderThenHarder 麻烦问下如何解决
The text was updated successfully, but these errors were encountered: