Set downcast_bf16 to 'no' in config.yaml and drop the explicit
`--torch_dtype bfloat16` flag from the accelerate launch command in train.sh.

NOTE(review): the leading whitespace of the context lines below was
reconstructed (2-space YAML nesting, 4-space shell continuation indent);
verify it against the working tree before running `git apply`.

diff --git a/config.yaml b/config.yaml
index 7fc26f8..9c09b6b 100644
--- a/config.yaml
+++ b/config.yaml
@@ -8,7 +8,7 @@ deepspeed_config:
   zero3_save_16bit_model: true
   zero_stage: 3
 distributed_type: DEEPSPEED
-downcast_bf16: 'yes'
+downcast_bf16: 'no'
 dynamo_backend: 'NO'
 fsdp_config: {}
 machine_rank: 0
diff --git a/train.sh b/train.sh
index 2012b5e..8b3fa3c 100755
--- a/train.sh
+++ b/train.sh
@@ -9,7 +9,6 @@ accelerate launch --config_file config.yaml train.py \
     --lora_dropout 0.05 \
     --bf16 True \
     --bf16_full_eval True \
-    --torch_dtype bfloat16 \
     --do_train \
     --do_eval \
     --output_dir $output_dir \