From 9099020e8e89a1d470399b4bc28939a1badc2a44 Mon Sep 17 00:00:00 2001 From: qusaiw Date: Tue, 28 Oct 2025 08:58:14 +0300 Subject: [PATCH] Fix argument max_seq_length (not supported) --- recipes/constitutional-ai/sft/config_anthropic.yaml | 2 +- recipes/constitutional-ai/sft/config_grok.yaml | 2 +- recipes/smollm/sft/config.yaml | 2 +- recipes/smollm2/sft/config.yaml | 2 +- recipes/smollm2/sft/config_smol.yaml | 2 +- recipes/starchat2-15b/sft/config_v0.1.yaml | 2 +- recipes/zephyr-7b-beta/sft/config_full.yaml | 2 +- recipes/zephyr-7b-beta/sft/config_qlora.yaml | 2 +- recipes/zephyr-7b-gemma/sft/config_full.yaml | 2 +- scripts/sft.py | 2 +- tests/fixtures/config_sft_full.yaml | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/recipes/constitutional-ai/sft/config_anthropic.yaml b/recipes/constitutional-ai/sft/config_anthropic.yaml index cfaba96e..a57e0034 100644 --- a/recipes/constitutional-ai/sft/config_anthropic.yaml +++ b/recipes/constitutional-ai/sft/config_anthropic.yaml @@ -40,7 +40,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/mistral-7b-sft-constitutional-ai diff --git a/recipes/constitutional-ai/sft/config_grok.yaml b/recipes/constitutional-ai/sft/config_grok.yaml index 681fd36f..12b7cb66 100644 --- a/recipes/constitutional-ai/sft/config_grok.yaml +++ b/recipes/constitutional-ai/sft/config_grok.yaml @@ -40,7 +40,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/mistral-7b-sft-constitutional-ai diff --git a/recipes/smollm/sft/config.yaml b/recipes/smollm/sft/config.yaml index b816e688..0cbab585 100644 --- a/recipes/smollm/sft/config.yaml +++ b/recipes/smollm/sft/config.yaml @@ -90,7 +90,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/smollm-360M-instruct-new diff --git a/recipes/smollm2/sft/config.yaml b/recipes/smollm2/sft/config.yaml index 78c9b77f..2ceda850 100644 --- a/recipes/smollm2/sft/config.yaml +++ b/recipes/smollm2/sft/config.yaml @@ -27,7 +27,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 8192 +max_length: 8192 max_steps: -1 num_train_epochs: 2 output_dir: data/smollm2-1.7B-sft diff --git a/recipes/smollm2/sft/config_smol.yaml b/recipes/smollm2/sft/config_smol.yaml index b8285f99..05fa2527 100644 --- a/recipes/smollm2/sft/config_smol.yaml +++ b/recipes/smollm2/sft/config_smol.yaml @@ -26,7 +26,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 8192 +max_length: 8192 max_steps: -1 num_train_epochs: 2 output_dir: data/smollm2-360M-sft diff --git a/recipes/starchat2-15b/sft/config_v0.1.yaml b/recipes/starchat2-15b/sft/config_v0.1.yaml index 9faecabf..54402cfe 100644 --- a/recipes/starchat2-15b/sft/config_v0.1.yaml +++ b/recipes/starchat2-15b/sft/config_v0.1.yaml @@ -87,7 +87,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 3 output_dir: data/starchat2-15b-v0.1 diff --git a/recipes/zephyr-7b-beta/sft/config_full.yaml b/recipes/zephyr-7b-beta/sft/config_full.yaml index 8c6c7e97..e5b14d21 100644 --- a/recipes/zephyr-7b-beta/sft/config_full.yaml +++ b/recipes/zephyr-7b-beta/sft/config_full.yaml @@ -39,7 +39,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/zephyr-7b-sft-full diff --git a/recipes/zephyr-7b-beta/sft/config_qlora.yaml b/recipes/zephyr-7b-beta/sft/config_qlora.yaml index 4809f899..a436bba8 100644 --- a/recipes/zephyr-7b-beta/sft/config_qlora.yaml +++ b/recipes/zephyr-7b-beta/sft/config_qlora.yaml @@ -54,7 +54,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/zephyr-7b-sft-qlora diff --git a/recipes/zephyr-7b-gemma/sft/config_full.yaml b/recipes/zephyr-7b-gemma/sft/config_full.yaml index 7f97256f..5c2fc658 100644 --- a/recipes/zephyr-7b-gemma/sft/config_full.yaml +++ b/recipes/zephyr-7b-gemma/sft/config_full.yaml @@ -42,7 +42,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 3 output_dir: data/zephyr-7b-gemma-sft diff --git a/scripts/sft.py b/scripts/sft.py index 80dbc641..591f6118 100644 --- a/scripts/sft.py +++ b/scripts/sft.py @@ -24,7 +24,7 @@ --learning_rate 2.0e-5 \ --num_train_epochs 1 \ --packing \ - --max_seq_length 4096 \ + --max_length 4096 \ --per_device_train_batch_size 2 \ --gradient_accumulation_steps 8 \ --gradient_checkpointing \ diff --git a/tests/fixtures/config_sft_full.yaml b/tests/fixtures/config_sft_full.yaml index cc0fa337..e28b37b9 100644 --- a/tests/fixtures/config_sft_full.yaml +++ b/tests/fixtures/config_sft_full.yaml @@ -25,7 +25,7 @@ log_level: info logging_steps: 5 logging_strategy: steps lr_scheduler_type: cosine -max_seq_length: 2048 +max_length: 2048 max_steps: -1 num_train_epochs: 1 output_dir: data/zephyr-7b-sft-full