Commit
prepare for llama 3.2 1b training, lr=2.5e-4
tigranfah committed Oct 11, 2024
1 parent e8fd8a3 commit 5dc2dbb
Showing 2 changed files with 4 additions and 3 deletions.
submitit_train_hparam_tuning.py (3 changes: 2 additions & 1 deletion)
@@ -18,7 +18,8 @@

  hparams = {
      # "optimizer.lr": ["1.2e-3", "9e-4", "6e-4", "3e-4"],
-     "optimizer.lr": ["8e-4", "6e-4", "4e-4", "2e-4"],
+     # "optimizer.lr": ["8e-4", "6e-4", "4e-4", "2e-4"],
+     # "optimizer.lr": ["2.5e-4"],
      # "optimizer.lr": ["1e-4", "8e-5", "6e-5", "4e-5", "2e-5"],
  }

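Only the hparams dict is visible in this hunk, so the following is a hypothetical sketch, not the actual contents of submitit_train_hparam_tuning.py, of how a dict of dotted config keys is commonly expanded into one override list per submitted job. The expand_overrides helper and the --key=value flag format are assumptions for illustration.

```python
from itertools import product

# Hypothetical sketch: expand the hparams dict into one CLI-override list per
# run (cartesian product over all keys). Values are strings passed through to
# the trainer unchanged.
hparams = {
    "optimizer.lr": ["8e-4", "6e-4", "4e-4", "2e-4"],
}

def expand_overrides(hparams):
    """Yield one list of --key=value overrides per hyperparameter combination."""
    keys = list(hparams)
    for values in product(*(hparams[k] for k in keys)):
        yield [f"--{k}={v}" for k, v in zip(keys, values)]

if __name__ == "__main__":
    for overrides in expand_overrides(hparams):
        print(overrides)  # e.g. ['--optimizer.lr=8e-4'], one job per combination
```

Note that after this commit every lr entry in the dict is commented out, so the sweep collapses to a single run with no lr override, consistent with the fixed lr = 2.5e-4 now set in the TOML below.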
train_configs/llama3.2_1b.toml (4 changes: 2 additions & 2 deletions)
@@ -26,13 +26,13 @@ tokenizer_path = "torchtitan/tokenizers/Llama-3.2-chem-1B/"

  [optimizer]
  name = "AdamW"
- lr = 6.0e-4
+ lr = 2.5e-4

  [training]
  batch_size = 10
  gradient_accumulation_steps = 12
  seq_len = 2048
- warmup_steps = 1000  # lr scheduler warm up, normally 20% of the train steps
+ warmup_steps = 500  # lr scheduler warm up, normally 20% of the train steps
  max_norm = 1.0  # grad norm clipping
  steps = 20000
  data_parallel_degree = -1
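For context on the two values that changed, here is a minimal, illustrative sketch assuming a plain linear warmup (torchtitan's actual scheduler may differ), plus the per-rank tokens consumed per optimizer step implied by the [training] section.

```python
# Illustrative only: a generic linear-warmup multiplier, not necessarily the
# exact schedule torchtitan applies to this config.
def warmup_factor(step: int, warmup_steps: int = 500) -> float:
    """Fraction of the peak lr (2.5e-4 here) applied at a given step."""
    return min(1.0, step / warmup_steps)

# Tokens per optimizer step on each data-parallel rank with this config:
# batch_size * gradient_accumulation_steps * seq_len = 10 * 12 * 2048 = 245,760.
tokens_per_step = 10 * 12 * 2048

if __name__ == "__main__":
    print(warmup_factor(250))      # 0.5 -> lr ~ 1.25e-4 halfway through warmup
    print(f"{tokens_per_step:,}")  # 245,760
```

Halving warmup_steps from 1000 to 500 only makes the schedule reach the peak lr twice as fast; the tokens-per-step figure is unchanged by this commit.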
