Skip to content

Commit

Permalink
adding l3 supercat config
Browse files Browse the repository at this point in the history
  • Loading branch information
dleemiller committed Jul 24, 2024
1 parent 54b8984 commit 194b1a3
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions wordllama/config/l3_supercat.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Source-model settings: embedding width, vocabulary size, Hugging Face model id,
# and the token used for padding.
[model]
# NOTE(review): 28672 is wider than one model's hidden size would normally be;
# presumably this is the combined width of several concatenated embedding tables
# (the file is named "l3_supercat") — confirm against the extraction code.
dim = 28672
n_vocab = 128256
hf_model_id = "meta-llama/Meta-Llama-3.1-405B"
# The end-of-text token doubles as the pad token here.
pad_token = "<|end_of_text|>"

# Tokenizer options.
# NOTE(review): these key names match the keyword arguments of a Hugging Face
# tokenizer call; presumably they are forwarded unchanged — confirm in the loader.
[tokenizer]
return_tensors = "pt"
return_attention_mask = true
# Used together with truncation = true below; presumably a cap in tokens — verify.
max_length = 256
padding = "longest"
truncation = true
add_special_tokens = false

# Tokenizer settings that apply at inference time.
# NOTE(review): presumably use_local_config = true makes inference load the
# tokenizer from config_filename rather than fetching it by hf_model_id — verify
# against the code that reads this table.
[tokenizer.inference]
use_local_config = true
config_filename = "l3_supercat_tokenizer_config.json"

# Training-run settings.
# NOTE(review): most key names match Hugging Face / Sentence Transformers
# training arguments; presumably this table is passed through to them — confirm.
[training]
output_dir = "output/matryoshka_supercat"
num_train_epochs = 2
per_device_train_batch_size = 256
warmup_steps = 256
# NOTE(review): newer trainer releases may expect "eval_strategy" instead of
# this key name — check the pinned library version before renaming.
evaluation_strategy = "steps"
# Evaluation and checkpoint intervals differ (250 vs 1000 steps).
eval_steps = 250
save_steps = 1000
fp16 = true
include_num_input_tokens_seen = false
learning_rate = 3e-4
multi_dataset_batch_sampler = "PROPORTIONAL"
# NOTE(review): binarizer_ste looks project-specific; presumably it selects a
# straight-through estimator for binarized embeddings — confirm accepted values.
binarizer_ste = "ste"

# Matryoshka settings: a list of dimensions, ordered largest to smallest.
# NOTE(review): presumably the nested output embedding sizes trained jointly —
# confirm against the training code.
[matryoshka]
dims = [1024, 512, 256, 128, 64]

0 comments on commit 194b1a3

Please sign in to comment.