# c4-extra-tiny-debug.yaml
# Forked from allenai/OLMo.
# Run identification and top-level switches.
# `run_name` is reused through `${run_name}` interpolation by `save_folder`
# and `wandb.name` elsewhere in this file.
run_name: extra-tiny-debug
# Seed value for the run (presumably seeds the RNGs; confirm against the consumer).
seed: 6198
dry_run: false
# Model settings. Every key below belongs to the `model` mapping and must be
# indented under it; at column 0 these keys would become top-level keys and
# `model` itself would be null.
model:
  d_model: 256
  n_heads: 4
  n_layers: 5
  mlp_ratio: 4
  alibi: true
  alibi_bias_max: 8.0
  flash_attention: false
  attention_dropout: 0.0
  attention_layer_norm: false
  residual_dropout: 0.0
  embedding_dropout: 0.0
  max_sequence_length: 2048
  include_bias: true
  vocab_size: 50257
  # EOS and padding share one id, equal to vocab_size - 1.
  eos_token_id: 50256
  pad_token_id: 50256
  init_device: null
  init_std: 0.02
# Optimizer settings. The keys are nested under `optimizer` so that `name`
# here does not collide with the `name` keys of other sections.
optimizer:
  name: lionw
  learning_rate: 3.0e-4
  weight_decay: 0.01
  # Two-element list of beta coefficients.
  betas:
    - 0.9
    - 0.95
# Learning-rate scheduler settings, nested under `scheduler`.
scheduler:
  name: cosine_with_warmup
  # Warmup length (unit not stated in this file; presumably steps, confirm).
  t_warmup: 100
  alpha_f: 0.1
# Training data settings, nested under `data`.
data:
  # Built from the `path.glob`, `path.choose` and `oc.env` resolvers: the
  # candidate roots are `$SCRATCH_DIR/pretraining_data/preprocessed` (with
  # `no_exist` as the fallback when SCRATCH_DIR is unset) and the NFS path,
  # and the pattern matches `c4-train.*.npy` files.
  # NOTE(review): `path.glob` / `path.choose` are defined outside this file;
  # confirm their exact selection semantics.
  paths: ${path.glob:${path.choose:${oc.env:SCRATCH_DIR,no_exist}/pretraining_data/preprocessed,/net/nfs.cirrascale/allennlp/llm-data}/c4/en/c4-train.*.npy}
  pad_direction: right
  num_workers: 2
  drop_last: true
  pin_memory: true
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0
# Top-level evaluation settings.
eval_interval: 50
# NOTE(review): -1 presumably means "no limit"; confirm against the consumer.
eval_subset_num_batches: -1
# Interpolated from `device_train_microbatch_size`, defined later in this file.
device_eval_batch_size: ${device_train_microbatch_size}
# List of evaluators. Each list item is a mapping that starts at `- label`.
# The first two entries carry their own `data` sub-mapping and a
# `subset_num_batches` of 10; the remaining entries are `type: downstream`.
evaluators:
  - label: c4-validation
    subset_num_batches: 10
    data:
      paths: ${path.glob:${path.choose:${oc.env:SCRATCH_DIR,no_exist}/pretraining_data/preprocessed,/net/nfs.cirrascale/allennlp/llm-data}/c4/en/c4-validation.*.npy}
      drop_last: true

  - label: rp-validation
    subset_num_batches: 10
    data:
      paths: ${path.glob:${path.choose:${oc.env:SCRATCH_DIR,no_exist}/pretraining_data/preprocessed,/net/nfs.cirrascale/allennlp/llm-data}/redpajama/redpajama-validation.npy}
      drop_last: true

  - label: piqa
    type: downstream

  - label: hellaswag
    type: downstream

  - label: winogrande
    type: downstream

  - label: openbook_qa
    type: downstream

  # Disabled: requires implementation of the pmi_dc matrix.
  # - label: boolq
  #   type: downstream

  - label: sciq
    type: downstream

  - label: arc_easy
    type: downstream

  # Disabled: requires implementation of the pmi_dc matrix.
  # - label: arc_challenge
  #   type: downstream

  - label: copa
    type: downstream

  - label: rte
    type: downstream

  - label: commitment_bank
    type: downstream

  - label: mrpc
    type: downstream

  - label: sst2
    type: downstream
# Tokenizer settings, nested under `tokenizer`.
tokenizer:
  identifier: gpt2
  truncate_direction: right
# Checkpointing, run length, batch sizes and precision (all top-level keys).
# `save_folder` is `<root>/<job>`: <root> comes from `path.choose` over
# `$SCRATCH_DIR/checkpoints` and `/results`; <job> is `$SLURM_JOB_ID`, falling
# back to `${run_name}` when that variable is unset.
# NOTE(review): `path.choose` is defined outside this file; confirm how it
# picks between its arguments.
save_folder: ${path.choose:${oc.env:SCRATCH_DIR,no_exist}/checkpoints,/results}/${oc.env:SLURM_JOB_ID,${run_name}}
save_interval: 1000
save_num_checkpoints_to_keep: 2
save_overwrite: true
load_path: null
# NOTE(review): if `max_duration` counts optimizer steps, then
# 1708984 steps x 16 sequences x 2048 tokens is roughly 56B tokens, not 7B.
# Confirm the unit and whether the inline comment is stale.
max_duration: 1708984 # 7B tokens
global_train_batch_size: 16
# Also the source of `device_eval_batch_size` via interpolation.
device_train_microbatch_size: 4
precision: fp32
# Weights & Biases settings, nested under `wandb`.
wandb:
  # Reuses the top-level `run_name` through interpolation.
  name: ${run_name}
# Speed monitor settings, nested under `speed_monitor`.
speed_monitor:
  window_size: 20
# Console logging interval (unit not stated here; presumably training steps, confirm).
console_log_interval: 10