forked from karpathy/llama2.c
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.sh
executable file
·55 lines (47 loc) · 1.46 KB
/
eval.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
# Settings for an evaluation sweep driven through train.py (invoked with
# --eval_only=True further down). Every value in this section is forwarded
# verbatim to train.py as a --name=value flag.

# data
task_name="tinystories_repeat"
vocab_source="custom" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
vocab_size=4096 # the Llama 2 tokenizer has 32K tokens
# eval
batch_size=32 # if gradient_accumulation_steps > 1, this is the micro-batch size
eval_iters=1000
eval_last=False
repeat_tokens=False
# model
attention_type="attention"
extend_method="interpolation_logn"
key_norm=False
# I/O
out_dir=./out/retry2_repeat_custom4096_len256
# out_dir=./out/retry_repeat_custom4096_len256_nope

# The log file and the script snapshot both live in out_dir, so stop early
# with a clear message if it cannot be created instead of starting the sweep.
if ! mkdir -p -- "${out_dir}"; then
  printf 'cannot create output directory: %s\n' "${out_dir}" >&2
  exit 1
fi

# Keep a copy of this script next to the results so the settings used for the
# run are recorded. Quoted so a script path containing spaces still works.
cp -- "$0" "${out_dir}/eval.sh"
# Evaluate the checkpoint in out_dir at sequence lengths 2^8 .. 2^12.
# Output of every run is appended to one log file named after the
# extend_method / key_norm combination. A failing run is reported on stderr
# and counted, but the sweep continues with the next length.
log_file="${out_dir}/log_${extend_method}_${key_norm}.txt"
failed_runs=0
for ((i = 8; i <= 12; i++)); do
  # Smaller batches (and fewer eval iterations) for very long sequences --
  # presumably to keep memory/runtime bounded; confirm. With the current
  # upper bound of 12 neither threshold is reached; they only take effect
  # if the range above is extended.
  if ((i >= 14)); then
    batch_size=8
  fi
  if ((i >= 17)); then
    batch_size=1
    eval_iters=100
  fi

  max_seq_len=$((2 ** i))
  # max_seq_len=$((32 * i))
  echo "eval $max_seq_len"
  date

  python3 train.py \
    --task_name="${task_name}" \
    --batch_size="${batch_size}" --max_seq_len="${max_seq_len}" \
    --extend_method="${extend_method}" \
    --key_norm="${key_norm}" \
    --vocab_source="${vocab_source}" --vocab_size="${vocab_size}" \
    --attention_type="${attention_type}" \
    --dtype="float32" \
    --device="cuda" --compile=False \
    --eval_only=True --init_from="resume" --always_save_checkpoint=False \
    --eval_last="${eval_last}" --eval_iters="${eval_iters}" \
    --repeat_tokens="${repeat_tokens}" \
    --out_dir="${out_dir}" \
    | tee -a "${log_file}"
  # The pipeline's own status is tee's; PIPESTATUS[0] is train.py's and must
  # be read immediately, before any other command overwrites it.
  run_status=${PIPESTATUS[0]}
  if ((run_status != 0)); then
    printf 'eval failed for max_seq_len=%s (train.py exit status %s)\n' \
      "${max_seq_len}" "${run_status}" >&2
    failed_runs=$((failed_runs + 1))
  fi
done

# Last command of the script: yields a non-zero exit status when any run
# failed, so callers can detect a broken sweep.
((failed_runs == 0))