# single.sh
# This script trains a single model in four sequential steps: pretrain on a
# general STS corpus, 5-fold cross validation on the clinical set, refit on the
# clinical train set, then predict on the clinical test set.
# NOTE(review): the values below (path_name, model_type, /path/to/...) look
# like placeholders to be edited before running - confirm.

# Stop at the first failing command or unset variable. Each later step reads
# the output directory of an earlier one, so continuing after a failure would
# only run against missing or stale checkpoints.
set -eu

# GPU made visible to the python processes launched below.
export CUDA_VISIBLE_DEVICES=0

# Run label; every output directory lives under output/$PATH_NAME.
PATH_NAME=path_name
# Passed as --model_type to every step.
MODEL_TYPE=model_type
# Passed as --model_name_or_path to step 1 only.
MODEL_NAME=model_name
# Python entry point used by all four steps.
CLASSIFIER=src/single_task.py

# Step 1: general-corpus data and where the pretrained model is written.
STS_B_M=path/to/sts_b_merged_dataset
GEN_OUTPUT="output/$PATH_NAME/gen"
# Steps 2 and 3 start from the model produced by step 1.
STS_MODEL="$GEN_OUTPUT"

# Step 2: directory holding sample0..sample4 folds, and the CV output root.
CLINICAL_DIR=/path/to/clinical_dataset_for_5_fold_cross_validation
CV_OUTPUT="output/$PATH_NAME/tmp"

# Step 3: clinical train set and where the refit model is written.
STS_C=/path/to/clinical_sts_trainset
REFIT_OUTPUT="output/$PATH_NAME/clinical_refit"

# Step 4: clinical test set, prediction output, and the model to load
# (the step 3 output directory, with a trailing slash).
STS_C_TEST=/path/to/clinical_sts_testset
FINAL_OUTPUT="output/$PATH_NAME/clinical_pred"
REFIT_MODEL="$REFIT_OUTPUT/"
# step 1: pretrain on general corpus
# Runs $CLASSIFIER with --do_train on the merged STS-B data, starting from
# $MODEL_NAME and writing to $GEN_OUTPUT.
# Expansions are quoted so a path containing spaces or glob characters stays a
# single argument. The last option has no trailing backslash, so the command
# ends on its own line instead of depending on what the next line contains.
python "$CLASSIFIER" \
  --data_dir "$STS_B_M" \
  --model_type "$MODEL_TYPE" \
  --model_name_or_path "$MODEL_NAME" \
  --task_name sts-b \
  --gradient_accumulation_steps 1 \
  --output_dir "$GEN_OUTPUT" \
  --max_seq_length 160 \
  --do_train \
  --overwrite_cache
# step 2: conduct 5 fold cross validation on clinical dataset
# Grid: batch size {4, 8, 16} x epochs {3, 4, 5}, each run on the folds
# sample0..sample4 under $CLINICAL_DIR. Every run starts from $STS_MODEL and
# writes to $CV_OUTPUT/<batch>_<epochs>/sample<fold>.
# --overwrite_output_dir was left disabled by the author. It is recorded here
# rather than as a commented-out continuation line, so the end of the python
# command does not depend on how the shell treats a backslash inside a comment;
# append it to the option list below to enable it.
for b in 4 8 16; do
  for ep in 3 4 5; do
    for i in 0 1 2 3 4; do
      echo "current hp: ${b}, ${ep}"
      python "$CLASSIFIER" \
        --data_dir "$CLINICAL_DIR/sample${i}" \
        --model_type "$MODEL_TYPE" \
        --model_name_or_path "$STS_MODEL" \
        --task_name sts-clinical \
        --gradient_accumulation_steps 1 \
        --output_dir "$CV_OUTPUT/${b}_${ep}/sample${i}" \
        --max_seq_length 160 \
        --do_train \
        --per_gpu_train_batch_size "${b}" \
        --num_train_epochs "${ep}" \
        --do_eval \
        --overwrite_cache
    done
  done
done
# step 3: refit on clinical dataset using 5f cv best hyperparameter
# Runs $CLASSIFIER with --do_train on the full clinical train set ($STS_C),
# starting from $STS_MODEL and writing to $REFIT_OUTPUT.
# The batch size (4) and epoch count (4) are literals, not read from the
# cross-validation output; edit them by hand if the grid search picks other
# values.
# Expansions are quoted so paths stay single arguments, and the last option has
# no trailing backslash so the command ends on its own line.
python "$CLASSIFIER" \
  --data_dir "$STS_C" \
  --model_type "$MODEL_TYPE" \
  --model_name_or_path "$STS_MODEL" \
  --task_name sts-clinical \
  --output_dir "$REFIT_OUTPUT" \
  --max_seq_length 160 \
  --gradient_accumulation_steps 1 \
  --do_train \
  --per_gpu_train_batch_size 4 \
  --num_train_epochs 4 \
  --overwrite_cache
# step 4: prediction
# Runs $CLASSIFIER with --do_pred on the clinical test set ($STS_C_TEST),
# loading the model from $REFIT_MODEL and writing to $FINAL_OUTPUT.
# NOTE(review): --per_gpu_train_batch_size and --num_train_epochs are passed
# although no --do_train is given; presumably unused in prediction mode -
# confirm against the classifier before removing them.
# Expansions are quoted so paths stay single arguments, and the final option
# has no trailing backslash, so the script does not end on a line continuation.
python "$CLASSIFIER" \
  --data_dir "$STS_C_TEST" \
  --model_type "$MODEL_TYPE" \
  --model_name_or_path "$REFIT_MODEL" \
  --task_name sts-clinical \
  --output_dir "$FINAL_OUTPUT" \
  --max_seq_length 160 \
  --do_pred \
  --per_gpu_train_batch_size 4 \
  --num_train_epochs 3 \
  --overwrite_cache