-
Notifications
You must be signed in to change notification settings - Fork 305
/
Copy pathrun_dropout_sweep.yaml
84 lines (84 loc) · 1.84 KB
/
run_dropout_sweep.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Command line launched for every run of the sweep. `${program}` and `${args}`
# are placeholders expanded by the sweep runner (presumably the W&B agent —
# confirm): `${program}` is the top-level `program` key, `${args}` the
# hyperparameters chosen for that run.
command:
  - python3
  - ${program}
  # Flags passed unchanged to every run.
  - --do_train
  - --do_eval
  - --use_scan
  - --gradient_checkpointing
  - --overwrite_output_dir
  - --predict_with_generate
  - --streaming
  - --use_auth_token
  # Per-run hyperparameter arguments go last.
  - ${args}
# Search strategy: parameter combinations are sampled at random.
method: random

# Objective for the sweep: lower `eval/wer` is better.
# `goal` and `name` must be nested under `metric`; at column 0 they parse as
# unrelated top-level keys and `metric` itself becomes null.
metric:
  goal: minimize
  name: eval/wer
# Hyperparameters handed to each run. A parameter with a single `value` is
# held constant; a parameter with a `values` list is swept over that list.
# Each `value` / `values` entry must be nested under its parameter name,
# otherwise they collapse into duplicate top-level keys.
parameters:
  # --- Models ---------------------------------------------------------------
  model_name_or_path:
    value: distil-whisper/large-32-2
  teacher_model_name_or_path:
    value: openai/whisper-large-v2

  # --- Training data ----------------------------------------------------------
  # The four `train_*` strings below are `+`-joined lists of 12 entries each;
  # presumably entry i of every list describes the same dataset — keep them
  # aligned when editing (verify against the training script).
  train_dataset_name:
    value: librispeech_asr+librispeech_asr+librispeech_asr+common_voice_13_0+voxpopuli+ami-ihm+ami-sdm+peoples_speech-clean+tedlium+switchboard-data+gigaspeech-l+spgispeech
  train_dataset_config_name:
    value: all+all+all+en+en+ihm+sdm+clean+release3+all+l+L
  train_split_name:
    value: train.clean.100+train.clean.360+train.other.500+train+train+train+train+train+train+train+train+train
  train_dataset_samples:
    value: 100+360+500+2300+450+90+90+12000+450+3600+2500+5000

  # --- Evaluation data --------------------------------------------------------
  eval_dataset_name:
    value: "distil-whisper/gigaspeech-l"
  eval_dataset_config_name:
    value: "l"

  # --- Paths ------------------------------------------------------------------
  # NOTE(review): the cache paths are machine-specific absolute paths; adjust
  # them before running on another host.
  cache_dir:
    value: /home/sanchitgandhi/cache
  dataset_cache_dir:
    value: /home/sanchitgandhi/cache
  output_dir:
    value: ./

  # --- Fixed training settings ------------------------------------------------
  per_device_train_batch_size:
    value: 32
  per_device_eval_batch_size:
    value: 64
  dtype:
    value: bfloat16
  learning_rate:
    # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
    # resolve a bare `1e-4` as a string rather than a float.
    value: 1.0e-4
  lr_scheduler_type:
    value: constant_with_warmup
  warmup_steps:
    value: 50
  # max_steps, eval_steps and save_steps are all set to 1000.
  max_steps:
    value: 1000
  eval_steps:
    value: 1000
  save_steps:
    value: 1000
  dataloader_num_workers:
    value: 16
  logging_steps:
    value: 5
  wer_threshold:
    value: 10

  # --- Swept settings ---------------------------------------------------------
  # Three dropout settings with three candidates each, plus one boolean:
  # 3 * 3 * 3 * 2 = 54 distinct combinations for the search to draw from.
  activation_dropout:
    values:
      - 0
      - 0.05
      - 0.1
  attention_dropout:
    values:
      - 0
      - 0.05
      - 0.1
  dropout:
    values:
      - 0
      - 0.05
      - 0.1
  freeze_encoder:
    values:
      - true
      - false
# Script substituted for `${program}` in `command`.
program: run_distillation.py

# Project name under which the sweep's runs are grouped (presumably the W&B
# project — confirm).
project: distil-whisper-sweeps