# @package __global__
# Hydra package directive: kept as the very first line of the file so that
# nothing precedes it in the header.
#
# default.yaml, forked from facebookresearch/audiocraft.

# Hydra defaults list; `_self_` is listed last.
defaults:
  - ../default
  - override /dset: audio/default
  - _self_

solver: compression
# `???` is OmegaConf's missing-value marker: these two keys have no default
# here and must be set elsewhere before they are read.
sample_rate: ???
channels: ???
# loss balancing
# One entry per loss term. Presumably these are relative weights and `0.`
# leaves a term out — TODO confirm against the solver.
losses:
  adv: 4.
  feat: 4.
  l1: 0.1
  mel: 0.
  msspec: 2.
  sisnr: 0.
# Options for the loss balancer.
balancer:
  balance_grads: true
  ema_decay: 0.999
  per_batch_item: true
  total_norm: 1.

# Adversarial training setup. `adversaries` lists adversary names; each name
# presumably selects a same-named hyperparameter section — TODO confirm.
adversarial:
  every: 1
  adversaries: [msstftd]
  adv_loss: hinge
  feat_loss: l1
# losses hyperparameters
# One top-level section per loss; `{}` means no options are set here.
# `${sample_rate}` is an OmegaConf interpolation of the `sample_rate` key.
l1: {}
l2: {}
mrstft:
  factor_sc: .5
  factor_mag: .5
  normalized: false
mel:
  sample_rate: ${sample_rate}
  n_fft: 1024
  hop_length: 256
  win_length: 1024
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: false
  floor_level: 1e-5
sisnr:
  sample_rate: ${sample_rate}
  segment: 5.
msspec:
  sample_rate: ${sample_rate}
  range_start: 6
  range_end: 11
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: true
  alphas: false
  floor_level: 1e-5
# metrics
# Settings for the ViSQOL metric; `bin` is left null here.
metrics:
  visqol:
    mode: audio
    bin: null  # path to visqol install
    model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model  # visqol v3
# adversaries hyperparameters
# One top-level section per adversary (msstftd, msd, mpd).
msstftd:
  in_channels: 1
  out_channels: 1
  filters: 32
  norm: weight_norm
  # The three lists below have one entry per scale (five entries each).
  n_ffts: [1024, 2048, 512, 256, 128]
  hop_lengths: [256, 512, 128, 64, 32]
  win_lengths: [1024, 2048, 512, 256, 128]
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
msd:
  in_channels: 1
  out_channels: 1
  scale_norms: [spectral_norm, weight_norm, weight_norm]
  kernel_sizes: [5, 3]
  filters: 16
  max_filters: 1024
  downsample_scales: [4, 4, 4, 4]
  inner_kernel_sizes: null
  groups: [4, 4, 4, 4]
  strides: null
  paddings: null
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
mpd:
  in_channels: 1
  out_channels: 1
  periods: [2, 3, 5, 7, 11]
  n_layers: 5
  kernel_size: 5
  stride: 3
  filters: 8
  filter_scales: 4
  max_filters: 1024
  activation: LeakyReLU
  activation_params: {negative_slope: 0.3}
  norm: weight_norm
# data hyperparameters
# Shared settings come first, followed by one sub-section per split. The
# `evaluate` and `generate` splits set their own `batch_size`, and `generate`
# also sets its own `segment_duration`.
dataset:
  batch_size: 64
  num_workers: 10
  segment_duration: 1
  train:
    num_samples: 500000
  valid:
    num_samples: 10000
  evaluate:
    batch_size: 32
    num_samples: 10000
  generate:
    batch_size: 32
    num_samples: 50
    segment_duration: 10
# solver hyperparameters
# `every` is presumably a period in epochs — TODO confirm.
evaluate:
  every: 25
  num_workers: 5
  # Per-metric on/off switches for the evaluate stage.
  metrics:
    visqol: false
    sisnr: true
generate:
  every: 25
  num_workers: 5
  audio:
    sample_rate: ${sample_rate}
# checkpointing schedule
checkpoint:
  save_last: true
  save_every: 25
  keep_last: 10
  keep_every_states: null
# optimization hyperparameters
optim:
  epochs: 200
  updates_per_epoch: 2000
  lr: 3e-4
  max_norm: 0.
  optimizer: adam
  # Options for the optimizer named by `optimizer` above.
  adam:
    betas: [0.5, 0.9]
    weight_decay: 0.
  ema:
    use: true  # whether to use EMA or not
    updates: 1  # update at every step
    # device for EMA, can be put on GPU if more frequent updates
    device: ${device}
    # EMA decay value, if null, no EMA is used
    decay: 0.99