Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes after merge. operational different-song training #17

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,30 @@ First update the paths in the configuration file for both the logger and the dat
Then call the `main.py` script passing in the configuration file.
```
# new model configuration with audio feature loss
CUDA_VISIBLE_DEVICES=0 python main.py fit \
CUDA_VISIBLE_DEVICES=2,4 python main.py fit \
-c configs/config_cjs.yaml \
-c configs/optimizer.yaml \
-c configs/data/medley+cambridge+jamendo-8.yaml \
-c configs/data/medley+cambridge+jamendo-16.yaml \
-c configs/models/gain+eq+comp-feat.yaml

# new model configuration with CLAP loss
CUDA_VISIBLE_DEVICES=0 python main.py fit \
CUDA_VISIBLE_DEVICES=7 python main.py fit \
-c configs/config_cjs.yaml \
-c configs/optimizer.yaml \
-c configs/data/medley+cambridge+jamendo-8.yaml \
-c configs/data/medley+cambridge+jamendo-4.yaml \
-c configs/models/gain+eq+comp-clap.yaml
```

```
CUDA_VISIBLE_DEVICES=5 python main.py fit \
-c configs/config_quality.yaml \
-c configs/optimizer.yaml \
-c configs/data/jamendo.yaml \
-c configs/models/quality-estim.yaml
```

```
```

# Stability (ignore)
```
Expand Down
10 changes: 4 additions & 6 deletions configs/config_cjs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ trainer:
class_path: pytorch_lightning.loggers.WandbLogger
init_args:
project: DiffMST
save_dir: /import/c4dm-datasets-ext/diffmst_logs_soum
save_dir: /import/c4dm-datasets-ext/Diff-MST
enable_checkpointing: true
callbacks:
- class_path: mst.callbacks.audio.LogAudioCallback
Expand All @@ -31,11 +31,9 @@ trainer:
# - /import/c4dm-datasets-ext/diffmst_validation/validation set/song2/ref/Taylor Swift - Shake It Off.wav
default_root_dir: null
gradient_clip_val: 10.0
devices: 1
check_val_every_n_epoch: 1

devices: 2
check_val_every_n_epoch: 5
max_epochs: 800

log_every_n_steps: 50
accelerator: gpu
strategy: ddp_find_unused_parameters_true
Expand All @@ -44,6 +42,6 @@ trainer:
enable_model_summary: true
num_sanity_val_steps: 2
benchmark: true
accumulate_grad_batches: 1
accumulate_grad_batches: 2
reload_dataloaders_every_n_epochs: 1

29 changes: 29 additions & 0 deletions configs/config_quality.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Trainer/logging configuration for the quality-estimation run; the README pairs
# it with configs/models/quality-estim.yaml via `python main.py fit -c ...`.
# NOTE(review): indentation is missing in this view, so the nesting of the keys
# below `trainer:` cannot be read off the text — restore it before use.
seed_everything: 42
# Checkpoint path left disabled (commented out).
#ckpt_path: /import/c4dm-datasets-ext/Diff-MST/DiffMST/4bjbp29c/checkpoints/epoch=118-step=148750.ckpt

trainer:
# Weights & Biases logger; runs are recorded under the DiffMST-Quality project.
logger:
class_path: pytorch_lightning.loggers.WandbLogger
init_args:
project: DiffMST-Quality
# NOTE(review): site-specific absolute path — the README asks users to update
# logger paths before running.
save_dir: /import/c4dm-datasets-ext/Diff-MST
enable_checkpointing: true
callbacks:
# Model summary callback with max_depth set to 2.
- class_path: pytorch_lightning.callbacks.ModelSummary
init_args:
max_depth: 2
default_root_dir: null
gradient_clip_val: 10.0
# NOTE(review): a single device is requested, yet a DDP strategy and
# sync_batchnorm are also set below — presumably harmless leftovers; confirm.
devices: 1
check_val_every_n_epoch: 1
max_epochs: 500
log_every_n_steps: 50
accelerator: gpu
strategy: ddp_find_unused_parameters_true
sync_batchnorm: true
precision: 32
enable_model_summary: true
num_sanity_val_steps: 2
benchmark: true
accumulate_grad_batches: 1

4 changes: 2 additions & 2 deletions configs/data/jamendo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ data:
init_args:
root_dir: /import/c4dm-datasets-ext/mtg-jamendo
length: 262144
batch_size: 4
num_workers: 4
batch_size: 8
num_workers: 8
26 changes: 26 additions & 0 deletions configs/data/medley+cambridge+jamendo-4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Data module configuration for mst.dataloader.MultitrackDataModule with
# min_tracks and max_tracks both set to 4.
# NOTE(review): indentation is missing in this view; the keys after
# `init_args:` are presumably its children — restore nesting before use.
data:
class_path: mst.dataloader.MultitrackDataModule
init_args:
# Root directories searched for multitrack material.
# NOTE(review): site-specific absolute paths — the README asks users to update
# dataset paths before running.
track_root_dirs:
- /import/c4dm-datasets-ext/mixing-secrets/
- /import/c4dm-datasets/

# Root directories for full mixes.
mix_root_dirs:
- /import/c4dm-datasets-ext/mtg-jamendo

# Metadata files, given relative to the working directory.
metadata_files:
- ./data/cambridge.yaml
- ./data/medley.yaml
# presumably the excerpt length in samples (262144 = 2**18) — TODO confirm
length: 262144

min_tracks: 4
max_tracks: 4
batch_size: 4
num_workers: 4
num_train_passes: 4
num_val_passes: 1
train_buffer_size_gb: 4.0
val_buffer_size_gb: 0.5
target_track_lufs_db: -48.0
# Canonical lowercase boolean, matching the other configs in this change set.
randomize_ref_mix_gain: false

4 changes: 2 additions & 2 deletions configs/data/medley+cambridge+jamendo-8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ data:
- ./data/medley.yaml
length: 262144

min_tracks: 4
max_tracks: 4
min_tracks: 8
max_tracks: 8
batch_size: 2
num_workers: 4
num_train_passes: 4
Expand Down
68 changes: 68 additions & 0 deletions configs/models/gain+eq+comp-feat+quality.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Model configuration: mst.system.System with an AdvancedMixConsole, a
# MixStyleTransferModel and the combined mst.loss.FeatureAndQualityLoss.
# NOTE(review): indentation is missing in this view, so nesting cannot be read
# off the text — restore it before use.
model:
class_path: mst.system.System
init_args:
generate_mix: false
active_eq_epoch: 0
active_compressor_epoch: 0
# NOTE(review): 1000 exceeds the max_epochs values in the trainer configs of
# this change set (800 / 500); presumably this keeps the FX bus inactive for
# the whole run — confirm.
active_fx_bus_epoch: 1000
active_master_bus_epoch: 0
mix_fn: mst.mixing.naive_random_mix
# Mix console and its gain / EQ / pan limits.
mix_console:
class_path: mst.modules.AdvancedMixConsole
init_args:
sample_rate: 44100
input_min_gain_db: -48.0
input_max_gain_db: 48.0
output_min_gain_db: -48.0
output_max_gain_db: 48.0
eq_min_gain_db: -12.0
eq_max_gain_db: 12.0
min_pan: 0.0
max_pan: 1.0
# NOTE(review): this second `model:` is presumably an argument nested under the
# top-level `model.init_args`, not a duplicate top-level key — confirm.
model:
class_path: mst.modules.MixStyleTransferModel
init_args:
# Track and mix encoders are configured identically.
track_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
mix_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
# Controller embed_dim (256) equals the encoders' embed_dim above.
controller:
class_path: mst.modules.TransformerController
init_args:
embed_dim: 256
num_track_control_params: 27
num_fx_bus_control_params: 25
num_master_bus_control_params: 26
num_layers: 12
nhead: 8

# Loss: audio-feature terms plus a quality term scaled by quality_weight.
loss:
class_path: mst.loss.FeatureAndQualityLoss
init_args:
sample_rate: 44100
stem_separation: false
use_clap: false
# One weight per feature term, in the order labelled inline.
weights:
- 0.1 # rms
- 0.001 # crest factor
- 1.0 # stereo width
- 1.0 # stereo imbalance
- 0.1 # bark spectrum
# NOTE(review): site-specific absolute checkpoint path (run q60vbm8l,
# epoch 499) — must exist on the training machine.
quality_ckpt_path: /import/c4dm-datasets-ext/Diff-MST/DiffMST-Quality/q60vbm8l/checkpoints/epoch=499-step=903500.ckpt
quality_weight: 0.001
12 changes: 9 additions & 3 deletions configs/models/gain+eq+comp-feat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,27 @@ model:
track_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
embed_dim: 512
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
mix_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
embed_dim: 512
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
controller:
class_path: mst.modules.TransformerController
init_args:
embed_dim: 512
embed_dim: 256
num_track_control_params: 27
num_fx_bus_control_params: 25
num_master_bus_control_params: 26
Expand Down
58 changes: 58 additions & 0 deletions configs/models/gain+eq+comp-quality.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Model configuration: mst.system.System with an AdvancedMixConsole, a
# MixStyleTransferModel and mst.loss.QualityLoss as the only loss.
# NOTE(review): indentation is missing in this view, so nesting cannot be read
# off the text — restore it before use.
model:
class_path: mst.system.System
init_args:
generate_mix: false
active_eq_epoch: 0
active_compressor_epoch: 0
# NOTE(review): 1000 exceeds the max_epochs values in the trainer configs of
# this change set (800 / 500); presumably this keeps the FX bus inactive for
# the whole run — confirm.
active_fx_bus_epoch: 1000
active_master_bus_epoch: 0
mix_fn: mst.mixing.naive_random_mix
# Mix console and its gain / EQ / pan limits.
mix_console:
class_path: mst.modules.AdvancedMixConsole
init_args:
sample_rate: 44100
input_min_gain_db: -48.0
input_max_gain_db: 48.0
output_min_gain_db: -48.0
output_max_gain_db: 48.0
eq_min_gain_db: -12.0
eq_max_gain_db: 12.0
min_pan: 0.0
max_pan: 1.0
# NOTE(review): this second `model:` is presumably an argument nested under the
# top-level `model.init_args`, not a duplicate top-level key — confirm.
model:
class_path: mst.modules.MixStyleTransferModel
init_args:
# Track and mix encoders are configured identically.
track_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
mix_encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
n_inputs: 1
embed_dim: 256
n_fft: 2048
hop_length: 512
input_batchnorm: false
encoder_batchnorm: false
model_size: small
# Controller embed_dim (256) equals the encoders' embed_dim above.
controller:
class_path: mst.modules.TransformerController
init_args:
embed_dim: 256
num_track_control_params: 27
num_fx_bus_control_params: 25
num_master_bus_control_params: 26
num_layers: 12
nhead: 8

loss:
class_path: mst.loss.QualityLoss
init_args:
# NOTE(review): site-specific absolute checkpoint path. This points at
# epoch 351 of run q60vbm8l, whereas gain+eq+comp-feat+quality.yaml uses
# epoch 499 of the same run — confirm which checkpoint is intended.
ckpt_path: /import/c4dm-datasets-ext/Diff-MST/DiffMST-Quality/q60vbm8l/checkpoints/epoch=351-step=636064.ckpt
11 changes: 11 additions & 0 deletions configs/models/quality-estim.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Model configuration for mst.quality_system.QualityEstimationSystem, built
# around a single SpectrogramEncoder.
# NOTE(review): indentation is missing in this view, so nesting cannot be read
# off the text — restore it before use.
model:
class_path: mst.quality_system.QualityEstimationSystem
init_args:
encoder:
class_path: mst.modules.SpectrogramEncoder
init_args:
# This encoder uses embed_dim 512; the mix-model encoders in the new model
# configs of this change set use 256.
embed_dim: 512
n_inputs: 1
l2_norm: true
input_batchnorm: false
encoder_batchnorm: false
Loading