Fix augmentations and enable pretrained HDemucs #44

Open · wants to merge 11 commits into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -217,3 +217,5 @@ build/
dist/
.idea/
out/
env/
.vscode
16 changes: 16 additions & 0 deletions aimless/models/hdemucs.py
@@ -0,0 +1,16 @@
import torch
import torchaudio
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS


class PretrainedHDemucs(torch.nn.Module):
    """Wrapper that either downloads the pretrained HDemucs bundle or builds a fresh model."""

    def __init__(self, sources, download_weights: bool = False) -> None:
        super().__init__()
        if download_weights:
            # Pretrained weights from torchaudio's HDEMUCS_HIGH_MUSDB_PLUS bundle;
            # the bundle defines its own source set, so `sources` is not used here.
            bundle = HDEMUCS_HIGH_MUSDB_PLUS
            self.model = bundle.get_model()
        else:
            # Randomly initialised hdemucs_high with the requested sources.
            self.model = torchaudio.models.hdemucs_high(sources)

    def forward(self, x: torch.Tensor):
        return self.model(x)
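For reference, a minimal usage sketch of this wrapper (not part of the diff; the dummy input shape follows torchaudio's HDemucs convention of (batch, channels, samples) at 44.1 kHz):

import torch
from aimless.models.hdemucs import PretrainedHDemucs

# Randomly initialised variant; pass download_weights=True to fetch the pretrained bundle instead.
model = PretrainedHDemucs(["vocals", "drums", "bass", "other"], download_weights=False)
mix = torch.randn(1, 2, 44100 * 4)  # 4 s of stereo audio at 44.1 kHz
with torch.no_grad():
    stems = model(mix)              # (batch, num_sources, channels, samples)
print(stems.shape)                  # torch.Size([1, 4, 2, 176400])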
105 changes: 105 additions & 0 deletions cfg/hdemucs_cjs.yaml
@@ -0,0 +1,105 @@
# pytorch_lightning==1.8.5.post0
seed_everything: true
trainer:
  logger: true
  enable_checkpointing: true
  callbacks: pytorch_lightning.callbacks.LearningRateMonitor
  default_root_dir: "/import/c4dm-datasets-ext/sdx-2023/logs-cjs"
  gradient_clip_val: null
  gradient_clip_algorithm: null
  num_nodes: 1
  num_processes: null
  devices: 3
  gpus: null
  auto_select_gpus: false
  tpu_cores: null
  ipus: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: 8
  max_epochs: null
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  log_every_n_steps: 1
  accelerator: gpu
  strategy: ddp
  sync_batchnorm: true
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 2
  resume_from_checkpoint: null
  profiler: null
  benchmark: null
  deterministic: null
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  inference_mode: true
ckpt_path: null
model:
  class_path: aimless.lightning.waveform.WaveformSeparator
  init_args:
    model:
      class_path: torchaudio.models.HDemucs
      init_args:
        sources:
          - vocals
          - drums
          - bass
          - other
        channels: 64
    criterion:
      class_path: aimless.loss.time.L1Loss
    transforms:
      - class_path: aimless.augment.SpeedPerturb
        init_args:
          orig_freq: 44100
          speeds:
            - 90
            - 100
            - 110
          p: 0.2
      - class_path: aimless.augment.RandomPitch
        init_args:
          semitones:
            - -1
            - 1
            - 0
            - 1
            - 2
          p: 0.2
    targets: {vocals, drums, bass, other}
data:
  class_path: data.lightning.musdb.MUSDB
  init_args:
    root: /import/c4dm-datasets-ext/musdb18hq/
    seq_duration: 8.0
    samples_per_track: 500
    random: true
    random_track_mix: true
    batch_size: 2
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 0.0001
lr_scheduler:
  class_path: torch.optim.lr_scheduler.CosineAnnealingLR
  init_args:
    T_max: 100
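As a sanity check on the optimizer/scheduler block above, a minimal sketch of what the configured AdamW + CosineAnnealingLR pair does outside Lightning (values copied from the YAML; the single parameter is a placeholder):

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=0.0001)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
for epoch in range(3):
    optimizer.step()                 # one (dummy) update per epoch
    scheduler.step()                 # lr follows a cosine toward 0 over T_max=100 epochs
    print(scheduler.get_last_lr())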
108 changes: 108 additions & 0 deletions cfg/hdemucs_cjs_pretrained.yaml
@@ -0,0 +1,108 @@
# pytorch_lightning==1.8.5.post0
seed_everything: true
trainer:
  logger: true
  enable_checkpointing: true
  callbacks: pytorch_lightning.callbacks.LearningRateMonitor
  default_root_dir: "/import/c4dm-datasets-ext/sdx-2023/logs-cjs"
  gradient_clip_val: null
  gradient_clip_algorithm: null
  num_nodes: 1
  num_processes: null
  devices: 3
  gpus: null
  auto_select_gpus: false
  tpu_cores: null
  ipus: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: 8
  max_epochs: null
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  log_every_n_steps: 1
  accelerator: gpu
  strategy: ddp
  sync_batchnorm: true
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 2
  resume_from_checkpoint: null
  profiler: null
  benchmark: null
  deterministic: null
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  inference_mode: true
ckpt_path: null
model:
  class_path: aimless.lightning.waveform.WaveformSeparator
  init_args:
    model:
      class_path: aimless.models.hdemucs.PretrainedHDemucs
      init_args:
        sources:
          - vocals
          - drums
          - bass
          - other
        download_weights: true
    criterion:
      class_path: aimless.loss.time.L1Loss
    transforms:
      - class_path: aimless.augment.SpeedPerturb
        init_args:
          orig_freq: 44100
          speeds:
            - 90
            - 100
            - 110
          p: 0.2
      - class_path: aimless.augment.RandomPitch
        init_args:
          semitones:
            - -1
            - 1
            - 0
            - 1
            - 2
          p: 0.2
    targets: {vocals, drums, bass, other}
data:
  class_path: data.lightning.musdb.MUSDB
  init_args:
    root: /import/c4dm-datasets-ext/musdb18hq/
    seq_duration: 8.0
    samples_per_track: 500
    random: true
    random_track_mix: true
    transforms:
      - class_path: data.augment.RandomGain
      - class_path: data.augment.RandomFlipPhase
        init_args:
          p: 0.5
      - class_path: data.augment.RandomSwapLR
        init_args:
          p: 0.5
      - class_path: data.augment.LimitAug
        init_args:
          sample_rate: 44100
    batch_size: 3
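Note that with download_weights: true the wrapper ignores the sources list above and uses the bundle's own source set. A quick way to inspect that bundle (standard torchaudio pipeline API; the printed order is the one documented for HDEMUCS_HIGH_MUSDB_PLUS):

from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS

bundle = HDEMUCS_HIGH_MUSDB_PLUS
print(bundle.sample_rate)   # 44100
model = bundle.get_model()  # downloads the pretrained weights on first call
print(model.sources)        # expected: ['drums', 'bass', 'other', 'vocals']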
150 changes: 150 additions & 0 deletions cfg/hdemucs_cjs_pretrained_fx.yaml
@@ -0,0 +1,150 @@
# pytorch_lightning==1.8.5.post0
seed_everything: true
trainer:
  logger: true
  enable_checkpointing: true
  callbacks: pytorch_lightning.callbacks.LearningRateMonitor
  default_root_dir: "/import/c4dm-datasets-ext/sdx-2023/logs-cjs"
  gradient_clip_val: null
  gradient_clip_algorithm: null
  num_nodes: 1
  num_processes: null
  devices: 3
  gpus: null
  auto_select_gpus: false
  tpu_cores: null
  ipus: null
  enable_progress_bar: true
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 1
  fast_dev_run: false
  accumulate_grad_batches: 8
  max_epochs: 100
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  val_check_interval: null
  log_every_n_steps: 1
  accelerator: gpu
  strategy: ddp
  sync_batchnorm: true
  precision: 32
  enable_model_summary: true
  num_sanity_val_steps: 2
  resume_from_checkpoint: null
  profiler: null
  benchmark: null
  deterministic: null
  reload_dataloaders_every_n_epochs: 0
  auto_lr_find: false
  replace_sampler_ddp: true
  detect_anomaly: false
  auto_scale_batch_size: false
  plugins: null
  amp_backend: native
  amp_level: null
  move_metrics_to_cpu: false
  multiple_trainloader_mode: max_size_cycle
  inference_mode: true
ckpt_path: null
model:
  class_path: aimless.lightning.waveform.WaveformSeparator
  init_args:
    model:
      class_path: aimless.models.hdemucs.PretrainedHDemucs
      init_args:
        sources:
          - vocals
          - drums
          - bass
          - other
        download_weights: true
    criterion:
      class_path: aimless.loss.time.L1Loss
    transforms:
      - class_path: aimless.augment.SpeedPerturb
        init_args:
          orig_freq: 44100
          speeds:
            - 90
            - 100
            - 110
          p: 0.2
      - class_path: aimless.augment.RandomPitch
        init_args:
          semitones:
            - -1
            - 1
            - 0
            - 1
            - 2
          p: 0.2
    targets: {vocals, drums, bass, other}
data:
  class_path: data.lightning.musdb.MUSDB
  init_args:
    root: /import/c4dm-datasets-ext/musdb18hq/
    seq_duration: 8.0
    samples_per_track: 500
    transforms:
      - class_path: data.augment.RandomParametricEQ
        init_args:
          sample_rate: 44100
          p: 0.7
      - class_path: data.augment.RandomPedalboardDistortion
        init_args:
          sample_rate: 44100
          p: 0.01
      - class_path: data.augment.RandomPedalboardDelay
        init_args:
          sample_rate: 44100
          p: 0.02
      - class_path: data.augment.RandomPedalboardChorus
        init_args:
          sample_rate: 44100
          p: 0.01
      - class_path: data.augment.RandomPedalboardPhaser
        init_args:
          sample_rate: 44100
          p: 0.01
      - class_path: data.augment.RandomPedalboardCompressor
        init_args:
          sample_rate: 44100
          p: 0.5
      - class_path: data.augment.RandomPedalboardReverb
        init_args:
          sample_rate: 44100
          p: 0.2
      - class_path: data.augment.RandomStereoWidener
        init_args:
          sample_rate: 44100
          p: 0.3
      - class_path: data.augment.RandomPedalboardLimiter
        init_args:
          sample_rate: 44100
          p: 0.1
      - class_path: data.augment.LoudnessNormalize
        init_args:
          sample_rate: 44100
          target_lufs_db: -32.0
          p: 1.0
      - class_path: data.augment.LimitAug
        init_args:
          sample_rate: 44100
    random: true
    random_track_mix: true
    batch_size: 3
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 0.0001
lr_scheduler:
  class_path: torch.optim.lr_scheduler.CosineAnnealingLR
  init_args:
    T_max: 100
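Illustrative only: the data.augment.RandomPedalboard* transforms listed above presumably wrap Spotify's pedalboard library; the snippet below shows the underlying library call on dummy audio, not this repo's transform API, and the specific plugins and parameter values are placeholders:

import numpy as np
from pedalboard import Pedalboard, Compressor, Reverb, Limiter

# Chain a few effects and apply them to 1 s of stereo noise at 44.1 kHz.
board = Pedalboard([Compressor(threshold_db=-20.0), Reverb(room_size=0.3), Limiter()])
audio = np.random.randn(2, 44100).astype(np.float32)
processed = board(audio, 44100)  # returns audio with the same shape as the input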