Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make training work out of the box on Colab/Kaggle and fix calls to deprecated library APIs. #167

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: hifi-gan
channels:
- defaults
dependencies:
- pip
- pip:
- torch==1.4.0
- numpy==1.17.4
- librosa==0.7.2
- scipy==1.4.1
- tensorboard==2.0
- soundfile==0.10.3.post1
- matplotlib==3.1.3
17 changes: 3 additions & 14 deletions meldataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,33 @@

MAX_WAV_VALUE = 32768.0


def load_wav(full_path):
    """Load the audio file at ``full_path`` and return ``(data, sampling_rate)``.

    ``read`` hands back the pair as ``(sampling_rate, data)``; the order is
    swapped here so the samples come first.
    """
    rate, samples = read(full_path)
    return samples, rate


def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` computed element-wise with NumPy.

    Values below ``clip_val`` are raised to ``clip_val`` before the
    logarithm is taken; there is no upper clip.
    """
    floored = np.clip(x, a_min=clip_val, a_max=None)
    scaled = floored * C
    return np.log(scaled)


def dynamic_range_decompression(x, C=1):
    """Return ``exp(x) / C`` computed element-wise with NumPy."""
    expanded = np.exp(x)
    return expanded / C


def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Return ``log(clamp(x, min=clip_val) * C)`` computed with torch ops.

    Values below ``clip_val`` are raised to ``clip_val`` before the
    logarithm is taken.
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)


def dynamic_range_decompression_torch(x, C=1):
    """Return ``exp(x) / C`` computed with torch ops."""
    expanded = torch.exp(x)
    return expanded / C


def spectral_normalize_torch(magnitudes):
    """Apply ``dynamic_range_compression_torch`` with its default arguments."""
    return dynamic_range_compression_torch(magnitudes)


def spectral_de_normalize_torch(magnitudes):
    """Apply ``dynamic_range_decompression_torch`` with its default arguments."""
    return dynamic_range_decompression_torch(magnitudes)


# Module-level caches populated by mel_spectrogram():
#   mel_basis   -- mel filterbank tensors, stored under "<fmax>_<device>" keys
#   hann_window -- Hann window tensors, stored under "<device>" keys
mel_basis = {}
hann_window = {}


def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
if torch.min(y) < -1.:
print('min value is ', torch.min(y))
Expand All @@ -54,24 +45,23 @@ def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin,

global mel_basis, hann_window
if fmax not in mel_basis:
mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)

y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
y = y.squeeze(1)

spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[str(y.device)],
center=center, pad_mode='reflect', normalized=False, onesided=True)
center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)

spec = torch.sqrt(spec.pow(2).sum(-1)+(1e-9))
spec = torch.abs(spec)

spec = torch.matmul(mel_basis[str(fmax)+'_'+str(y.device)], spec)
spec = spectral_normalize_torch(spec)

return spec


def get_dataset_filelist(a):
with open(a.input_training_file, 'r', encoding='utf-8') as fi:
training_files = [os.path.join(a.input_wavs_dir, x.split('|')[0] + '.wav')
Expand All @@ -82,7 +72,6 @@ def get_dataset_filelist(a):
for x in fi.read().split('\n') if len(x) > 0]
return training_files, validation_files


class MelDataset(torch.utils.data.Dataset):
def __init__(self, training_files, segment_size, n_fft, num_mels,
hop_size, win_size, sampling_rate, fmin, fmax, split=True, shuffle=True, n_cache_reuse=1,
Expand Down
14 changes: 7 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
torch==1.4.0
numpy==1.17.4
librosa==0.7.2
scipy==1.4.1
tensorboard==2.0
soundfile==0.10.3.post1
matplotlib==3.1.3
torch
numpy
librosa
scipy
tensorboardX
soundfile
matplotlib