Skip to content

Commit

Permalink
add length filtering
Browse files (browse the repository at this point in the history)
  • Loading branch information
lexkoro committed Apr 18, 2024
1 parent 1d52473 commit a96e9c8
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 10 deletions.
4 changes: 3 additions & 1 deletion configs/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"c_kl_r": 0.01,
"use_sr": true,
"max_speclen": 512,
"port": "8001",
"port": "64435",
"keep_ckpts": 2,
"all_in_mem": false,
"vol_aug": false
Expand All @@ -38,6 +38,8 @@
"n_mel_channels": 128,
"mel_fmin": 0.0,
"mel_fmax": null,
"min_file_length": 0.3,
"max_file_length": 10.0,
"unit_interpolate_mode": "nearest"
},
"model": {
Expand Down
12 changes: 5 additions & 7 deletions data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False):
self.num_mels = hparams.data.n_mel_channels
self.mel_fmin = hparams.data.mel_fmin
self.mel_fmax = hparams.data.mel_fmax
self.min_file_length = hparams.data.min_file_length * self.sampling_rate
self.max_file_length = hparams.data.max_file_length * self.sampling_rate
# self.spk_map_inv = {v: k for k, v in self.spk_map.items()}

random.seed(1234)
Expand All @@ -52,9 +54,9 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False):

def _filter_long_files(self, audio_paths):
filtered = []
max_length = 22050 * 9.0 # 9 seconds of samples at 22050 Hz

for p, speaker in audio_paths:
if (Path(p).stat().st_size // 2) < max_length:
if self.min_file_length <(Path(p).stat().st_size // 2) < self.max_file_length:
filtered.append([p, speaker])

print("Audiopaths before filtering:", len(audio_paths))
Expand Down Expand Up @@ -125,11 +127,7 @@ def get_audio(self, filename):
)
audio_norm = audio_norm[:, : lmin * self.hop_length]

if spec.shape[1] < 30:
print("skip too short audio:", filename)
return None
else:
return c, f0, spec, audio_norm, uv, ppg
return c, f0, spec, audio_norm, uv, ppg

def random_slice(self, c, f0, spec, audio_norm, uv, ppg):
if spec.shape[1] > 800:
Expand Down
8 changes: 6 additions & 2 deletions preprocess_f0_hubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ def parallel_process(filenames, num_processes, f0p, device):
with open("/workspace/vc_train.csv", "r") as f:
for line in f:
file_path = line.split("|")[0]
wav_paths.append(file_path.strip())
soft_path = file_path.replace(".wav", ".soft.pt")
f0_path = file_path.replace(".wav", ".rmvpe.pt")

if not os.path.exists(soft_path) and not os.path.exists(f0_path):
wav_paths.append(file_path.strip())

# preprocess f0 and hubert
parallel_process(wav_paths, 4, f0p, device)
parallel_process(wav_paths, 6, f0p, device)

0 comments on commit a96e9c8

Please sign in to comment.