Add min/max audio file length filtering

lexkoro · Apr 18, 2024 · a96e9c8
1 parent 1d52473
commit a96e9c8
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 10 deletions.
diff --git a/configs/config.json b/configs/config.json
@@ -22,7 +22,7 @@
     "c_kl_r": 0.01,
     "use_sr": true,
     "max_speclen": 512,
-    "port": "8001",
+    "port": "64435",
     "keep_ckpts": 2,
     "all_in_mem": false,
     "vol_aug": false
@@ -38,6 +38,8 @@
     "n_mel_channels": 128,
     "mel_fmin": 0.0,
     "mel_fmax": null,
+    "min_file_length": 0.3,
+    "max_file_length": 10.0,
     "unit_interpolate_mode": "nearest"
   },
   "model": {

diff --git a/data_utils.py b/data_utils.py
@@ -39,6 +39,8 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False):
         self.num_mels = hparams.data.n_mel_channels
         self.mel_fmin = hparams.data.mel_fmin
         self.mel_fmax = hparams.data.mel_fmax
+        self.min_file_length = hparams.data.min_file_length * self.sampling_rate
+        self.max_file_length = hparams.data.max_file_length * self.sampling_rate
         # self.spk_map_inv = {v: k for k, v in self.spk_map.items()}
 
         random.seed(1234)
@@ -52,9 +54,9 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False):
 
     def _filter_long_files(self, audio_paths):
         filtered = []
-        max_length = 22050 * 9.0  # 10 seconds
+
         for p, speaker in audio_paths:
-            if (Path(p).stat().st_size // 2) < max_length:
+            if self.min_file_length <(Path(p).stat().st_size // 2) < self.max_file_length:
                 filtered.append([p, speaker])
 
         print("Audiopaths before filtering:", len(audio_paths))
@@ -125,11 +127,7 @@ def get_audio(self, filename):
         )
         audio_norm = audio_norm[:, : lmin * self.hop_length]
 
-        if spec.shape[1] < 30:
-            print("skip too short audio:", filename)
-            return None
-        else:
-            return c, f0, spec, audio_norm, uv, ppg
+        return c, f0, spec, audio_norm, uv, ppg
 
     def random_slice(self, c, f0, spec, audio_norm, uv, ppg):
         if spec.shape[1] > 800:

diff --git a/preprocess_f0_hubert.py b/preprocess_f0_hubert.py
@@ -81,7 +81,11 @@ def parallel_process(filenames, num_processes, f0p, device):
     with open("/workspace/vc_train.csv", "r") as f:
         for line in f:
             file_path = line.split("|")[0]
-            wav_paths.append(file_path.strip())
+            soft_path = file_path.replace(".wav", ".soft.pt")
+            f0_path = file_path.replace(".wav", ".rmvpe.pt")
+
+            if not os.path.exists(soft_path) and not os.path.exists(f0_path):
+                wav_paths.append(file_path.strip())
 
     # preprocess f0 and hubert
-    parallel_process(wav_paths, 4, f0p, device)
+    parallel_process(wav_paths, 6, f0p, device)