# preprocess.py
# Forked from xcmyz/FastSpeech.
import torch
import numpy as np
import shutil
import os
from utils import load_data, get_Tacotron2, get_WaveGlow
from utils import process_text, load_data
from data import ljspeech
import hparams as hp
import waveglow
import Audio
def preprocess_ljspeech(filename):
    """Preprocess the LJSpeech dataset and publish its metadata file.

    Runs ``ljspeech.build_from_path`` with ``filename`` as the input
    directory and ``hp.mel_ground_truth`` as the output directory, writes
    the returned metadata to ``train.txt`` inside that output directory via
    ``write_metadata``, and finally moves that file to ``data/train.txt``.

    Args:
        filename: Path of the dataset directory; forwarded unchanged to
            ``ljspeech.build_from_path`` as its input directory.
    """
    in_dir = filename
    out_dir = hp.mel_ground_truth
    # exist_ok=True already tolerates an existing directory, so a separate
    # (race-prone) os.path.exists() check is unnecessary.
    os.makedirs(out_dir, exist_ok=True)

    metadata = ljspeech.build_from_path(in_dir, out_dir)
    write_metadata(metadata, out_dir)

    # Ensure the destination directory exists so the move cannot fail after
    # the build step has already completed.
    os.makedirs("data", exist_ok=True)
    # Move from the same directory write_metadata() was given, rather than
    # re-reading hp.mel_ground_truth.
    shutil.move(os.path.join(out_dir, "train.txt"),
                os.path.join("data", "train.txt"))
def write_metadata(metadata, out_dir):
    """Write each metadata entry on its own line of ``<out_dir>/train.txt``.

    The file is opened in write mode with UTF-8 encoding, so any existing
    ``train.txt`` in ``out_dir`` is overwritten.

    Args:
        metadata: Iterable of strings; a newline is appended to each entry.
        out_dir: Directory in which ``train.txt`` is created.
    """
    target = os.path.join(out_dir, 'train.txt')
    with open(target, 'w', encoding='utf-8') as handle:
        handle.writelines(entry + '\n' for entry in metadata)
def main():
    """Preprocess LJSpeech, then save one alignment array per text entry.

    Steps:
      1. Call ``preprocess_ljspeech`` on ``data/LJSpeech-1.1``, which leaves
         the metadata in ``data/train.txt``.
      2. Read ``data/train.txt`` with ``process_text``.
      3. For every text, load the matching ``ljspeech-mel-XXXXX.npy`` file
         from ``hp.mel_ground_truth`` (file numbers are 1-based), pass the
         text, the mel array and the Tacotron2 model to ``load_data``, and
         save its third return value to ``<hp.alignment_path>/<index>.npy``
         (indices are 0-based).
    """
    preprocess_ljspeech(os.path.join("data", "LJSpeech-1.1"))
    texts = process_text(os.path.join("data", "train.txt"))

    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no
    # check-then-create race, and missing parent directories are created.
    os.makedirs(hp.alignment_path, exist_ok=True)

    tacotron2 = get_Tacotron2()

    # Index of the first text to process; entries before it are skipped.
    first_index = 0
    for offset, text in enumerate(texts[first_index:]):
        print(offset)
        index = first_index + offset

        # Drop the last character of the entry.
        # NOTE(review): presumably a trailing newline left by process_text
        # -- confirm against utils.process_text.
        character = text[:-1]

        mel_gt_name = os.path.join(
            hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (index + 1))
        mel_gt_target = np.load(mel_gt_name)

        # Only the third value returned by load_data is kept.
        # NOTE(review): presumably the alignment/duration array -- confirm.
        _, _, D = load_data(character, mel_gt_target, tacotron2)

        np.save(os.path.join(hp.alignment_path, str(index) + ".npy"),
                D, allow_pickle=False)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()