-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataset_visualize.py
executable file
·87 lines (70 loc) · 3 KB
/
dataset_visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import saber
import pickle
import numpy as np
import saber.data.mesh.io as meshio
import saber.utils.filesystem as fs
import saber.data.audio as saber_audio
from speech_anime import viewer
from speech_anime.datasets import vocaset
# Input roots: the render loop below joins each with
# "data/<speaker>/neutral/<sentence id>" to form per-sentence prefixes.
root = "assets/voca-sr8k/dgrad"
offs_root = "assets/voca-sr8k/offsets"

# Rendered videos are written to "<output_root>/<speaker>/sentenceNN.mp4".
output_root = "assets/voca_videos/"

# Speaker ids to render; each is resolved through vocaset.get_speaker_alias().
speakers = [
    "f0",
    "m0",
    "m1",
    "m2",
    "m3",
    "f1",
    "f2",
    "f3",
    "m4",
    "f4",
]

# Forwarded to viewer.render_video() as the audio sample rate
# (presumably Hz, matching the "sr8k" asset directories - TODO confirm).
sample_rate = 8_000
# Frame rate used both to turn frame ids into timestamps and to render video.
_VIDEO_FPS = 60

# Frame files are named "<frame id>.npy"; anchored so that look-alike names
# cannot slip through and break the int() parse of the frame id.
_FRAME_FILE_PATTERN = r"^\d+\.npy$"


def _frame_id(npy_file):
    """Return the integer frame id encoded in the file name of *npy_file*."""
    return int(os.path.splitext(os.path.basename(npy_file))[0])


def _load_source(prefix, title, frames_key):
    """Build one source dict for the viewer from the data stored at *prefix*.

    Args:
        prefix: Path prefix of one sentence. ``prefix + "_audio"`` is a pickle
            holding the keys ``"audio"`` and ``"start_ts"``; ``prefix`` itself
            is searched for ``<frame id>.npy`` files.
        title: Value stored under the ``"title"`` key of the source.
        frames_key: Key under which the list of loaded frame arrays is stored
            (``"dgrad_3d"`` or ``"verts_off_3d"``).

    Returns:
        A dict with keys ``title``, ``audio``, ``frames_key`` and ``tslist``,
        or ``None`` when the ``_audio`` file does not exist.
    """
    audio_path = prefix + "_audio"
    if not os.path.exists(audio_path):
        return None

    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only use this on trusted, locally generated dataset files.
    with open(audio_path, "rb") as fp:
        data = pickle.load(fp)
    start_ts = data["start_ts"]

    # The ordering returned by fs.find_files is not visible from here, so
    # order explicitly by numeric frame id to keep tslist monotonic.
    npy_list = sorted(
        fs.find_files(prefix, _FRAME_FILE_PATTERN, False, True),
        key=_frame_id,
    )

    frames = []
    tslist = []
    for npy_file in saber.log.tqdm(npy_list, leave=False):
        frames.append(np.load(npy_file))
        # Frame id -> milliseconds at _VIDEO_FPS, shifted by the audio start
        # (presumably start_ts is in milliseconds as well - TODO confirm).
        tslist.append(_frame_id(npy_file) * 1000.0 / _VIDEO_FPS - start_ts)

    source = {"title": title, "audio": data["audio"]}
    source[frames_key] = frames
    source["tslist"] = tslist
    return source


def dump_video(template, dg_prefix, of_prefix, output_path):
    """Render the available sources of one sentence into a video file.

    Nothing is rendered when *output_path* already exists or when neither
    prefix has an accompanying ``_audio`` file.

    Args:
        template: Unused; kept so existing callers keep working.
        dg_prefix: Path prefix of the "dg" source (``dgrad_3d`` frames).
        of_prefix: Path prefix of the "offsets" source (``verts_off_3d``
            frames).
        output_path: Destination video path handed to the viewer.

    Returns:
        None.
    """
    if os.path.exists(output_path):
        return

    candidates = (
        _load_source(dg_prefix, "dg", "dgrad_3d"),
        _load_source(of_prefix, "offsets", "verts_off_3d"),
    )
    sources = [src for src in candidates if src is not None]
    if not sources:
        return

    viewer.render_video(
        sources,
        _VIDEO_FPS,
        sample_rate,
        save_video=True,
        video_path=output_path,
        grid_w=780,
        grid_h=780,
    )
# Render one video per (speaker, sentence) pair. For each speaker the template
# mesh is converted from .ply to .obj and handed to the viewer before that
# speaker's sentences are rendered.
for spk in speakers:
    print(spk)
    alias = vocaset.get_speaker_alias(spk)
    template_path = os.path.join(vocaset.root, f"templates/{alias}.ply")
    # Raise explicitly rather than `assert`: assert statements are removed
    # under `python -O`, which would let a missing template slip through.
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"template mesh not found: {template_path}")

    verts, faces = saber.mesh.read_mesh(template_path)
    # The .obj copy is written next to the .ply and its path is given to the viewer.
    obj_path = os.path.splitext(template_path)[0] + '.obj'
    saber.mesh.write_obj(obj_path, verts, faces)
    viewer.set_template_mesh(obj_path)

    # Sentence ids 0..39 on disk; output files are numbered from 1.
    for sid in range(0, 40):
        print(f"- render {sid+1}")
        dg_prefix = os.path.join(root, "data", spk, "neutral", f"{sid:03d}")
        of_prefix = os.path.join(offs_root, "data", spk, "neutral", f"{sid:03d}")
        output_path = os.path.join(output_root, spk, f"sentence{sid+1:02d}.mp4")
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        dump_video(verts, dg_prefix, of_prefix, output_path)