Merge pull request #96 from hyama5/feature/fix_mlpg

Fix MLPG
r9y9 · Feb 28, 2020 · d434de8 · d434de8
2 parents a011f55 + e63c6f9
commit d434de8
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 5 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -1,8 +1,13 @@
 Change log
 ==========
 
-v0.0.20 <2020-xx-xx>
+v0.0.21 <2020-xx-xx>
+--------------------
+- `#95`_: Fixed parameter generation error of MLPG.
+
 
+v0.0.20 <2020-xx-xx>
+--------------------
 - `#90`_: Expand available speakers list for CMU Arctic dataset
 - `#91`_: Add support for JVS dataset: a free Japanese multi-speaker voice corpus.
 

diff --git a/nnmnkwii/paramgen/_mlpg.py b/nnmnkwii/paramgen/_mlpg.py
@@ -176,6 +176,8 @@ def mlpg(mean_frames, variance_frames, windows):
     num_windows = len(windows)
     win_mats = build_win_mats(windows, T)
 
+    max_win_width = np.max([max(win_mat.l, win_mat.u) for win_mat in win_mats])
+
     # workspaces; those will be updated in the following generation loop
     means = np.zeros((T, num_windows))
     precisions = np.zeros((T, num_windows))
@@ -188,6 +190,11 @@ def mlpg(mean_frames, variance_frames, windows):
             precisions[:, win_idx] = 1 / \
                 variance_frames[:, win_idx * static_dim + d]
 
+            # use zero precisions at edge frames for dynamic features
+            if win_idx != 0:
+                precisions[:max_win_width, win_idx] = 0
+                precisions[-max_win_width:, win_idx] = 0
+
         bs = precisions * means
         b, P = build_poe(bs, precisions, win_mats)
         y[:, d] = bla.solveh(P, b)
@@ -239,6 +246,8 @@ def mlpg_grad(mean_frames, variance_frames, windows, grad_output):
     win_mats = build_win_mats(windows, T)
     static_dim = D // len(windows)
 
+    max_win_width = np.max([max(win_mat.l, win_mat.u) for win_mat in win_mats])
+
     grads = np.zeros((T, D), dtype=np.float32)
     for d in range(static_dim):
         sdw = max([win_mat.l + win_mat.u for win_mat in win_mats])
@@ -253,6 +262,11 @@ def mlpg_grad(mean_frames, variance_frames, windows, grad_output):
             precisions[win_idx] = 1 / \
                 variance_frames[:, win_idx * static_dim + d]
 
+            # use zero precisions at edge frames for dynamic features
+            if win_idx != 0:
+                precisions[win_idx, :max_win_width] = 0
+                precisions[win_idx, -max_win_width:] = 0
+
             bm.dot_mm_plus_equals(win_mat.T, win_mat,
                                   target_bm=R, diag=precisions[win_idx])
 
@@ -334,14 +348,31 @@ def unit_variance_mlpg_matrix(windows, T):
     """
     win_mats = build_win_mats(windows, T)
     sdw = np.max([win_mat.l + win_mat.u for win_mat in win_mats])
+    max_win_width = np.max([max(win_mat.l, win_mat.u) for win_mat in win_mats])
 
     P = bm.zeros(sdw, sdw, T)
+
+    # set edge precisions to zero
+    precisions = bm.zeros(0, 0, T)
+    precisions.data[:, max_win_width:-max_win_width] += 1.0
+
+    mod_win_mats = []
     for win_index, win_mat in enumerate(win_mats):
-        bm.dot_mm_plus_equals(win_mat.T, win_mat, target_bm=P)
+        if win_index != 0:
+            # use zero precisions for dynamic features
+            mod_win_mat = bm.dot_mm(precisions, win_mat)
+            bm.dot_mm_plus_equals(mod_win_mat.T, win_mat, target_bm=P)
+
+            mod_win_mats.append(mod_win_mat)
+        else:
+            # static features
+            bm.dot_mm_plus_equals(win_mat.T, win_mat, target_bm=P)
+            mod_win_mats.append(win_mat)
+
     chol_bm = bla.cholesky(P, lower=True)
     Pinv = cholesky_inv_banded(chol_bm.full(), width=chol_bm.l + chol_bm.u + 1)
 
-    cocatenated_window = full_window_mat(win_mats, T)
+    cocatenated_window = full_window_mat(mod_win_mats, T)
     return Pinv.dot(cocatenated_window.T).astype(np.float32)
 
 

diff --git a/tests/test_autograd.py b/tests/test_autograd.py
@@ -34,6 +34,12 @@ def _get_windows_set():
             (1, 1, np.array([-0.5, 0.0, 0.5])),
             (1, 1, np.array([1.0, -2.0, 1.0])),
         ],
+        # Static + delta + deltadelta (wide window)
+        [
+            (0, 0, np.array([1.0])),
+            (2, 2, np.array([1.0, -8.0, 0.0, 8.0, -1.0]) / 12.0),
+            (2, 2, np.array([-1.0, 16.0, -30.0, 16.0, -1.0]) / 12.0),
+        ]
     ]
     return windows_set
 
@@ -161,7 +167,7 @@ def test_minibatch_unit_variance_mlpg_gradcheck():
         for i in range(batch_size):
             grad1 = reshaped_means.grad.data.numpy()
             grad2 = reshaped_means_expanded.grad[i].data.numpy()
-            assert np.allclose(grad1, grad2, atol=1.05e-08)
+            assert np.allclose(grad1, grad2, atol=1.00e-06)
 
         # Case 3: 2d with non-reshaped input
         y_hat3 = AF.unit_variance_mlpg(R, means)
@@ -180,7 +186,7 @@ def test_minibatch_unit_variance_mlpg_gradcheck():
         for i in range(batch_size):
             grad1 = means.grad.data.numpy()
             grad2 = means_expanded.grad[i].data.numpy()
-            assert np.allclose(grad1, grad2, atol=1.05e-08)
+            assert np.allclose(grad1, grad2, atol=1.00e-06)
 
 
 def test_mlpg_gradcheck():

diff --git a/tests/test_paramgen.py b/tests/test_paramgen.py
@@ -22,6 +22,12 @@ def _get_windows_set():
             (1, 1, np.array([-0.5, 0.0, 0.5])),
             (1, 1, np.array([1.0, -2.0, 1.0])),
         ],
+        # Static + delta + deltadelta (wide window)
+        [
+            (0, 0, np.array([1.0])),
+            (2, 2, np.array([1.0, -8.0, 0.0, 8.0, -1.0]) / 12.0),
+            (2, 2, np.array([-1.0, 16.0, -30.0, 16.0, -1.0]) / 12.0),
+        ]
     ]
     return windows_set