Merge branch 'release/4.4'

epratheeban · Mar 9, 2022 · ffbbb37 · ffbbb37
2 parents 424b479 + 94b5151
commit ffbbb37
Show file tree

Hide file tree

Showing 9 changed files with 166 additions and 64 deletions.
diff --git a/.gitignore b/.gitignore
@@ -58,4 +58,7 @@ doc/.ipynb_checkpoints
 # emacs temporary files
 *~
 
+# PyCharm
+.idea/
+
 .mypy_cache/
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -2,6 +2,16 @@
 Changelog
 #########
 
+Version 4.4 (2022-03-09)
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+- BREAKING: Timeline.__init__ silently drops empty segments instead of raising ValueError
+- BREAKING: Timeline.extent() returns Segment(0.0, 0.0) for empty timelines
+- feat: add "duration" option to Annotation.discretize
+- fix: handle various corner cases in 1D pdist and cdist
+- fix: fix documentation of {Timeline | Annotation}.__bool__
+- test: check robustness to Segment.set_precision
+
 Version 4.3 (2021-10-11)
 ~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py
@@ -244,9 +244,9 @@ def __bool__(self):
         """Emptiness
 
         >>> if annotation:
-        ...    # annotation is empty
-        ... else:
         ...    # annotation is not empty
+        ... else:
+        ...    # annotation is empty
         """
         return len(self._tracks) > 0
 
@@ -1375,9 +1375,10 @@ def __mul__(self, other: "Annotation") -> np.ndarray:
 
     def discretize(
         self,
-        support: Segment = None,
-        resolution: Union[float, SlidingWindow] = 0.1,
-        labels: List[Hashable] = None,
+        support: Optional[Segment] = None,
+        resolution: Union[float, SlidingWindow] = 0.01,
+        labels: Optional[List[Hashable]] = None,
+        duration: Optional[float] = None,
     ):
         """Discretize
         
@@ -1390,6 +1391,10 @@ def discretize(
             Defaults to 10ms frames.
         labels : list of labels, optional
             Defaults to self.labels()
+        duration : float, optional
+            Overrides support duration: when provided, the number of returned
+            frames is always round(duration / resolution.step), whereas it
+            might otherwise vary because of rounding errors.
 
         Returns
         -------
@@ -1416,8 +1421,11 @@ def discretize(
             )
 
         start_frame = resolution.closest_frame(start_time)
-        end_frame = resolution.closest_frame(end_time)
-        num_frames = end_frame - start_frame
+        if duration is None:
+            end_frame = resolution.closest_frame(end_time)
+            num_frames = end_frame - start_frame
+        else:
+            num_frames = int(round(duration / resolution.step))
 
         data = np.zeros((num_frames, len(labels)), dtype=np.uint8)
         for k, label in enumerate(labels):

diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py
@@ -146,11 +146,9 @@ def __init__(self,
         if segments is None:
             segments = ()
 
-        # set of segments  (used for checking inclusion)
-        segments_set = set(segments)
-
-        if any(not segment for segment in segments_set):
-            raise ValueError('Segments must not be empty.')
+        # set of segments (used for checking inclusion)
+        # Store only non-empty Segments.
+        segments_set = set([segment for segment in segments if segment])
 
         self.segments_set_ = segments_set
 
@@ -179,9 +177,9 @@ def __bool__(self):
         """Emptiness
 
         >>> if timeline:
-        ...    # timeline is empty
-        ... else:
         ...    # timeline is not empty
+        ... else:
+        ...    # timeline is empty
         """
         return len(self.segments_set_) > 0
 
@@ -796,11 +794,10 @@ def extent(self) -> Segment:
             start = segments_boundaries_[0]
             end = segments_boundaries_[-1]
             return Segment(start=start, end=end)
-        else:
-            import numpy as np
-            return Segment(start=np.inf, end=-np.inf)
 
-    def support_iter(self, collar: float = 0.) -> Iterator[Segment]:
+        return Segment(start=0.0, end=0.0)
+
+    def support_iter(self, collar: float = 0.0) -> Iterator[Segment]:
         """Like `support` but returns a segment generator instead
 
         See also

diff --git a/pyannote/core/utils/distance.py b/pyannote/core/utils/distance.py
@@ -47,11 +47,11 @@ def l2_normalize(X: np.ndarray):
     """
 
     norm = np.sqrt(np.sum(X ** 2, axis=1))
-    norm[norm == 0] = 1.
+    norm[norm == 0] = 1.0
     return (X.T / norm).T
 
 
-def dist_range(metric='euclidean', normalize=False):
+def dist_range(metric="euclidean", normalize=False):
     """Return range of possible distance between two vectors
 
     Parameters
@@ -67,42 +67,44 @@ def dist_range(metric='euclidean', normalize=False):
         Range of possible distance.
     """
 
-    if metric == 'euclidean':
+    if metric == "euclidean":
         if normalize:
-            return (0., 2.)
-        return (0., np.inf)
+            return (0.0, 2.0)
+        return (0.0, np.inf)
 
-    if metric == 'sqeuclidean':
+    if metric == "sqeuclidean":
         if normalize:
-            return (0., 4.)
-        return (0., np.inf)
+            return (0.0, 4.0)
+        return (0.0, np.inf)
 
-    if metric == 'cosine':
-        return (0., 2.)
+    if metric == "cosine":
+        return (0.0, 2.0)
 
-    if metric == 'angular':
-        return (0., np.pi)
+    if metric == "angular":
+        return (0.0, np.pi)
 
-    msg = f'dist_range does not support {metric} metric.'
+    msg = f"dist_range does not support {metric} metric."
     raise NotImplementedError(msg)
 
 
 def _pdist_func_1D(X, func):
     """Helper function for pdist"""
 
-    X = X.squeeze()
-    n_items, = X.shape
+    (n_items,) = X.shape
+
+    if n_items < 2:
+        return np.array([])
 
     distances = []
 
     for i in range(n_items - 1):
-        distance = func(X[i], X[i+1:])
+        distance = func(X[i], X[i + 1 :])
         distances.append(distance)
 
     return np.hstack(distances)
 
 
-def pdist(fX, metric='euclidean', **kwargs):
+def pdist(fX, metric="euclidean", **kwargs):
     """Same as scipy.spatial.distance with support for additional metrics
 
     * 'angular': pairwise angular distance
@@ -112,35 +114,36 @@ def pdist(fX, metric='euclidean', **kwargs):
     * 'average': pairwise average (only for 1-dimensional fX)
     """
 
-    if metric == 'angular':
-        cosine = scipy.spatial.distance.pdist(
-            fX, metric='cosine', **kwargs)
+    if metric == "angular":
+        cosine = scipy.spatial.distance.pdist(fX, metric="cosine", **kwargs)
         return np.arccos(np.clip(1.0 - cosine, -1.0, 1.0))
 
-    elif metric == 'equal':
+    elif metric == "equal":
+        assert fX.ndim == 1, f"'{metric}' metric only supports 1-dimensional fX."
         return _pdist_func_1D(fX, lambda x, X: x == X)
 
-    elif metric == 'minimum':
+    elif metric == "minimum":
+        assert fX.ndim == 1, f"'{metric}' metric only supports 1-dimensional fX."
         return _pdist_func_1D(fX, np.minimum)
 
-    elif metric == 'maximum':
+    elif metric == "maximum":
+        assert fX.ndim == 1, f"'{metric}' metric only supports 1-dimensional fX."
         return _pdist_func_1D(fX, np.maximum)
 
-    elif metric == 'average':
-        return _pdist_func_1D(fX, lambda x, X: .5 * (x + X))
+    elif metric == "average":
+        assert fX.ndim == 1, f"'{metric}' metric only supports 1-dimensional fX."
+        return _pdist_func_1D(fX, lambda x, X: 0.5 * (x + X))
 
     else:
         return scipy.spatial.distance.pdist(fX, metric=metric, **kwargs)
 
 
 def _cdist_func_1D(X_trn, X_tst, func):
     """Helper function for cdist"""
-    X_trn = X_trn.squeeze()
-    X_tst = X_tst.squeeze()
     return np.vstack(func(x_trn, X_tst) for x_trn in iter(X_trn))
 
 
-def cdist(fX_trn, fX_tst, metric='euclidean', **kwargs):
+def cdist(fX_trn, fX_tst, metric="euclidean", **kwargs):
     """Same as scipy.spatial.distance.cdist with support for additional metrics
 
     * 'angular': pairwise angular distance
@@ -150,28 +153,38 @@ def cdist(fX_trn, fX_tst, metric='euclidean', **kwargs):
     * 'average': pairwise average (only for 1-dimensional fX)
     """
 
-    if metric == 'angular':
-        cosine = scipy.spatial.distance.cdist(
-            fX_trn, fX_tst, metric='cosine', **kwargs)
+    if metric == "angular":
+        cosine = scipy.spatial.distance.cdist(fX_trn, fX_tst, metric="cosine", **kwargs)
         return np.arccos(np.clip(1.0 - cosine, -1.0, 1.0))
 
-    elif metric == 'equal':
-        return _cdist_func_1D(fX_trn, fX_tst,
-                              lambda x_trn, X_tst: x_trn == X_tst)
+    elif metric == "equal":
+        assert (
+            fX_trn.ndim == 1 and fX_tst.ndim == 1
+        ), f"'{metric}' metric only supports 1-dimensional fX_trn and fX_tst."
+        return _cdist_func_1D(fX_trn, fX_tst, lambda x_trn, X_tst: x_trn == X_tst)
 
-    elif metric == 'minimum':
+    elif metric == "minimum":
+        assert (
+            fX_trn.ndim == 1 and fX_tst.ndim == 1
+        ), f"'{metric}' metric only supports 1-dimensional fX_trn and fX_tst."
         return _cdist_func_1D(fX_trn, fX_tst, np.minimum)
 
-    elif metric == 'maximum':
+    elif metric == "maximum":
+        assert (
+            fX_trn.ndim == 1 and fX_tst.ndim == 1
+        ), f"'{metric}' metric only supports 1-dimensional fX_trn and fX_tst."
         return _cdist_func_1D(fX_trn, fX_tst, np.maximum)
 
-    elif metric == 'average':
-        return _cdist_func_1D(fX_trn, fX_tst,
-                              lambda x_trn, X_tst: .5 * (x_trn + X_tst))
+    elif metric == "average":
+        assert (
+            fX_trn.ndim == 1 and fX_tst.ndim == 1
+        ), f"'{metric}' metric only supports 1-dimensional fX_trn and fX_tst."
+        return _cdist_func_1D(
+            fX_trn, fX_tst, lambda x_trn, X_tst: 0.5 * (x_trn + X_tst)
+        )
 
     else:
-        return scipy.spatial.distance.cdist(
-            fX_trn, fX_tst, metric=metric, **kwargs)
+        return scipy.spatial.distance.cdist(fX_trn, fX_tst, metric=metric, **kwargs)
 
 
 def to_condensed(n, i, j):
@@ -200,7 +213,7 @@ def to_condensed(n, i, j):
     """
     i, j = np.array(i), np.array(j)
     if np.any(i == j):
-        raise ValueError('i and j should be different.')
+        raise ValueError("i and j should be different.")
     i, j = np.minimum(i, j), np.maximum(i, j)
     return np.int64(i * n - i * i / 2 - 3 * i / 2 + j - 1)
 
@@ -222,6 +235,6 @@ def to_squared(n, k):
 
     """
     k = np.array(k)
-    i = np.int64(n - np.sqrt(-8*k + 4*n**2 - 4*n + 1)/2 - 1/2)
-    j = np.int64(i**2/2 - i*n + 3*i/2 + k + 1)
+    i = np.int64(n - np.sqrt(-8 * k + 4 * n ** 2 - 4 * n + 1) / 2 - 1 / 2)
+    j = np.int64(i ** 2 / 2 - i * n + 3 * i / 2 + k + 1)
     return i, j
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_segment.py b/tests/test_segment.py
@@ -1,4 +1,5 @@
 from pyannote.core import Segment
+from tests.utils import preserve_segment_state
 
 
 def test_creation():
@@ -46,7 +47,9 @@ def test_other_operation():
     assert segment ^ other_segment == Segment(9, 14)
 
 
+@preserve_segment_state
 def test_segment_precision_mode():
-    assert not Segment(90/1000, 90/1000+240/1000) == Segment(90/1000, 330/1000)
+    Segment.set_precision(None)
+    assert not Segment(90 / 1000, 90 / 1000 + 240 / 1000) == Segment(90 / 1000, 330 / 1000)
     Segment.set_precision(4)
-    assert Segment(90/1000, 90/1000+240/1000) == Segment(90/1000, 330/1000)
+    assert Segment(90 / 1000, 90 / 1000 + 240 / 1000) == Segment(90 / 1000, 330 / 1000)
diff --git a/tests/test_timeline.py b/tests/test_timeline.py
@@ -31,6 +31,8 @@
 from pyannote.core import Annotation
 from pyannote.core import Segment
 from pyannote.core import Timeline
+from pyannote.core import segment
+from tests.utils import preserve_segment_state
 
 
 @pytest.fixture
@@ -118,6 +120,14 @@ def test_gaps(timeline):
                                      Segment(8, 8.5)]
 
 
+@preserve_segment_state
+def test_empty_gaps():
+    empty_timeline = Timeline(uri='MyEmptyGaps')
+    assert list(empty_timeline.gaps()) == []
+    Segment.set_precision(3)
+    assert list(empty_timeline.gaps()) == []
+
+
 def test_crop(timeline):
     selection = Segment(3, 7)
 
@@ -223,3 +233,43 @@ def test_extrude():
     expected_answer.add(Segment(6, 7))
 
     assert timeline.extrude(removed, mode='loose') == expected_answer
+
+def test_initialized_with_empty_segments():
+  # The first timeline includes empty segments.
+  first_timeline = Timeline([Segment(1, 5), Segment(6, 6), Segment(7, 7), Segment(8, 10)])
+
+  # The second has no empty segments.
+  second_timeline = Timeline([Segment(1, 5), Segment(8, 10)])
+
+  assert first_timeline == second_timeline
+
+
+def test_added_empty_segments():
+  # The first timeline includes empty segments.
+  first_timeline = Timeline()
+  first_timeline.add(Segment(1, 5))
+  first_timeline.add(Segment(6, 6))
+  first_timeline.add(Segment(7, 7))
+  first_timeline.add(Segment(8, 10))
+
+  # The second has no empty segments.
+  second_timeline = Timeline()
+  second_timeline.add(Segment(1, 5))
+  second_timeline.add(Segment(8, 10))
+
+  assert first_timeline == second_timeline
+
+
+def test_consistent_timelines_with_empty_segments():
+  # The first timeline is initialized with Segments, some empty.
+  first_timeline = Timeline([Segment(1, 5), Segment(6, 6), Segment(7, 7), Segment(8, 10)])
+
+  # The second timeline adds one Segment at a time, including empty ones.
+  second_timeline = Timeline()
+  second_timeline.add(Segment(1, 5))
+  second_timeline.add(Segment(6, 6))
+  second_timeline.add(Segment(7, 7))
+  second_timeline.add(Segment(8, 10))
+
+  assert first_timeline == second_timeline
+