Commit
Merge pull request #150 from yfukai/data_conversion_apis
Data conversion apis
Showing 7 changed files with 241 additions and 6 deletions.
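For orientation, here is a minimal sketch of how the new conversion helpers are meant to wrap the existing tracking API. The spots table and the assumption that the functional `laptrack(coords)` call returns the track tree as an `nx.Graph` are illustrative, not part of this diff:

```python
import pandas as pd

from laptrack import laptrack, data_conversion

# hypothetical detections table: one row per detected point (columns chosen for illustration)
spots_df = pd.DataFrame(
    {
        "frame": [0, 0, 1, 1, 2, 2],
        "x": [0.0, 5.0, 0.1, 5.2, 0.2, 5.1],
        "y": [0.0, 5.0, 0.1, 4.9, 0.3, 5.0],
    }
)

# new in this PR: dataframe -> list of per-frame coordinate arrays
coords = data_conversion.convert_dataframe_to_coords(spots_df, ["x", "y"])

# assumption: the existing functional API returns the track tree as an nx.Graph
tree = laptrack(coords)

# new in this PR: track tree -> (track dataframe, split dataframe, merge dataframe)
track_df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)
```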
pyproject.toml:

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "laptrack"
-version = "0.1.7-alpha.5"
+version = "0.1.7-alpha.3"
 description = "LapTrack"
 authors = ["Yohsuke Fukai <[email protected]>"]
 license = "BSD-3-Clause"
```
`__init__.py` of the laptrack package:

```diff
@@ -4,5 +4,6 @@
 __email__ = "[email protected]"

 from ._tracking import laptrack, LapTrack, LapTrackMulti
+from . import data_conversion

-__all__ = ["laptrack", "LapTrack", "LapTrackMulti"]
+__all__ = ["laptrack", "LapTrack", "LapTrackMulti", "data_conversion"]
```
File renamed without changes.
New module `laptrack.data_conversion` (146 added lines):

```python
from typing import List
from typing import Tuple

import networkx as nx
import numpy as np
import pandas as pd

from ._typing_utils import Int
from ._typing_utils import NumArray

IntTuple = Tuple[Int, Int]
```
```python
def convert_dataframe_to_coords(
    df: pd.DataFrame,
    coordinate_cols: List[str],
    frame_col: str = "frame",
    validate_frame: bool = True,
) -> List[NumArray]:
    """Convert a track dataframe to a list of coordinates for input.

    Parameters
    ----------
    df : pd.DataFrame
        the input dataframe
    coordinate_cols : List[str]
        the list of columns to use for coordinates
    frame_col : str, optional
        the column name to use for the frame index. Defaults to "frame".
    validate_frame : bool, optional
        whether to validate that the frames are consecutive integers starting at 0.
        Defaults to True.

    Returns
    -------
    coords : List[np.ndarray]
        the list of coordinates, one array of shape (n_points, n_dims) per frame
    """
    grps = list(df.groupby(frame_col, sort=True))
    if validate_frame:
        # every frame from 0 to the maximum frame must be present
        assert np.array_equal(np.arange(df[frame_col].max() + 1), [g[0] for g in grps])
    coords = [grp[coordinate_cols].values for _frame, grp in grps]
    return coords
```
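As a usage sketch (not part of the diff), the function above turns a long-format spots table into per-frame coordinate arrays; the dataframe here is made up for illustration:

```python
import pandas as pd
from laptrack import data_conversion

df = pd.DataFrame(
    {
        "frame": [0, 0, 1, 1, 1],
        "x": [0.0, 1.0, 0.1, 1.1, 2.0],
        "y": [0.0, 1.0, 0.2, 0.9, 2.0],
    }
)
coords = data_conversion.convert_dataframe_to_coords(df, ["x", "y"])
# one (n_points, 2) array per frame: 2 points in frame 0, 3 points in frame 1
assert [c.shape for c in coords] == [(2, 2), (3, 2)]
```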
```python
def convert_tree_to_dataframe(
    tree: nx.Graph,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Convert the track tree to dataframes.

    Parameters
    ----------
    tree : nx.Graph
        The track tree resulting from the tracking.

    Returns
    -------
    df : pd.DataFrame
        the track dataframe, with the following columns:
        - "frame" : the frame index
        - "index" : the coordinate index
        - "track_id" : the track id
        - "tree_id" : the tree id
    split_df : pd.DataFrame
        the splitting dataframe, with the following columns:
        - "parent_track_id" : the track id of the parent
        - "child_track_id" : the track id of the child
    merge_df : pd.DataFrame
        the merging dataframe, with the following columns:
        - "parent_track_id" : the track id of the parent
        - "child_track_id" : the track id of the child
    """
    df_data = []
    node_values = np.array(list(tree.nodes))
    frames = np.unique(node_values[:, 0])
    for frame in frames:
        indices = node_values[node_values[:, 0] == frame, 1]
        df_data.append(
            pd.DataFrame(
                {
                    "frame": [frame] * len(indices),
                    "index": indices,
                }
            )
        )
    df = pd.concat(df_data).set_index(["frame", "index"])

    # assign tree_id from the connected components of the full tree
    connected_components = list(nx.connected_components(tree))
    for track_id, nodes in enumerate(connected_components):
        for (frame, index) in nodes:
            df.loc[(frame, index), "tree_id"] = track_id
            # tree.nodes[(frame, index)]["tree_id"] = track_id
    tree2 = tree.copy()

    # cut the edges at splitting / merging nodes so that the remaining
    # connected components are linear track segments
    splits: List[Tuple[IntTuple, List[IntTuple]]] = []
    merges: List[Tuple[IntTuple, List[IntTuple]]] = []
    for node in tree.nodes:
        frame0, _index0 = node
        neighbors = list(tree.neighbors(node))
        children = [(frame, index) for (frame, index) in neighbors if frame > frame0]
        parents = [(frame, index) for (frame, index) in neighbors if frame < frame0]
        assert len(children) + len(parents) == len(neighbors)
        if len(children) > 1:
            for child in children:
                if tree2.has_edge(node, child):
                    tree2.remove_edge(node, child)
            if node not in [p[0] for p in splits]:
                splits.append((node, children))
        if len(parents) > 1:
            for parent in parents:
                if tree2.has_edge(node, parent):
                    tree2.remove_edge(node, parent)
            if node not in [p[0] for p in merges]:
                merges.append((node, parents))

    # assign track_id from the connected components of the edge-cut tree
    connected_components = list(nx.connected_components(tree2))
    for track_id, nodes in enumerate(connected_components):
        for (frame, index) in nodes:
            df.loc[(frame, index), "track_id"] = track_id
            # tree.nodes[(frame, index)]["track_id"] = track_id

    for k in ["tree_id", "track_id"]:
        df[k] = df[k].astype(int)

    split_df_data = []
    for (node, children) in splits:
        for child in children:
            split_df_data.append(
                {
                    "parent_track_id": df.loc[node, "track_id"],
                    "child_track_id": df.loc[child, "track_id"],
                }
            )
    split_df = pd.DataFrame.from_records(split_df_data).astype(int)

    merge_df_data = []
    for (node, parents) in merges:
        for parent in parents:
            merge_df_data.append(
                {
                    "parent_track_id": df.loc[parent, "track_id"],
                    "child_track_id": df.loc[node, "track_id"],
                }
            )
    merge_df = pd.DataFrame.from_records(merge_df_data).astype(int)

    return df, split_df, merge_df
```
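For illustration (again not part of the diff), a minimal track tree with a single division, passed through the function above; the concrete integer ids depend on the order in which networkx enumerates connected components, so only counts are checked here:

```python
import networkx as nx
from laptrack import data_conversion

# nodes are (frame, index) pairs; one track divides into two after frame 1
tree = nx.from_edgelist(
    [
        ((0, 0), (1, 0)),
        ((1, 0), (2, 0)),
        ((1, 0), (2, 1)),
    ]
)
df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)

# all four nodes belong to the same tree, but to three track segments:
# the parent segment {(0,0),(1,0)} and the two children {(2,0)} and {(2,1)}
assert df["tree_id"].nunique() == 1
assert df["track_id"].nunique() == 3
assert len(split_df) == 2 and len(merge_df) == 0
```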
New tests for `laptrack.data_conversion` (88 added lines):

```python
import networkx as nx
import numpy as np
import pandas as pd

from laptrack import data_conversion


def test_convert_dataframe_to_coords():
    df = pd.DataFrame(
        {
            "frame": [0, 0, 0, 1, 1, 2, 2, 2, 2, 2],
            "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            "y": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            "z": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        }
    )
    coords_target = [
        np.array([[0, 0], [1, 1], [2, 2]]),
        np.array([[3, 3], [4, 4]]),
        np.array([[5, 5], [6, 6], [7, 7], [8, 8], [9, 9]]),
    ]

    coords = data_conversion.convert_dataframe_to_coords(df, ["x", "y"])
    assert len(coords) == len(df["frame"].unique())
    assert all([np.all(c1 == c2) for c1, c2 in zip(coords, coords_target)])


def test_convert_tree_to_dataframe():
    tree = nx.from_edgelist(
        [
            ((0, 0), (1, 0)),
            ((1, 0), (2, 0)),
            ((2, 0), (3, 0)),
            ((3, 0), (4, 0)),
            ((4, 0), (5, 0)),
            ((2, 0), (3, 1)),
            ((3, 1), (4, 1)),
            ((4, 1), (5, 1)),
            ((1, 2), (2, 2)),
            ((2, 2), (3, 2)),
            ((3, 2), (4, 2)),
            ((1, 3), (2, 2)),
        ]
    )
    segments = [
        [(0, 0), (1, 0), (2, 0)],
        [(3, 0), (4, 0), (5, 0)],
        [(3, 1), (4, 1), (5, 1)],
        [(1, 2)],
        [(2, 2), (3, 2), (4, 2)],
        [(1, 3)],
    ]
    clones = [segments[:3], segments[3:]]
    # lineage of the test tree (columns are frames 0..5):
    # 0-0-0-0-0-0
    #     |
    #      -1-1-1
    #   2-2-2-2
    #    |
    #   3-
    df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)
    assert len(set(df["track_id"])) == len(segments)
    segment_ids = []
    for segment in segments:
        assert len(set(df.loc[segment, "track_id"])) == 1  # unique track id
        segment_ids.append(df.loc[segment, "track_id"].iloc[0])
    for clone in clones:
        clone_all = sum(clone, [])
        assert len(set(df.loc[clone_all, "tree_id"])) == 1  # unique tree id

    split_df_target = np.array(
        [
            [segment_ids[0], segment_ids[1]],
            [segment_ids[0], segment_ids[2]],
        ]
    )
    assert np.all(
        split_df[["parent_track_id", "child_track_id"]].values == split_df_target
    )

    merge_df_target = np.array(
        [
            [segment_ids[3], segment_ids[4]],
            [segment_ids[5], segment_ids[4]],
        ]
    )
    assert np.all(
        merge_df[["parent_track_id", "child_track_id"]].values == merge_df_target
    )
```