diff --git a/pyproject.toml b/pyproject.toml
index 0485f4a7..cc776351 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "laptrack"
-version = "0.1.7-alpha.5"
+version = "0.1.7-alpha.3"
 description = "LapTrack"
 authors = ["Yohsuke Fukai "]
 license = "BSD-3-Clause"
diff --git a/src/laptrack/__init__.py b/src/laptrack/__init__.py
index cee91112..7e288852 100644
--- a/src/laptrack/__init__.py
+++ b/src/laptrack/__init__.py
@@ -4,5 +4,6 @@
 __email__ = "ysk@yfukai.net"
 
 from ._tracking import laptrack, LapTrack, LapTrackMulti
+from . import data_conversion
 
-__all__ = ["laptrack", "LapTrack", "LapTrackMulti"]
+__all__ = ["laptrack", "LapTrack", "LapTrackMulti", "data_conversion"]
diff --git a/src/laptrack/_utils.py b/src/laptrack/_coo_matrix_builder.py
similarity index 100%
rename from src/laptrack/_utils.py
rename to src/laptrack/_coo_matrix_builder.py
diff --git a/src/laptrack/_cost_matrix.py b/src/laptrack/_cost_matrix.py
index 448226da..8078c357 100644
--- a/src/laptrack/_cost_matrix.py
+++ b/src/laptrack/_cost_matrix.py
@@ -4,9 +4,9 @@
 import numpy as np
 from scipy.sparse import coo_matrix
 
+from ._coo_matrix_builder import coo_matrix_builder
 from ._typing_utils import Float
 from ._typing_utils import Matrix
-from ._utils import coo_matrix_builder
 
 
 def build_frame_cost_matrix(
diff --git a/src/laptrack/_tracking.py b/src/laptrack/_tracking.py
index 5643931f..69f9c1a7 100644
--- a/src/laptrack/_tracking.py
+++ b/src/laptrack/_tracking.py
@@ -33,7 +33,7 @@
 from ._optimization import lap_optimization
 from ._typing_utils import FloatArray
 from ._typing_utils import Int
-from ._utils import coo_matrix_builder
+from ._coo_matrix_builder import coo_matrix_builder
 
 logger = logging.getLogger(__name__)
 
@@ -284,10 +284,10 @@ def _link_frames(self, coords) -> nx.Graph:
         """Link particles between frames according to the cost function
 
         Args:
-            coords (_type_): _description_
+            coords (List[np.ndarray]): the input coordinates
 
         Returns:
-            nx.Graph: _description_
+            nx.Graph: the resulting tree
         """
         # initialize tree
         track_tree = nx.Graph()
diff --git a/src/laptrack/data_conversion.py b/src/laptrack/data_conversion.py
new file mode 100644
index 00000000..c08a352d
--- /dev/null
+++ b/src/laptrack/data_conversion.py
@@ -0,0 +1,158 @@
+"""Conversion between dataframes, coordinate lists and track trees."""
+from typing import List
+from typing import Tuple
+
+import networkx as nx
+import numpy as np
+import pandas as pd
+
+from ._typing_utils import Int
+from ._typing_utils import NumArray
+
+IntTuple = Tuple[Int, Int]
+
+
+def convert_dataframe_to_coords(
+    df: pd.DataFrame,
+    coordinate_cols: List[str],
+    frame_col: str = "frame",
+    validate_frame: bool = True,
+) -> List[NumArray]:
+    """Convert a track dataframe to a list of coordinates for input.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        the input dataframe
+    coordinate_cols : List[str]
+        the list of columns to use for coordinates
+    frame_col : str, optional
+        The column name to use for the frame index. Defaults to "frame".
+    validate_frame : bool, optional
+        whether to check that the frame values are exactly 0, 1, ..., N - 1
+        without gaps. Defaults to True.
+
+    Returns
+    -------
+    coords : List[np.ndarray]
+        the list of coordinates, one array per frame in ascending frame order
+
+    Raises
+    ------
+    ValueError
+        if ``validate_frame`` is True and the frame values are not the
+        consecutive integers starting from 0
+    """
+    grps = list(df.groupby(frame_col, sort=True))
+    if validate_frame:
+        frames = [frame for frame, _grp in grps]
+        # raise explicitly: an ``assert`` would be stripped under ``python -O``
+        if not np.array_equal(frames, np.arange(len(frames))):
+            raise ValueError(
+                f'the values of "{frame_col}" must be consecutive integers from 0'
+            )
+    return [grp[coordinate_cols].values for _frame, grp in grps]
+
+
+def convert_tree_to_dataframe(
+    tree: nx.Graph,
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """Convert the track tree to dataframes.
+
+    Parameters
+    ----------
+    tree : nx.Graph
+        The track tree resulting from the tracking, whose nodes are
+        (frame, index) tuples
+
+    Returns
+    -------
+    df : pd.DataFrame
+        the track dataframe, indexed by ("frame", "index") and sorted by frame,
+        with the following columns:
+        - "tree_id" : the tree id, shared by all the connected nodes
+        - "track_id" : the track id, shared by the nodes of a segment that
+          contains neither a splitting nor a merging
+    split_df : pd.DataFrame
+        the splitting dataframe, with the following columns:
+        - "parent_track_id" : the track id of the parent
+        - "child_track_id" : the track id of the child
+    merge_df : pd.DataFrame
+        the merging dataframe, with the following columns:
+        - "parent_track_id" : the track id of the parent
+        - "child_track_id" : the track id of the child
+
+    Raises
+    ------
+    ValueError
+        if a node is linked to another node in the same frame
+    """
+    nodes = list(tree.nodes)
+    # ``reshape`` keeps the two-column shape even when the tree has no node
+    node_values = np.array(nodes).reshape(-1, 2)
+    # a stable sort keeps the node order of the tree within each frame
+    order = np.argsort(node_values[:, 0], kind="stable")
+    nodes = [nodes[i] for i in order]
+    node_values = node_values[order]
+
+    tree_id_of = {
+        node: tree_id
+        for tree_id, component in enumerate(nx.connected_components(tree))
+        for node in component
+    }
+
+    # cut the edges at every splitting and merging so that each connected
+    # component of ``segments`` is a single track
+    segments = tree.copy()
+    splits: List[Tuple[IntTuple, List[IntTuple]]] = []
+    merges: List[Tuple[IntTuple, List[IntTuple]]] = []
+    for node in tree.nodes:
+        frame0, _index0 = node
+        neighbors = list(tree.neighbors(node))
+        children = [(frame, index) for (frame, index) in neighbors if frame > frame0]
+        parents = [(frame, index) for (frame, index) in neighbors if frame < frame0]
+        if len(children) + len(parents) != len(neighbors):
+            raise ValueError(f"node {node} is linked to a node in the same frame")
+        if len(children) > 1:
+            # ``remove_edges_from`` ignores the edges that are already removed
+            segments.remove_edges_from((node, child) for child in children)
+            splits.append((node, children))
+        if len(parents) > 1:
+            segments.remove_edges_from((node, parent) for parent in parents)
+            merges.append((node, parents))
+
+    track_id_of = {
+        node: track_id
+        for track_id, segment in enumerate(nx.connected_components(segments))
+        for node in segment
+    }
+
+    df = pd.DataFrame(
+        {
+            "frame": node_values[:, 0],
+            "index": node_values[:, 1],
+            "tree_id": np.array([tree_id_of[node] for node in nodes], dtype=int),
+            "track_id": np.array([track_id_of[node] for node in nodes], dtype=int),
+        }
+    ).set_index(["frame", "index"])
+
+    # passing ``columns`` keeps both columns even without any splitting or merging
+    relation_cols = ["parent_track_id", "child_track_id"]
+    split_df = pd.DataFrame(
+        [
+            (track_id_of[node], track_id_of[child])
+            for node, children in splits
+            for child in children
+        ],
+        columns=relation_cols,
+    ).astype(int)
+    merge_df = pd.DataFrame(
+        [
+            (track_id_of[parent], track_id_of[node])
+            for node, parents in merges
+            for parent in parents
+        ],
+        columns=relation_cols,
+    ).astype(int)
+
+    return df, split_df, merge_df
diff --git a/tests/test_data_conversion.py b/tests/test_data_conversion.py
new file mode 100644
index 00000000..14748616
--- /dev/null
+++ b/tests/test_data_conversion.py
@@ -0,0 +1,108 @@
+import networkx as nx
+import numpy as np
+import pandas as pd
+import pytest
+
+from laptrack import data_conversion
+
+
+def test_convert_dataframe_to_coords():
+    df = pd.DataFrame(
+        {
+            "frame": [0, 0, 0, 1, 1, 2, 2, 2, 2, 2],
+            "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            "y": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            "z": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+        }
+    )
+    coords_target = [
+        np.array([[0, 0], [1, 1], [2, 2]]),
+        np.array([[3, 3], [4, 4]]),
+        np.array([[5, 5], [6, 6], [7, 7], [8, 8], [9, 9]]),
+    ]
+
+    coords = data_conversion.convert_dataframe_to_coords(df, ["x", "y"])
+    assert len(coords) == len(df["frame"].unique())
+    assert all([np.all(c1 == c2) for c1, c2 in zip(coords, coords_target)])
+
+
+def test_convert_dataframe_to_coords_missing_frame():
+    df = pd.DataFrame({"frame": [0, 2], "x": [0, 1]})
+    with pytest.raises(ValueError):
+        data_conversion.convert_dataframe_to_coords(df, ["x"])
+    coords = data_conversion.convert_dataframe_to_coords(
+        df, ["x"], validate_frame=False
+    )
+    assert len(coords) == 2
+
+
+def test_convert_tree_to_dataframe():
+    tree = nx.from_edgelist(
+        [
+            ((0, 0), (1, 0)),
+            ((1, 0), (2, 0)),
+            ((2, 0), (3, 0)),
+            ((3, 0), (4, 0)),
+            ((4, 0), (5, 0)),
+            ((2, 0), (3, 1)),
+            ((3, 1), (4, 1)),
+            ((4, 1), (5, 1)),
+            ((1, 2), (2, 2)),
+            ((2, 2), (3, 2)),
+            ((3, 2), (4, 2)),
+            ((1, 3), (2, 2)),
+        ]
+    )
+    segments = [
+        [(0, 0), (1, 0), (2, 0)],
+        [(3, 0), (4, 0), (5, 0)],
+        [(3, 1), (4, 1), (5, 1)],
+        [(1, 2)],
+        [(2, 2), (3, 2), (4, 2)],
+        [(1, 3)],
+    ]
+    clones = [segments[:3], segments[3:]]
+    # 0-0-0-0-0-0
+    #     |
+    #     -1-1-1
+    #   2-2-2-2
+    #     |
+    #   3-
+    df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)
+    assert len(set(df["track_id"])) == len(segments)
+    segment_ids = []
+    for segment in segments:
+        assert len(set(df.loc[segment, "track_id"])) == 1  # unique track id
+        segment_ids.append(df.loc[segment, "track_id"].iloc[0])
+    for clone in clones:
+        clone_all = sum(clone, [])
+        assert len(set(df.loc[clone_all, "tree_id"])) == 1  # unique tree id
+
+    split_df_target = np.array(
+        [
+            [segment_ids[0], segment_ids[1]],
+            [segment_ids[0], segment_ids[2]],
+        ]
+    )
+    assert np.all(
+        split_df[["parent_track_id", "child_track_id"]].values == split_df_target
+    )
+
+    merge_df_target = np.array(
+        [
+            [segment_ids[3], segment_ids[4]],
+            [segment_ids[5], segment_ids[4]],
+        ]
+    )
+    assert np.all(
+        merge_df[["parent_track_id", "child_track_id"]].values == merge_df_target
+    )
+
+
+def test_convert_tree_to_dataframe_without_split_or_merge():
+    tree = nx.from_edgelist([((0, 0), (1, 0)), ((1, 0), (2, 0))])
+    df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)
+    assert len(set(df["track_id"])) == 1
+    for relation_df in (split_df, merge_df):
+        assert list(relation_df.columns) == ["parent_track_id", "child_track_id"]
+        assert len(relation_df) == 0