Skip to content

Commit

Permalink
Merge pull request #150 from yfukai/data_conversion_apis
Browse files Browse the repository at this point in the history
Data conversion apis
  • Loading branch information
yfukai authored Aug 2, 2022
2 parents 682a1e5 + 59f76e8 commit f0e7b5e
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "laptrack"
version = "0.1.7-alpha.5"
version = "0.1.7-alpha.3"
description = "LapTrack"
authors = ["Yohsuke Fukai <[email protected]>"]
license = "BSD-3-Clause"
Expand Down
3 changes: 2 additions & 1 deletion src/laptrack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
__email__ = "[email protected]"

from ._tracking import laptrack, LapTrack, LapTrackMulti
from . import data_conversion

__all__ = ["laptrack", "LapTrack", "LapTrackMulti"]
__all__ = ["laptrack", "LapTrack", "LapTrackMulti", "data_conversion"]
File renamed without changes.
2 changes: 1 addition & 1 deletion src/laptrack/_cost_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import numpy as np
from scipy.sparse import coo_matrix

from ._coo_matrix_builder import coo_matrix_builder
from ._typing_utils import Float
from ._typing_utils import Matrix
from ._utils import coo_matrix_builder


def build_frame_cost_matrix(
Expand Down
6 changes: 3 additions & 3 deletions src/laptrack/_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from ._optimization import lap_optimization
from ._typing_utils import FloatArray
from ._typing_utils import Int
from ._utils import coo_matrix_builder
from ._coo_matrix_builder import coo_matrix_builder

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -284,10 +284,10 @@ def _link_frames(self, coords) -> nx.Graph:
"""Link particles between frames according to the cost function
Args:
coords (_type_): _description_
coords (List[np.ndarray]): the input coordinates
Returns:
nx.Graph: _description_
nx.Graph: the resulted tree
"""
# initialize tree
track_tree = nx.Graph()
Expand Down
146 changes: 146 additions & 0 deletions src/laptrack/data_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from typing import List
from typing import Tuple

import networkx as nx
import numpy as np
import pandas as pd

from ._typing_utils import Int
from ._typing_utils import NumArray

# A (frame, index) pair identifying one node of the track tree.
IntTuple = Tuple[Int, Int]


def convert_dataframe_to_coords(
    df: pd.DataFrame,
    coordinate_cols: List[str],
    frame_col: str = "frame",
    validate_frame: bool = True,
) -> List[NumArray]:
    """Convert a track dataframe to a list of coordinates for input.

    Parameters
    ----------
    df : pd.DataFrame
        the input dataframe
    coordinate_cols : List[str]
        the list of columns to use for coordinates. Any sequence of column
        names (e.g. a tuple) is accepted.
    frame_col : str, optional
        The column name to use for the frame index. Defaults to "frame".
    validate_frame : bool, optional
        whether to check that the frame values are exactly 0, 1, ..., max
        with no gap. Defaults to True.

    Returns
    -------
    coords : List[np.ndarray]
        the list of coordinates, one array per frame in ascending frame
        order. An empty dataframe yields an empty list.

    Raises
    ------
    AssertionError
        If ``validate_frame`` is True and the frame values are not the
        consecutive integers starting at 0.
    """
    grps = list(df.groupby(frame_col, sort=True))
    if validate_frame:
        frames = [frame for frame, _grp in grps]
        # The sorted unique frames equal 0..max exactly when the i-th one is i.
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if not np.array_equal(np.arange(len(frames)), frames):
            raise AssertionError(
                f"column {frame_col!r} must contain every integer frame "
                "from 0 to its maximum without gaps"
            )
    # A tuple passed to ``DataFrame.__getitem__`` is treated as a single key,
    # so normalise the column selection to a list.
    columns = list(coordinate_cols)
    return [grp[columns].values for _frame, grp in grps]


def _component_labels(graph: nx.Graph) -> dict:
    """Map every node of ``graph`` to the ordinal of its connected component."""
    return {
        node: label
        for label, component in enumerate(nx.connected_components(graph))
        for node in component
    }


def convert_tree_to_dataframe(
    tree: nx.Graph,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Convert the track tree to dataframes.

    Parameters
    ----------
    tree : nx.Graph
        The track tree, resulting from the tracking. Every node is a
        ``(frame, index)`` tuple and every edge must connect two nodes in
        different frames.

    Returns
    -------
    df : pd.DataFrame
        the track dataframe, indexed by the levels
        - "frame" : the frame index
        - "index" : the coordinate index
        and with the following columns:
        - "tree_id" : the tree id (one per connected component of ``tree``)
        - "track_id" : the track id (one per segment between splits/merges)
    split_df : pd.DataFrame
        the splitting dataframe, with the following columns:
        - "parent_track_id" : the track id of the parent
        - "child_track_id" : the track id of the child
    merge_df : pd.DataFrame
        the merging dataframe, with the following columns:
        - "parent_track_id" : the track id of the parent
        - "child_track_id" : the track id of the child

    Notes
    -----
    ``split_df`` and ``merge_df`` always carry both columns, even when the
    tree contains no split or merge, and an empty tree gives empty frames.
    """
    relation_columns = ["parent_track_id", "child_track_id"]

    # Rows are ordered by frame. ``sorted`` is stable, so nodes sharing a
    # frame keep the order in which the graph stores them.
    nodes = sorted(tree.nodes, key=lambda node: node[0])

    # A tree is a connected component of the full graph.
    tree_ids = _component_labels(tree)

    # A track is a connected component once every edge taking part in a
    # split or a merge has been cut from a copy of the graph.
    segment_graph = tree.copy()
    splits: List[Tuple[IntTuple, List[IntTuple]]] = []
    merges: List[Tuple[IntTuple, List[IntTuple]]] = []
    for node in tree.nodes:
        frame0 = node[0]
        neighbors = list(tree.neighbors(node))
        children = [other for other in neighbors if other[0] > frame0]
        parents = [other for other in neighbors if other[0] < frame0]
        # An edge within a single frame has no parent/child direction.
        assert len(children) + len(parents) == len(neighbors), (
            "edges must connect nodes in different frames"
        )
        # remove_edges_from ignores edges already cut from the other end.
        if len(children) > 1:
            segment_graph.remove_edges_from([(node, child) for child in children])
            splits.append((node, children))
        if len(parents) > 1:
            segment_graph.remove_edges_from([(node, parent) for parent in parents])
            merges.append((node, parents))

    track_ids = _component_labels(segment_graph)

    df = pd.DataFrame(
        {
            "tree_id": [tree_ids[node] for node in nodes],
            "track_id": [track_ids[node] for node in nodes],
        },
        index=pd.MultiIndex.from_tuples(nodes, names=["frame", "index"]),
    ).astype(int)

    # Passing ``columns`` keeps the schema stable when there are no records.
    split_df = pd.DataFrame(
        [
            (track_ids[node], track_ids[child])
            for node, children in splits
            for child in children
        ],
        columns=relation_columns,
    ).astype(int)

    merge_df = pd.DataFrame(
        [
            (track_ids[parent], track_ids[node])
            for node, parents in merges
            for parent in parents
        ],
        columns=relation_columns,
    ).astype(int)

    return df, split_df, merge_df
88 changes: 88 additions & 0 deletions tests/test_data_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import networkx as nx
import numpy as np
import pandas as pd

from laptrack import data_conversion


def test_convert_dataframe_to_coords():
    """Rows are grouped per frame and only the requested columns are kept."""
    df = pd.DataFrame(
        {
            "frame": [0, 0, 0, 1, 1, 2, 2, 2, 2, 2],
            "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            "y": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            "z": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        }
    )
    coords_target = [
        np.array([[0, 0], [1, 1], [2, 2]]),
        np.array([[3, 3], [4, 4]]),
        np.array([[5, 5], [6, 6], [7, 7], [8, 8], [9, 9]]),
    ]

    coords = data_conversion.convert_dataframe_to_coords(df, ["x", "y"])
    assert len(coords) == len(df["frame"].unique())
    # zip() stops at the shorter input, so pin the length to the target too.
    assert len(coords) == len(coords_target)
    # array_equal also compares shapes, so a broadcastable mismatch cannot pass.
    assert all(np.array_equal(c1, c2) for c1, c2 in zip(coords, coords_target))


def test_convert_tree_to_dataframe():
    """Track ids, tree ids and split/merge tables match a hand-built tree."""
    tree = nx.from_edgelist(
        [
            ((0, 0), (1, 0)),
            ((1, 0), (2, 0)),
            ((2, 0), (3, 0)),
            ((3, 0), (4, 0)),
            ((4, 0), (5, 0)),
            ((2, 0), (3, 1)),
            ((3, 1), (4, 1)),
            ((4, 1), (5, 1)),
            ((1, 2), (2, 2)),
            ((2, 2), (3, 2)),
            ((3, 2), (4, 2)),
            ((1, 3), (2, 2)),
        ]
    )
    segments = [
        [(0, 0), (1, 0), (2, 0)],
        [(3, 0), (4, 0), (5, 0)],
        [(3, 1), (4, 1), (5, 1)],
        [(1, 2)],
        [(2, 2), (3, 2), (4, 2)],
        [(1, 3)],
    ]
    clones = [segments[:3], segments[3:]]
    # frame: 0 1 2 3 4 5
    #        0-0-0-0-0-0
    #            |
    #            +-1-1-1
    #          2-2-2-2
    #            |
    #          3-+
    df, split_df, merge_df = data_conversion.convert_tree_to_dataframe(tree)
    assert len(set(df["track_id"])) == len(segments)
    segment_ids = []
    for segment in segments:
        assert len(set(df.loc[segment, "track_id"])) == 1  # unique track id
        segment_ids.append(df.loc[segment, "track_id"].iloc[0])
    for clone in clones:
        clone_all = sum(clone, [])
        assert len(set(df.loc[clone_all, "tree_id"])) == 1  # unique tree id

    split_df_target = np.array(
        [
            [segment_ids[0], segment_ids[1]],
            [segment_ids[0], segment_ids[2]],
        ]
    )
    assert np.all(
        split_df[["parent_track_id", "child_track_id"]].values == split_df_target
    )

    merge_df_target = np.array(
        [
            [segment_ids[3], segment_ids[4]],
            [segment_ids[5], segment_ids[4]],
        ]
    )
    assert np.all(
        merge_df[["parent_track_id", "child_track_id"]].values == merge_df_target
    )

0 comments on commit f0e7b5e

Please sign in to comment.