From 40b76a633430e7e3eb6109a48b37ba598f984a0b Mon Sep 17 00:00:00 2001 From: Ben Lansdell Date: Thu, 1 Feb 2024 21:35:36 -0700 Subject: [PATCH] Add bfill to all diff and shift to avoid creation of nans in first few rows. Closes #11 --- ethome/features/generic_features.py | 12 +++++------ ethome/features/mars_features.py | 32 ++++++++++++++--------------- tests/test_analysis.py | 4 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/ethome/features/generic_features.py b/ethome/features/generic_features.py index 0008f4b..fdcfa97 100644 --- a/ethome/features/generic_features.py +++ b/ethome/features/generic_features.py @@ -5,7 +5,7 @@ def _diff_within_group(df, sort_key, diff_col, **kwargs): - return df.groupby(sort_key)[diff_col].transform(lambda x: x.diff(**kwargs)) + return df.groupby(sort_key)[diff_col].transform(lambda x: x.diff(**kwargs).bfill()) def compute_centerofmass_interanimal_distances( @@ -54,7 +54,7 @@ def compute_centerofmass_interanimal_speed( features_df = df.copy() - dt = features_df["time"].diff(periods=n_shifts) + dt = features_df["time"].diff(periods=n_shifts).bfill() for animal_id in mouse_ids: fxs = ["_".join([animal_id, "x", bp]) for bp in bodypart_ids] @@ -140,7 +140,7 @@ def compute_centerofmass_velocity( features_df = df.copy() orig_cols = df.columns - dt = features_df["time"].diff(periods=n_shifts) + dt = features_df["time"].diff(periods=n_shifts).bfill() for animal_id in mouse_ids: fxs = ["_".join([animal_id, "x", bp]) for bp in bodypart_ids] @@ -178,7 +178,7 @@ def compute_part_velocity( features_df = df.copy() orig_cols = df.columns - dt = features_df["time"].diff(periods=n_shifts) + dt = features_df["time"].diff(periods=n_shifts).bfill() for animal_id in mouse_ids: for bp in bodypart_ids: @@ -215,7 +215,7 @@ def compute_part_speed( features_df = df.copy() orig_cols = df.columns - dt = features_df["time"].diff(periods=n_shifts) + dt = features_df["time"].diff(periods=n_shifts).bfill() for animal_id in mouse_ids: for bp in bodypart_ids: @@ -245,7 +245,7 @@ def compute_speed_features( features_df = df.copy() orig_cols = df.columns - dt = features_df["time"].diff(periods=n_shifts) + dt = features_df["time"].diff(periods=n_shifts).bfill() ##Make the distance features for i, bp1 in enumerate(bodypart_ids): diff --git a/ethome/features/mars_features.py b/ethome/features/mars_features.py index 014ca71..5423a44 100644 --- a/ethome/features/mars_features.py +++ b/ethome/features/mars_features.py @@ -62,9 +62,9 @@ def wrapper(*args, **kwargs): # Rename all column names for p in periods: if mode == "shift": - s_df = df[added_cols].shift(p) + s_df = df[added_cols].shift(p).bfill() elif mode == "diff": - s_df = df[added_cols].diff(p) + s_df = df[added_cols].diff(p).bfill() s_df = s_df.rename( columns={k: f"{k}_shifted_{p}" for k in added_cols} ) @@ -221,13 +221,13 @@ def _compute_kinematics(df, names, animal_setup, window_size=5, n_shifts=3): for mouse_id in mouse_ids: for name in names: ## Speed of centroids - dx = df[f"centroid_{name}_{mouse_id}_x"].diff(window_size) - dy = df[f"centroid_{name}_{mouse_id}_y"].diff(window_size) + dx = df[f"centroid_{name}_{mouse_id}_x"].diff(window_size).bfill() + dy = df[f"centroid_{name}_{mouse_id}_y"].diff(window_size).bfill() df[f"centroid_{name}_{mouse_id}_speed"] = np.sqrt(dx**2 + dy**2) # colnames.append(f'centroid_{name}_{mouse_id}_speed') ## Acceleration of centroids - ddx = dx.diff(window_size) - ddy = dy.diff(window_size) + ddx = dx.diff(window_size).bfill() + ddy = dy.diff(window_size).bfill() df[f"centroid_{name}_{mouse_id}_accel_x"] = ddx / (window_size**2) df[f"centroid_{name}_{mouse_id}_accel_y"] = ddy / (window_size**2) return df @@ -249,8 +249,8 @@ def _compute_relative_body_motions( # Compute velocity of mouse centroids for m_id in mouse_ids: - vx = df[f"centroid_all_{m_id}_x"].diff(window_size) / window_size - vy = df[f"centroid_all_{m_id}_y"].diff(window_size) / window_size + vx = df[f"centroid_all_{m_id}_x"].diff(window_size).bfill() / window_size + vy = df[f"centroid_all_{m_id}_y"].diff(window_size).bfill() / window_size v_tangent = (dx * vx + dy * vy) / dm v_perp_x = vx - dx * v_tangent / dm v_perp_y = vy - dy * v_tangent / dm @@ -661,13 +661,13 @@ def make_features_velocities(df, animal_setup, n_shifts=5): # pragma: no cover f_new = "_".join([mouse_id, "speed", bp1, bp2]) features_df[f_new] = np.sqrt( ( - features_df[f1x].diff(periods=n_shifts) - - features_df[f2x].diff(periods=n_shifts) + features_df[f1x].diff(periods=n_shifts).bfill() + - features_df[f2x].diff(periods=n_shifts).bfill() ) ** 2 + ( - features_df[f1y].diff(periods=n_shifts) - - features_df[f2y].diff(periods=n_shifts) + features_df[f1y].diff(periods=n_shifts).bfill() + - features_df[f2y].diff(periods=n_shifts).bfill() ) ** 2 ) @@ -679,13 +679,13 @@ def make_features_velocities(df, animal_setup, n_shifts=5): # pragma: no cover f_new = "_".join(["M0_M1", "speed", bp1, bp2]) features_df[f_new] = np.sqrt( ( - features_df[f1x].diff(periods=n_shifts) - - features_df[f2x].diff(periods=n_shifts) + features_df[f1x].diff(periods=n_shifts).bfill() + - features_df[f2x].diff(periods=n_shifts).bfill() ) ** 2 + ( - features_df[f1y].diff(periods=n_shifts) - - features_df[f2y].diff(periods=n_shifts) + features_df[f1y].diff(periods=n_shifts).bfill() + - features_df[f2y].diff(periods=n_shifts).bfill() ) ** 2 ) diff --git a/tests/test_analysis.py b/tests/test_analysis.py index ceb122c..99de811 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -330,7 +330,7 @@ def test_marsreduced_features_by_string(dataset): # Test new feature creation methods... use a custom function, and use a custom class, and use a string def test_custom_feature_func(dataset): def diff_cols(df, required_columns=[]): - return df.loc[:, required_columns].diff() + return df.loc[:, required_columns].diff().bfill() dataset.features.add( diff_cols, required_columns=["resident_x_neck", "resident_y_neck"] @@ -345,7 +345,7 @@ def __init__(self, required_columns): self.required_columns = required_columns def transform(self, df, **kwargs): - return df.loc[:, self.required_columns].diff() + return df.loc[:, self.required_columns].diff().bfill() head_diff = BodyPartDiff(["resident_x_neck", "resident_y_neck"]) dataset.features.add(head_diff)