Skip to content

Commit

Permalink
Add bfill to all diff and shift to avoid creation of nans in first few rows. Closes #11
Browse files Browse the repository at this point in the history
  • Loading branch information
benlansdell committed Feb 2, 2024
1 parent 9327a4c commit 40b76a6
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 24 deletions.
12 changes: 6 additions & 6 deletions ethome/features/generic_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


def _diff_within_group(df, sort_key, diff_col, **kwargs):
return df.groupby(sort_key)[diff_col].transform(lambda x: x.diff(**kwargs))
return df.groupby(sort_key)[diff_col].transform(lambda x: x.diff(**kwargs).bfill())


def compute_centerofmass_interanimal_distances(
Expand Down Expand Up @@ -54,7 +54,7 @@ def compute_centerofmass_interanimal_speed(

features_df = df.copy()

dt = features_df["time"].diff(periods=n_shifts)
dt = features_df["time"].diff(periods=n_shifts).bfill()

for animal_id in mouse_ids:
fxs = ["_".join([animal_id, "x", bp]) for bp in bodypart_ids]
Expand Down Expand Up @@ -140,7 +140,7 @@ def compute_centerofmass_velocity(
features_df = df.copy()
orig_cols = df.columns

dt = features_df["time"].diff(periods=n_shifts)
dt = features_df["time"].diff(periods=n_shifts).bfill()

for animal_id in mouse_ids:
fxs = ["_".join([animal_id, "x", bp]) for bp in bodypart_ids]
Expand Down Expand Up @@ -178,7 +178,7 @@ def compute_part_velocity(
features_df = df.copy()
orig_cols = df.columns

dt = features_df["time"].diff(periods=n_shifts)
dt = features_df["time"].diff(periods=n_shifts).bfill()

for animal_id in mouse_ids:
for bp in bodypart_ids:
Expand Down Expand Up @@ -215,7 +215,7 @@ def compute_part_speed(
features_df = df.copy()
orig_cols = df.columns

dt = features_df["time"].diff(periods=n_shifts)
dt = features_df["time"].diff(periods=n_shifts).bfill()

for animal_id in mouse_ids:
for bp in bodypart_ids:
Expand Down Expand Up @@ -245,7 +245,7 @@ def compute_speed_features(
features_df = df.copy()
orig_cols = df.columns

dt = features_df["time"].diff(periods=n_shifts)
dt = features_df["time"].diff(periods=n_shifts).bfill()

##Make the distance features
for i, bp1 in enumerate(bodypart_ids):
Expand Down
32 changes: 16 additions & 16 deletions ethome/features/mars_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def wrapper(*args, **kwargs):
# Rename all column names
for p in periods:
if mode == "shift":
s_df = df[added_cols].shift(p)
s_df = df[added_cols].shift(p).bfill()
elif mode == "diff":
s_df = df[added_cols].diff(p)
s_df = df[added_cols].diff(p).bfill()
s_df = s_df.rename(
columns={k: f"{k}_shifted_{p}" for k in added_cols}
)
Expand Down Expand Up @@ -221,13 +221,13 @@ def _compute_kinematics(df, names, animal_setup, window_size=5, n_shifts=3):
for mouse_id in mouse_ids:
for name in names:
## Speed of centroids
dx = df[f"centroid_{name}_{mouse_id}_x"].diff(window_size)
dy = df[f"centroid_{name}_{mouse_id}_y"].diff(window_size)
dx = df[f"centroid_{name}_{mouse_id}_x"].diff(window_size).bfill()
dy = df[f"centroid_{name}_{mouse_id}_y"].diff(window_size).bfill()
df[f"centroid_{name}_{mouse_id}_speed"] = np.sqrt(dx**2 + dy**2)
# colnames.append(f'centroid_{name}_{mouse_id}_speed')
## Acceleration of centroids
ddx = dx.diff(window_size)
ddy = dy.diff(window_size)
ddx = dx.diff(window_size).bfill()
ddy = dy.diff(window_size).bfill()
df[f"centroid_{name}_{mouse_id}_accel_x"] = ddx / (window_size**2)
df[f"centroid_{name}_{mouse_id}_accel_y"] = ddy / (window_size**2)
return df
Expand All @@ -249,8 +249,8 @@ def _compute_relative_body_motions(

# Compute velocity of mouse centroids
for m_id in mouse_ids:
vx = df[f"centroid_all_{m_id}_x"].diff(window_size) / window_size
vy = df[f"centroid_all_{m_id}_y"].diff(window_size) / window_size
vx = df[f"centroid_all_{m_id}_x"].diff(window_size).bfill() / window_size
vy = df[f"centroid_all_{m_id}_y"].diff(window_size).bfill() / window_size
v_tangent = (dx * vx + dy * vy) / dm
v_perp_x = vx - dx * v_tangent / dm
v_perp_y = vy - dy * v_tangent / dm
Expand Down Expand Up @@ -661,13 +661,13 @@ def make_features_velocities(df, animal_setup, n_shifts=5): # pragma: no cover
f_new = "_".join([mouse_id, "speed", bp1, bp2])
features_df[f_new] = np.sqrt(
(
features_df[f1x].diff(periods=n_shifts)
- features_df[f2x].diff(periods=n_shifts)
features_df[f1x].diff(periods=n_shifts).bfill()
- features_df[f2x].diff(periods=n_shifts).bfill()
)
** 2
+ (
features_df[f1y].diff(periods=n_shifts)
- features_df[f2y].diff(periods=n_shifts)
features_df[f1y].diff(periods=n_shifts).bfill()
- features_df[f2y].diff(periods=n_shifts).bfill()
)
** 2
)
Expand All @@ -679,13 +679,13 @@ def make_features_velocities(df, animal_setup, n_shifts=5): # pragma: no cover
f_new = "_".join(["M0_M1", "speed", bp1, bp2])
features_df[f_new] = np.sqrt(
(
features_df[f1x].diff(periods=n_shifts)
- features_df[f2x].diff(periods=n_shifts)
features_df[f1x].diff(periods=n_shifts).bfill()
- features_df[f2x].diff(periods=n_shifts).bfill()
)
** 2
+ (
features_df[f1y].diff(periods=n_shifts)
- features_df[f2y].diff(periods=n_shifts)
features_df[f1y].diff(periods=n_shifts).bfill()
- features_df[f2y].diff(periods=n_shifts).bfill()
)
** 2
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def test_marsreduced_features_by_string(dataset):
# Test new feature creation methods... use a custom function, and use a custom class, and use a string
def test_custom_feature_func(dataset):
def diff_cols(df, required_columns=[]):
return df.loc[:, required_columns].diff()
return df.loc[:, required_columns].diff().bfill()

dataset.features.add(
diff_cols, required_columns=["resident_x_neck", "resident_y_neck"]
Expand All @@ -345,7 +345,7 @@ def __init__(self, required_columns):
self.required_columns = required_columns

def transform(self, df, **kwargs):
return df.loc[:, self.required_columns].diff()
return df.loc[:, self.required_columns].diff().bfill()

head_diff = BodyPartDiff(["resident_x_neck", "resident_y_neck"])
dataset.features.add(head_diff)
Expand Down

0 comments on commit 40b76a6

Please sign in to comment.