From 3812c084b18a7dba29d15e48d7c550a0f49d440f Mon Sep 17 00:00:00 2001
From: Ben Lansdell <ben.lansdell@gmail.com>
Date: Fri, 2 Feb 2024 14:12:01 -0700
Subject: [PATCH] Start to add type hints (as per #12)

---
 ethome/config.py                 |  2 +-
 ethome/features/cnn1d.py         | 60 +++++++++++++++++---------------
 ethome/features/dl_features.py   | 55 +++++++++++++++--------------
 ethome/features/features.py      | 11 +++---
 ethome/features/mars_features.py |  2 +-
 ethome/interpolation.py          |  2 +-
 ethome/models.py                 | 28 +++++++--------
 ethome/plot.py                   |  4 +--
 ethome/unsupervised.py           |  4 +--
 ethome/utils.py                  |  2 +-
 10 files changed, 87 insertions(+), 83 deletions(-)

diff --git a/ethome/config.py b/ethome/config.py
index b9865b3..1ce64ab 100644
--- a/ethome/config.py
+++ b/ethome/config.py
@@ -2,7 +2,7 @@
 
 # TODO
 # Add support for the user to change these.
-
+
 global_config = {
     "make_movie__y_offset": 60,
     "make_movie__y_inc": 30,
diff --git a/ethome/features/cnn1d.py b/ethome/features/cnn1d.py
index b68ebcd..117d9ff 100644
--- a/ethome/features/cnn1d.py
+++ b/ethome/features/cnn1d.py
@@ -1,18 +1,19 @@
 import numpy as np
 import pandas as pd
 
+from typing import Callable, List, Optional
 from ..utils import check_keras
 from .mars_features import make_features_mars, make_features_mars_distr
 
 
 def build_baseline_model(
-    input_dim,
-    layer_channels=(512, 256),
-    dropout_rate=0.0,
-    learning_rate=1e-3,
-    conv_size=5,
-    num_classes=4,
-    class_weight=None,
+    input_dim: tuple,
+    layer_channels: tuple = (512, 256),
+    dropout_rate: float = 0.0,
+    learning_rate: float = 1e-3,
+    conv_size: int = 5,
+    num_classes: int = 4,
+    class_weight: Optional[tuple] = None,
 ):
     if not check_keras():
         raise RuntimeError(
@@ -48,7 +49,7 @@ def add_conv_bn_activate(model, out_dim, activation="relu", conv_size=3, drop=0.
     return model
 
 
-def make_df(pts, colnames=None):  # pragma: no cover
+def make_df(pts, colnames: Optional[List[str]] = None):  # pragma: no cover
     df = []
     for idx in range(len(pts)):
         data = pts[idx].flatten()
@@ -59,11 +60,12 @@ def make_df(pts, colnames=None):  # pragma: no cover
         return pd.DataFrame(df)
 
 
-def features_identity(inputs):  # pragma: no cover
+def features_identity(inputs: np.ndarray):  # pragma: no cover
+    """Return ``inputs`` unchanged, plus its shape without the leading axis."""
     return inputs, inputs.shape[1:]
 
 
-def features_via_sklearn(inputs, featurizer):  # pragma: no cover
+def features_via_sklearn(inputs: np.ndarray, featurizer: Callable):  # pragma: no cover
     # Use the ML functions to turn this into a pandas data table
     df = make_df(inputs)
     features_df, _, _ = featurizer(df)
@@ -71,18 +73,18 @@ def features_via_sklearn(inputs, featurizer):  # pragma: no cover
     return features, features.shape
 
 
-def features_mars(x):  # pragma: no cover
+def features_mars(x: np.ndarray):  # pragma: no cover
     return features_via_sklearn(x, make_features_mars)
 
 
 # #features_mars_no_shift = lambda x: features_via_sklearn(x, make_features_mars_no_shift)
 
 
-def features_mars_distr(x):  # pragma: no cover
+def features_mars_distr(x: np.ndarray):  # pragma: no cover
     return features_via_sklearn(x, make_features_mars_distr)
 
 
-def features_distances(inputs):
+def features_distances(inputs: np.ndarray):
     # inputs.shape (4509, 2,7,2) = (frame, mouse ID, body part, x/y)
 
     features = []
@@ -109,7 +111,7 @@ def features_distances(inputs):
     return features, features.shape[1:]
 
 
-def features_distances_normalized(inputs):  # pragma: no cover
+def features_distances_normalized(inputs: np.ndarray):  # pragma: no cover
     # inputs.shape (4509, 2,7,2) = (frame, mouse ID, body part, x/y)
 
     features = []
@@ -142,19 +144,19 @@ def features_distances_normalized(inputs):  # pragma: no cover
 class MABe_Generator:
     def __init__(
         self,
-        pose_dict,
-        batch_size,
-        dim,
-        use_conv,
-        num_classes,
-        augment=False,
-        class_to_number=None,
-        past_frames=0,
-        future_frames=0,
-        frame_gap=1,
-        shuffle=False,
-        mode="fit",
-        featurize=features_identity,
+        pose_dict: dict,
+        batch_size: int,
+        dim: tuple,
+        use_conv: bool,
+        num_classes: int,
+        augment: bool = False,
+        class_to_number: Optional[dict] = None,
+        past_frames: int = 0,
+        future_frames: int = 0,
+        frame_gap: int = 1,
+        shuffle: bool = False,
+        mode: str = "fit",
+        featurize: Callable = features_identity,
     ):
         self.batch_size = batch_size
         self.featurize = featurize
@@ -205,7 +207,7 @@ def __init__(
     def __len__(self):
         return len(self.indexes) // self.batch_size
 
-    def augment_fn(self, x):
+    def augment_fn(self, x: np.ndarray):
         # Rotate
         angle = (np.random.rand() - 0.5) * (np.pi * 2)
         c, s = np.cos(angle), np.sin(angle)
@@ -217,7 +219,7 @@ def augment_fn(self, x):
         x = x + shift
         return x
 
-    def __getitem__(self, index):
+    def __getitem__(self, index: int):
         bs = self.batch_size
         indexes = self.indexes[index * bs : (index + 1) * bs]
         X = np.empty((bs, *self.dim), self.X_dtype)
diff --git a/ethome/features/dl_features.py b/ethome/features/dl_features.py
index 4c65405..4f03711 100644
--- a/ethome/features/dl_features.py
+++ b/ethome/features/dl_features.py
@@ -5,6 +5,7 @@
 import os
 from copy import deepcopy
 
+from typing import Callable, List
 from ethome.features.cnn1d import build_baseline_model
 from ethome.features.cnn1d import MABe_Generator, features_identity
 from .cnn1d import *
@@ -55,7 +56,7 @@
 }
 
 
-def seed_everything(seed=2012):
+def seed_everything(seed: int = 2012):
     np.random.seed(seed)
     os.environ["PYTHONHASHSEED"] = str(seed)
 
@@ -69,19 +70,19 @@ class Trainer(object):
     def __init__(
         self,
         *,
-        feature_dim,
-        num_classes,
-        test_data=None,
-        class_to_number=None,
-        past_frames=0,
-        future_frames=0,
-        frame_gap=1,
-        use_conv=False,
-        build_model=build_baseline_model,
+        feature_dim: list,
+        num_classes: int,
+        test_data: np.ndarray = None,
+        class_to_number: dict = None,
+        past_frames: int = 0,
+        future_frames: int = 0,
+        frame_gap: int = 1,
+        use_conv: bool = False,
+        build_model: Callable = build_baseline_model,
         Generator=MABe_Generator,
-        use_callbacks=False,
-        learning_decay_freq=10,
-        featurizer=features_identity,
+        use_callbacks: bool = False,
+        learning_decay_freq: int = 10,
+        featurizer: Callable = features_identity,
     ):
         flat_dim = np.prod(feature_dim)
         if use_conv:
@@ -129,7 +130,7 @@ def _set_model(self, model):
         """Set an external, provide initialized and compiled keras model"""
         self.model = model
 
-    def inference(self, model_params, class_weight=None, n_folds=5):
+    def inference(self, model_params: dict, class_weight: dict = None, n_folds: int = 5):
         kwargs = {}
         if class_weight is not None:
             if type(class_weight) is dict:
@@ -187,7 +188,7 @@ def get_test_prediction_probabilities(self):
         return all_test_preds
 
 
-def normalize_data(orig_pose_dictionary):
+def normalize_data(orig_pose_dictionary: dict):
     for key in orig_pose_dictionary:
         X = orig_pose_dictionary[key]["keypoints"]
         X = X.transpose((0, 1, 3, 2))  # last axis is x, y coordinates
@@ -199,16 +200,16 @@ def normalize_data(orig_pose_dictionary):
 
 
 def run_task(
-    vocabulary,
-    test_data,
-    config_name,
-    build_model,
-    skip_test_prediction=False,
-    seed=2021,
+    vocabulary: dict,
+    test_data: np.ndarray,
+    config_name: str,
+    build_model: Callable,
+    skip_test_prediction: bool = False,
+    seed: int = 2021,
     Generator=MABe_Generator,
-    use_callbacks=False,
-    params=None,
-    use_conv=True,
+    use_callbacks: bool = False,
+    params: dict = None,
+    use_conv: bool = True,
 ):
     if params is None:
         if config_name is None:
@@ -278,13 +279,13 @@ def run_task(
     return all_test_probs
 
 
-def lrs(epoch, lr, freq=10):
+def lrs(epoch: int, lr: float, freq: int = 10):
     if (epoch % freq) == 0 and epoch > 0:
         lr /= 3
     return lr
 
 
-def convert_to_mars_format(df, colnames, animal_setup):
+def convert_to_mars_format(df: pd.DataFrame, colnames: List[str], animal_setup: dict):
     n_animals = len(animal_setup["mouse_ids"])
     n_body_parts = len(animal_setup["bodypart_ids"])
     pose_dict = {}
@@ -300,7 +301,7 @@ def convert_to_mars_format(df, colnames, animal_setup):
 
 
 # Basically, undo the change above
-def convert_to_pandas_df(data, colnames=None):
+def convert_to_pandas_df(data, colnames: List[str] = None):
     dfs = []
     for vid in data:
         df = pd.DataFrame(data[vid], columns=colnames)
diff --git a/ethome/features/features.py b/ethome/features/features.py
index d83afbc..c264395 100644
--- a/ethome/features/features.py
+++ b/ethome/features/features.py
@@ -2,7 +2,8 @@
 """
 
 import warnings
-
+import pandas as pd
+from typing import Callable, List
 from ethome.features.dl_features import compute_dl_probability_features
 from ethome.features.mars_features import (
     compute_mars_features,
@@ -54,11 +55,11 @@ class Features:  # pragma: no cover
     def __init__(self):
         raise NotImplementedError
 
-    def transform(self, df):
+    def transform(self, df: pd.DataFrame):
         raise NotImplementedError
 
 
-def feature_class_maker(name, compute_function, required_columns=[]):
+def feature_class_maker(name: str, compute_function: Callable, required_columns: List[str] = []):
     def __init__(self, required_columns=None, **kwargs):
         """Feature creation object. This houses the feature creation function and the columns that are required to compute the features. Performs some checks on data to make sure has these columns.
 
@@ -71,10 +72,10 @@ def __init__(self, required_columns=None, **kwargs):
             self.required_columns = required_columns
         self.kwargs = kwargs
 
-    def fit(self, edf, **kwargs):  # pragma: no cover
+    def fit(self, edf: pd.DataFrame, **kwargs):  # pragma: no cover
         return
 
-    def transform(self, edf, **kwargs):
+    def transform(self, edf: pd.DataFrame, **kwargs):
         """Make the features. This is called internally by the dataset object when running `add_features`.
 
         Args:
diff --git a/ethome/features/mars_features.py b/ethome/features/mars_features.py
index 5423a44..c1f8865 100644
--- a/ethome/features/mars_features.py
+++ b/ethome/features/mars_features.py
@@ -36,7 +36,7 @@ def wrapper(*args, **kwargs):
                 window_sizes = [1, 5, 10]
                 for ws in window_sizes:
                     data = np.dstack(
-                        [np.array(df[added_cols].shift(p)) for p in range(-ws, ws + 1)]
+                        [np.array(df[added_cols].shift(p).bfill()) for p in range(-ws, ws + 1)]
                     )
                     min_data = pd.DataFrame(
                         np.min(data, axis=2),
diff --git a/ethome/interpolation.py b/ethome/interpolation.py
index e6a2cf4..76921bf 100644
--- a/ethome/interpolation.py
+++ b/ethome/interpolation.py
@@ -3,7 +3,7 @@
 import pandas as pd
 import numpy as np
 
-
+
 def interpolate_lowconf_points(
     edf: pd.DataFrame,
     conf_threshold: float = 0.9,
diff --git a/ethome/models.py b/ethome/models.py
index da01e03..ae175e7 100644
--- a/ethome/models.py
+++ b/ethome/models.py
@@ -5,11 +5,11 @@
 import numpy as np
 
 
-def _logit(p):  # pragma: no cover
+def _logit(p: float):  # pragma: no cover
     return np.log(p / (1 - p))
 
 
-def _sample_prob_simplex(n=4):  # pragma: no cover
+def _sample_prob_simplex(n: int = 4):  # pragma: no cover
     x = sorted(np.append(np.random.uniform(size=n - 1), [0, 1]))
     y = np.diff(np.array(x))
     return y
@@ -19,7 +19,7 @@ def _sample_prob_simplex(n=4):  # pragma: no cover
     import ssm
 
     class HMMSklearn(ssm.HMM):  # pragma: no cover
-        def __init__(self, D, C=11):
+        def __init__(self, D: int, C: int = 11):
             """HMM model from Linderman state-space model package ssm, tweaked slightly to fit with sklearn syntax
 
             Args:
@@ -33,7 +33,7 @@ def __init__(self, D, C=11):
                 D, D + 1, observations="categorical", observation_kwargs={"C": C}
             )
 
-        def fit(self, X, y):
+        def fit(self, X: np.ndarray, y: np.ndarray):
             preds = np.argmax(X, axis=-1)
             X = np.hstack(
                 ((X * (self.C - 1)).astype(int), np.atleast_2d((preds).astype(int)).T)
@@ -62,7 +62,7 @@ def fit(self, X, y):
 
             self.observations.params = _logit(emission_dist)
 
-        def predict(self, X):
+        def predict(self, X: np.ndarray):
             preds = np.argmax(X, axis=-1)
             X = np.hstack(
                 ((X * (self.C - 1)).astype(int), np.atleast_2d((preds).astype(int)).T)
@@ -76,11 +76,11 @@ def predict(self, X):
 
 
 class F1Optimizer(ClassifierMixin):  # pragma: no cover
-    def __init__(self, N=1000, labels=[1]):
+    def __init__(self, N: int = 1000, labels: list = [1]):
         self.N = N
         self.labels = labels
 
-    def fit(self, X, y):  # train_labels, train_pred_prob):
+    def fit(self, X: np.ndarray, y: np.ndarray):  # train_labels, train_pred_prob):
         self.dim_x = X.shape[1]
 
         f = lambda w: f1_score(
@@ -100,16 +100,16 @@ def fit(self, X, y):  # train_labels, train_pred_prob):
         self.w_star = w_star
         self.f_star = f_star
 
-    def predict(self, X):
+    def predict(self, X: np.ndarray):
         return np.argmax(X * self.w_star, axis=-1)
 
-    def predict_proba(self, X):
+    def predict_proba(self, X: np.ndarray):
         return X * self.w_star
 
-    def transform(self, X):
+    def transform(self, X: np.ndarray):
         return self.predict_proba(X)
 
-    def fit_transform(self, X, y=None):
+    def fit_transform(self, X: np.ndarray, y: np.ndarray = None):
         self.fit(X, y)
         return self.transform(X)
 
@@ -125,12 +125,12 @@ def __init__(self, Model, *args, **kwargs):
         """
         self.model = Model(*args, **kwargs)
 
-    def fit(self, X, y):
+    def fit(self, X: np.ndarray, y: np.ndarray):
         self.model.fit(X, y)
 
-    def transform(self, X):
+    def transform(self, X: np.ndarray):
         return self.model.predict_proba(X)
 
-    def fit_transform(self, X, y=None):
+    def fit_transform(self, X: np.ndarray, y: np.ndarray = None):
         self.fit(X, y)
         return self.transform(X)
diff --git a/ethome/plot.py b/ethome/plot.py
index d528351..ffc9af7 100644
--- a/ethome/plot.py
+++ b/ethome/plot.py
@@ -121,7 +121,7 @@ def plot_embedding(
 
 
 class MplColorHelper:  # pragma: no cover
-    def __init__(self, cmap_name, start_val, stop_val):
+    def __init__(self, cmap_name: str, start_val: float, stop_val: float):
         self.cmap_name = cmap_name
         self.cmap = plt.get_cmap(cmap_name)
         self.norm = mpl.colors.Normalize(vmin=start_val, vmax=stop_val)
@@ -344,7 +344,7 @@ def create_sample_videos(
     labels = labels[labels >= 0]
     # all_labels = np.unique(labels)
 
-    def get_window_size(label_idx, sample_row, max_size=500):
+    def get_window_size(label_idx: int, sample_row: int, max_size: int = 500):
         s_m = 0
         for idx in range(max_size):
             try:
diff --git a/ethome/unsupervised.py b/ethome/unsupervised.py
index f0583ce..305e554 100644
--- a/ethome/unsupervised.py
+++ b/ethome/unsupervised.py
@@ -13,8 +13,8 @@ def compute_tsne_embedding(
     dataset: pd.DataFrame,
     cols: list,
     N_rows: int = 20000,
-    n_components=2,
-    perplexity=30,
+    n_components: int = 2,
+    perplexity: float = 30,
 ) -> tuple:
     """Compute TSNE embedding. Only for a random subset of rows.
 
diff --git a/ethome/utils.py b/ethome/utils.py
index 033de49..6b738b5 100644
--- a/ethome/utils.py
+++ b/ethome/utils.py
@@ -5,7 +5,7 @@
 # Make ffmpeg support windows friendly
 
 
-def _exec_php(cmd):
+def _exec_php(cmd: str):
     from subprocess import Popen, PIPE, STDOUT
 
     p = Popen(cmd, shell=False, stdout=PIPE, stderr=STDOUT)