From 3812c084b18a7dba29d15e48d7c550a0f49d440f Mon Sep 17 00:00:00 2001 From: Ben Lansdell Date: Fri, 2 Feb 2024 14:12:01 -0700 Subject: [PATCH] Start to add type hints (as per #12) --- ethome/config.py | 2 +- ethome/features/cnn1d.py | 60 +++++++++++++++++--------------- ethome/features/dl_features.py | 55 +++++++++++++++-------------- ethome/features/features.py | 11 +++--- ethome/features/mars_features.py | 2 +- ethome/interpolation.py | 2 +- ethome/models.py | 28 +++++++-------- ethome/plot.py | 4 +-- ethome/unsupervised.py | 4 +-- ethome/utils.py | 2 +- 10 files changed, 87 insertions(+), 83 deletions(-) diff --git a/ethome/config.py b/ethome/config.py index b9865b3..1ce64ab 100644 --- a/ethome/config.py +++ b/ethome/config.py @@ -2,7 +2,7 @@ # TODO # Add support for the user to change these. - + global_config = { "make_movie__y_offset": 60, "make_movie__y_inc": 30, diff --git a/ethome/features/cnn1d.py b/ethome/features/cnn1d.py index b68ebcd..117d9ff 100644 --- a/ethome/features/cnn1d.py +++ b/ethome/features/cnn1d.py @@ -1,18 +1,19 @@ import numpy as np import pandas as pd +from typing import List, Callable from ..utils import check_keras from .mars_features import make_features_mars, make_features_mars_distr def build_baseline_model( - input_dim, - layer_channels=(512, 256), - dropout_rate=0.0, - learning_rate=1e-3, - conv_size=5, - num_classes=4, - class_weight=None, + input_dim: tuple, + layer_channels: tuple =(512, 256), + dropout_rate: float =0.0, + learning_rate: float =1e-3, + conv_size: int =5, + num_classes: int=4, + class_weight:tuple = None, ): if not check_keras(): raise RuntimeError( @@ -48,7 +49,7 @@ def add_conv_bn_activate(model, out_dim, activation="relu", conv_size=3, drop=0. return model -def make_df(pts, colnames=None): # pragma: no cover +def make_df(pts, colnames: List[str] =None): # pragma: no cover df = [] for idx in range(len(pts)): data = pts[idx].flatten() @@ -59,11 +60,12 @@ def make_df(pts, colnames=None): # pragma: no cover return pd.DataFrame(df) -def features_identity(inputs): # pragma: no cover +def features_identity(inputs: np.ndarray): # pragma: no cover + return inputs, inputs.shape[1:] -def features_via_sklearn(inputs, featurizer): # pragma: no cover +def features_via_sklearn(inputs: np.ndarray, featurizer: Callable): # pragma: no cover # Use the ML functions to turn this into a pandas data table df = make_df(inputs) features_df, _, _ = featurizer(df) @@ -71,18 +73,18 @@ def features_via_sklearn(inputs, featurizer): # pragma: no cover return features, features.shape -def features_mars(x): # pragma: no cover +def features_mars(x: np.ndarray): # pragma: no cover return features_via_sklearn(x, make_features_mars) # #features_mars_no_shift = lambda x: features_via_sklearn(x, make_features_mars_no_shift) -def features_mars_distr(x): # pragma: no cover +def features_mars_distr(x: np.ndarray): # pragma: no cover return features_via_sklearn(x, make_features_mars_distr) -def features_distances(inputs): +def features_distances(inputs: np.ndarray): # inputs.shape (4509, 2,7,2) = (frame, mouse ID, body part, x/y) features = [] @@ -109,7 +111,7 @@ def features_distances(inputs): return features, features.shape[1:] -def features_distances_normalized(inputs): # pragma: no cover +def features_distances_normalized(inputs: np.ndarray): # pragma: no cover # inputs.shape (4509, 2,7,2) = (frame, mouse ID, body part, x/y) features = [] @@ -142,19 +144,19 @@ def features_distances_normalized(inputs): # pragma: no cover class MABe_Generator: def __init__( self, - pose_dict, - batch_size, - dim, - use_conv, - num_classes, - augment=False, - class_to_number=None, - past_frames=0, - future_frames=0, - frame_gap=1, - shuffle=False, - mode="fit", - featurize=features_identity, + pose_dict: dict, + batch_size: int, + dim: tuple, + use_conv: bool, + num_classes: int, + augment: bool =False, + class_to_number: dict =None, + past_frames:int=0, + future_frames:int=0, + frame_gap:int=1, + shuffle:bool=False, + mode:str="fit", + featurize:Callable=features_identity, ): self.batch_size = batch_size self.featurize = featurize @@ -205,7 +207,7 @@ def __init__( def __len__(self): return len(self.indexes) // self.batch_size - def augment_fn(self, x): + def augment_fn(self, x: np.ndarray): # Rotate angle = (np.random.rand() - 0.5) * (np.pi * 2) c, s = np.cos(angle), np.sin(angle) @@ -217,7 +219,7 @@ def augment_fn(self, x): x = x + shift return x - def __getitem__(self, index): + def __getitem__(self, index: int): bs = self.batch_size indexes = self.indexes[index * bs : (index + 1) * bs] X = np.empty((bs, *self.dim), self.X_dtype) diff --git a/ethome/features/dl_features.py b/ethome/features/dl_features.py index 4c65405..4f03711 100644 --- a/ethome/features/dl_features.py +++ b/ethome/features/dl_features.py @@ -5,6 +5,7 @@ import os from copy import deepcopy +from typing import Callable, List from ethome.features.cnn1d import build_baseline_model from ethome.features.cnn1d import MABe_Generator, features_identity from .cnn1d import * @@ -55,7 +56,7 @@ } -def seed_everything(seed=2012): +def seed_everything(seed:int=2012): np.random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) @@ -69,19 +70,19 @@ class Trainer(object): def __init__( self, *, - feature_dim, - num_classes, - test_data=None, - class_to_number=None, - past_frames=0, - future_frames=0, - frame_gap=1, - use_conv=False, - build_model=build_baseline_model, + feature_dim: list, + num_classes: int, + test_data:np.ndarray=None, + class_to_number:dict=None, + past_frames:int=0, + future_frames:int=0, + frame_gap:int=1, + use_conv:bool=False, + build_model:Callable=build_baseline_model, Generator=MABe_Generator, - use_callbacks=False, - learning_decay_freq=10, - featurizer=features_identity, + use_callbacks:bool=False, + learning_decay_freq:int=10, + featurizer:Callable=features_identity, ): flat_dim = np.prod(feature_dim) if use_conv: @@ -129,7 +130,7 @@ def _set_model(self, model): """Set an external, provide initialized and compiled keras model""" self.model = model - def inference(self, model_params, class_weight=None, n_folds=5): + def inference(self, model_params: dict, class_weight:dict=None, n_folds:int=5): kwargs = {} if class_weight is not None: if type(class_weight) is dict: @@ -187,7 +188,7 @@ def get_test_prediction_probabilities(self): return all_test_preds -def normalize_data(orig_pose_dictionary): +def normalize_data(orig_pose_dictionary:dict): for key in orig_pose_dictionary: X = orig_pose_dictionary[key]["keypoints"] X = X.transpose((0, 1, 3, 2)) # last axis is x, y coordinates @@ -199,16 +200,16 @@ def normalize_data(orig_pose_dictionary): def run_task( - vocabulary, - test_data, - config_name, - build_model, - skip_test_prediction=False, - seed=2021, + vocabulary:dict, + test_data:np.ndarray, + config_name:str, + build_model:Callable, + skip_test_prediction:bool=False, + seed:int=2021, Generator=MABe_Generator, - use_callbacks=False, - params=None, - use_conv=True, + use_callbacks:bool=False, + params:dict=None, + use_conv:bool=True, ): if params is None: if config_name is None: @@ -278,13 +279,13 @@ def run_task( return all_test_probs -def lrs(epoch, lr, freq=10): +def lrs(epoch:int, lr:float, freq:int=10): if (epoch % freq) == 0 and epoch > 0: lr /= 3 return lr -def convert_to_mars_format(df, colnames, animal_setup): +def convert_to_mars_format(df:pd.DataFrame, colnames:List[str], animal_setup:dict): n_animals = len(animal_setup["mouse_ids"]) n_body_parts = len(animal_setup["bodypart_ids"]) pose_dict = {} @@ -300,7 +301,7 @@ def convert_to_mars_format(df, colnames, animal_setup): # Basically, undo the change above -def convert_to_pandas_df(data, colnames=None): +def convert_to_pandas_df(data, colnames:List[str]=None): dfs = [] for vid in data: df = pd.DataFrame(data[vid], columns=colnames) diff --git a/ethome/features/features.py b/ethome/features/features.py index d83afbc..c264395 100644 --- a/ethome/features/features.py +++ b/ethome/features/features.py @@ -2,7 +2,8 @@ """ import warnings - +import pandas as pd +from typing import Callable, List from ethome.features.dl_features import compute_dl_probability_features from ethome.features.mars_features import ( compute_mars_features, @@ -54,11 +55,11 @@ class Features: # pragma: no cover def __init__(self): raise NotImplementedError - def transform(self, df): + def transform(self, df: pd.DataFrame): raise NotImplementedError -def feature_class_maker(name, compute_function, required_columns=[]): +def feature_class_maker(name:str, compute_function:Callable, required_columns:List[str]=[]): def __init__(self, required_columns=None, **kwargs): """Feature creation object. This houses the feature creation function and the columns that are required to compute the features. Performs some checks on data to make sure has these columns. @@ -71,10 +72,10 @@ def __init__(self, required_columns=None, **kwargs): self.required_columns = required_columns self.kwargs = kwargs - def fit(self, edf, **kwargs): # pragma: no cover + def fit(self, edf:pd.DataFrame, **kwargs): # pragma: no cover return - def transform(self, edf, **kwargs): + def transform(self, edf:pd.DataFrame, **kwargs): """Make the features. This is called internally by the dataset object when running `add_features`. Args: diff --git a/ethome/features/mars_features.py b/ethome/features/mars_features.py index 5423a44..c1f8865 100644 --- a/ethome/features/mars_features.py +++ b/ethome/features/mars_features.py @@ -36,7 +36,7 @@ def wrapper(*args, **kwargs): window_sizes = [1, 5, 10] for ws in window_sizes: data = np.dstack( - [np.array(df[added_cols].shift(p)) for p in range(-ws, ws + 1)] + [np.array(df[added_cols].shift(p).bfill()) for p in range(-ws, ws + 1)] ) min_data = pd.DataFrame( np.min(data, axis=2), diff --git a/ethome/interpolation.py b/ethome/interpolation.py index e6a2cf4..76921bf 100644 --- a/ethome/interpolation.py +++ b/ethome/interpolation.py @@ -3,7 +3,7 @@ import pandas as pd import numpy as np - + def interpolate_lowconf_points( edf: pd.DataFrame, conf_threshold: float = 0.9, diff --git a/ethome/models.py b/ethome/models.py index da01e03..ae175e7 100644 --- a/ethome/models.py +++ b/ethome/models.py @@ -5,11 +5,11 @@ import numpy as np -def _logit(p): # pragma: no cover +def _logit(p: float): # pragma: no cover return np.log(p / (1 - p)) -def _sample_prob_simplex(n=4): # pragma: no cover +def _sample_prob_simplex(n:int=4): # pragma: no cover x = sorted(np.append(np.random.uniform(size=n - 1), [0, 1])) y = np.diff(np.array(x)) return y @@ -19,7 +19,7 @@ def _sample_prob_simplex(n=4): # pragma: no cover import ssm class HMMSklearn(ssm.HMM): # pragma: no cover - def __init__(self, D, C=11): + def __init__(self, D: int, C: int=11): """HMM model from Linderman state-space model package ssm, tweaked slightly to fit with sklearn syntax Args: @@ -33,7 +33,7 @@ def __init__(self, D, C=11): D, D + 1, observations="categorical", observation_kwargs={"C": C} ) - def fit(self, X, y): + def fit(self, X: np.ndarray, y: np.ndarray): preds = np.argmax(X, axis=-1) X = np.hstack( ((X * (self.C - 1)).astype(int), np.atleast_2d((preds).astype(int)).T) @@ -62,7 +62,7 @@ def fit(self, X, y): self.observations.params = _logit(emission_dist) - def predict(self, X): + def predict(self, X: np.ndarray): preds = np.argmax(X, axis=-1) X = np.hstack( ((X * (self.C - 1)).astype(int), np.atleast_2d((preds).astype(int)).T) @@ -76,11 +76,11 @@ def predict(self, X): class F1Optimizer(ClassifierMixin): # pragma: no cover - def __init__(self, N=1000, labels=[1]): + def __init__(self, N: int=1000, labels: list =[1]): self.N = N self.labels = labels - def fit(self, X, y): # train_labels, train_pred_prob): + def fit(self, X:np.ndarray, y:np.ndarray): # train_labels, train_pred_prob): self.dim_x = X.shape[1] f = lambda w: f1_score( @@ -100,16 +100,16 @@ def fit(self, X, y): # train_labels, train_pred_prob): self.w_star = w_star self.f_star = f_star - def predict(self, X): + def predict(self, X: np.ndarray): return np.argmax(X * self.w_star, axis=-1) - def predict_proba(self, X): + def predict_proba(self, X:np.ndarray): return X * self.w_star - def transform(self, X): + def transform(self, X:np.ndarray): return self.predict_proba(X) - def fit_transform(self, X, y=None): + def fit_transform(self, X:np.ndarray, y:np.ndarray=None): self.fit(X, y) return self.transform(X) @@ -125,12 +125,12 @@ def __init__(self, Model, *args, **kwargs): """ self.model = Model(*args, **kwargs) - def fit(self, X, y): + def fit(self, X: np.ndarray, y: np.ndarray): self.model.fit(X, y) - def transform(self, X): + def transform(self, X: np.ndarray): return self.model.predict_proba(X) - def fit_transform(self, X, y=None): + def fit_transform(self, X: np.ndarray, y: np.ndarray=None): self.fit(X, y) return self.transform(X) diff --git a/ethome/plot.py b/ethome/plot.py index d528351..ffc9af7 100644 --- a/ethome/plot.py +++ b/ethome/plot.py @@ -121,7 +121,7 @@ def plot_embedding( class MplColorHelper: # pragma: no cover - def __init__(self, cmap_name, start_val, stop_val): + def __init__(self, cmap_name: str, start_val:int, stop_val:int): self.cmap_name = cmap_name self.cmap = plt.get_cmap(cmap_name) self.norm = mpl.colors.Normalize(vmin=start_val, vmax=stop_val) @@ -344,7 +344,7 @@ def create_sample_videos( labels = labels[labels >= 0] # all_labels = np.unique(labels) - def get_window_size(label_idx, sample_row, max_size=500): + def get_window_size(label_idx: int, sample_row:int, max_size:int=500): s_m = 0 for idx in range(max_size): try: diff --git a/ethome/unsupervised.py b/ethome/unsupervised.py index f0583ce..305e554 100644 --- a/ethome/unsupervised.py +++ b/ethome/unsupervised.py @@ -13,8 +13,8 @@ def compute_tsne_embedding( dataset: pd.DataFrame, cols: list, N_rows: int = 20000, - n_components=2, - perplexity=30, + n_components: int=2, + perplexity: int=30, ) -> tuple: """Compute TSNE embedding. Only for a random subset of rows. diff --git a/ethome/utils.py b/ethome/utils.py index 033de49..6b738b5 100644 --- a/ethome/utils.py +++ b/ethome/utils.py @@ -5,7 +5,7 @@ # Make ffmpeg support windows friendly -def _exec_php(cmd): +def _exec_php(cmd: str): from subprocess import Popen, PIPE, STDOUT p = Popen(cmd, shell=False, stdout=PIPE, stderr=STDOUT)