From 024b45344aba61e96ca30cfe16fd7aca11f18c4c Mon Sep 17 00:00:00 2001 From: Christopher Morrison Date: Fri, 27 Oct 2023 18:40:10 -0700 Subject: [PATCH] Clean up typing and ordering usage. --- .../data_io/behavior_project_cloud_api.py | 32 ++++++++++-- .../data_io/behavior_project_lims_api.py | 49 +++++++++++++++---- .../ophys/project_constants.py | 15 +++--- allensdk/core/dataframe_utils.py | 7 ++- 4 files changed, 80 insertions(+), 23 deletions(-) diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_cloud_api.py b/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_cloud_api.py index 5e1effd41..c2b49c32d 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_cloud_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_cloud_api.py @@ -188,8 +188,15 @@ def _get_ophys_session_table(self): df["date_of_acquisition"] = pd.to_datetime( df["date_of_acquisition"], utc="True" ) - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS, True) - df = enforce_df_column_order(df, VBO_METADATA_COLUMN_ORDER) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + df = enforce_df_column_order( + input_df=df, + column_order=VBO_METADATA_COLUMN_ORDER + ) self._ophys_session_table = df.set_index("ophys_session_id") def get_ophys_session_table(self) -> pd.DataFrame: @@ -214,8 +221,15 @@ def _get_behavior_session_table(self): df["date_of_acquisition"] = pd.to_datetime( df["date_of_acquisition"], utc="True" ) - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS, True) - df = enforce_df_column_order(df, VBO_METADATA_COLUMN_ORDER) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + df = enforce_df_column_order( + input_df=df, + column_order=VBO_METADATA_COLUMN_ORDER + ) self._behavior_session_table = df.set_index("behavior_session_id") @@ -245,7 +259,15 @@ def _get_ophys_experiment_table(self): df["date_of_acquisition"] = pd.to_datetime( df["date_of_acquisition"], utc="True" ) - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS, True) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + df = enforce_df_column_order( + input_df=df, + column_order=VBO_METADATA_COLUMN_ORDER + ) self._ophys_experiment_table = df.set_index("ophys_experiment_id") def _get_ophys_cells_table(self): diff --git a/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_lims_api.py b/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_lims_api.py index 6ec30ed5c..7c540028a 100644 --- a/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_lims_api.py +++ b/allensdk/brain_observatory/behavior/behavior_project_cache/project_apis/data_io/behavior_project_lims_api.py @@ -392,8 +392,15 @@ def _get_ophys_experiment_table(self) -> pd.DataFrame: ) targeted_imaging_depth.columns = ["targeted_imaging_depth"] df = query_df.merge(targeted_imaging_depth, on="ophys_container_id") - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS) - df = enforce_df_column_order(df, VBO_METADATA_COLUMN_ORDER) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + df = enforce_df_column_order( + input_df=df, + column_order=VBO_METADATA_COLUMN_ORDER + ) return df def _get_ophys_cells_table(self): @@ -441,7 +448,11 @@ def _get_ophys_cells_table(self): df = self.lims_engine.select(query) # NaN's for invalid cells force this to float, push to int - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) return df def get_ophys_cells_table(self): @@ -511,8 +522,15 @@ def get_ophys_session_table(self) -> pd.DataFrame: # Fill NaN values of imaging_plane_group_count with zero to match # the behavior of the BehaviorOphysExperiment object. - table = enforce_df_int_typing(table, VBO_INTEGER_COLUMNS) - table = enforce_df_column_order(table, VBO_METADATA_COLUMN_ORDER) + table = enforce_df_int_typing( + input_df=table, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + table = enforce_df_column_order( + input_df=table, + column_order=VBO_METADATA_COLUMN_ORDER + ) return table def get_behavior_session( @@ -544,8 +562,15 @@ def get_ophys_experiment_table(self) -> pd.DataFrame: df = self._get_ophys_experiment_table() # Set type to pandas.Int64 to enforce integer typing and not revert to # float. - df = enforce_df_int_typing(df, VBO_INTEGER_COLUMNS) - df = enforce_df_column_order(df, VBO_METADATA_COLUMN_ORDER) + df = enforce_df_int_typing( + input_df=df, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) + df = enforce_df_column_order( + input_df=df, + column_order=VBO_METADATA_COLUMN_ORDER + ) return df.set_index("ophys_experiment_id") @@ -567,10 +592,14 @@ def get_behavior_session_table(self) -> pd.DataFrame: ) # Query returns float typing of age_in_days. Convert to int to match # typing of the Age data_object. - summary_tbl = enforce_df_int_typing(summary_tbl, VBO_INTEGER_COLUMNS) + summary_tbl = enforce_df_int_typing( + input_df=summary_tbl, + int_columns=VBO_INTEGER_COLUMNS, + use_pandas_type=True + ) summary_tbl = enforce_df_column_order( - summary_tbl, - VBO_METADATA_COLUMN_ORDER + input_df=summary_tbl, + column_order=VBO_METADATA_COLUMN_ORDER ) return summary_tbl.set_index("behavior_session_id") diff --git a/allensdk/brain_observatory/ophys/project_constants.py b/allensdk/brain_observatory/ophys/project_constants.py index 1f365a383..73f42ba34 100644 --- a/allensdk/brain_observatory/ophys/project_constants.py +++ b/allensdk/brain_observatory/ophys/project_constants.py @@ -44,15 +44,18 @@ 'mouse_id', 'indicator', 'full_genotype', 'driver_line', 'cre_line', 'reporter_line', 'sex', 'age_in_days', 'imaging_depth', 'targeted_structure', 'targeted_imaging_depth', - 'imaging_plane_group', 'project_code', 'session_type', - 'session_number', 'image_set', 'behavior_type', 'passive', - 'experience_level', 'prior_exposures_to_session_type', - 'prior_exposures_to_image_set', 'prior_exposures_to_omissions', - 'date_of_acquisition', 'equipment_name', 'published_at', - 'isi_experiment_id'] + 'imaging_plane_group_count', 'imaging_plane_group', + 'project_code', 'session_type', 'session_number', 'image_set', + 'behavior_type', 'passive', 'experience_level', + 'prior_exposures_to_session_type', 'prior_exposures_to_image_set', + 'prior_exposures_to_omissions', 'date_of_acquisition', + 'equipment_name', 'num_depths_per_area', 'ophys_experiment_id', + 'num_targeted_structures', 'published_at', 'isi_experiment_id'] + VBO_INTEGER_COLUMNS = [ "session_number", + "age_in_days", "prior_exposures_to_image_set", "ophys_session_id", "imaging_plane_group_count", diff --git a/allensdk/core/dataframe_utils.py b/allensdk/core/dataframe_utils.py index 8bf63814b..7dffde208 100644 --- a/allensdk/core/dataframe_utils.py +++ b/allensdk/core/dataframe_utils.py @@ -96,7 +96,8 @@ def patch_df_from_other( def enforce_df_column_order( - input_df: pd.DataFrame, column_order: List[str] + input_df: pd.DataFrame, + column_order: List[str] ) -> pd.DataFrame: """Return the data frame but with columns ordered. @@ -128,7 +129,9 @@ def enforce_df_column_order( def enforce_df_int_typing( - input_df: pd.DataFrame, int_columns: List[str], use_pandas_type=False + input_df: pd.DataFrame, + int_columns: List[str], + use_pandas_type: object = False ) -> pd.DataFrame: """Enforce integer typing for columns that may have lost int typing when combined into the final DataFrame.