Merge pull request #2580 from AllenInstitute/fixes_dataframe_indices
Fixes dataframe indices
aamster authored Oct 25, 2022
2 parents 0223f6d + 528fbed commit 5565a5a
Showing 6 changed files with 154 additions and 120 deletions.
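The theme across the changed files is making DataFrame indices explicit: give the index a name rather than leave pandas' unnamed default. A minimal sketch of the two equivalent forms the diff uses (illustrative code, not part of the commit):

```python
import pandas as pd

# Form 1: rename the index of an existing frame
# (what optotagging.py now does).
df = pd.DataFrame({"something": [12, 14]}, index=[5, 6])
df.index = df.index.rename("id")

# Form 2: build the frame with a named Index up front
# (what the updated test expectations do).
expected = pd.DataFrame(
    {"something": [12, 14]},
    index=pd.Index(name="id", data=[5, 6]),
)

# Both frames carry an index named "id" and compare equal.
pd.testing.assert_frame_equal(df, expected)
```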
allensdk/brain_observatory/ecephys/optotagging.py (1 addition, 1 deletion)
@@ -15,7 +15,7 @@ def __init__(self, table: pd.DataFrame):
         # pre-processed optotagging_table may use.
         table = \
             table.rename(columns={"name": "stimulus_name"})
-
+        table.index = table.index.rename('id')
         super().__init__(name='optotaggging_table', value=table)
 
     @property
(second changed file; path not captured in this view)
@@ -1,4 +1,3 @@
-import os
 import re
 from unittest import mock
 
@@ -39,7 +38,7 @@ def __call__(self, query, *args, **kwargs):
             "no_pa_check": lambda st: "published_at" not in st
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -48,10 +47,11 @@ def __call__(self, query, *args, **kwargs):
         {"session_ids": [1, 2, 3]},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "filters_sessions": lambda st: re.compile(
+                r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -60,10 +60,11 @@ def __call__(self, query, *args, **kwargs):
         {"unit_ids": [1, 2, 3]},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "filters_units": lambda st: re.compile(r".+and eu.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "filters_units": lambda st: re.compile(
+                r".+and eu.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -72,11 +73,14 @@ def __call__(self, query, *args, **kwargs):
         {"channel_ids": [1, 2, 3], "probe_ids": [4, 5, 6]},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "filters_channels": lambda st: re.compile(r".+and ec.id in \(1,2,3\).*", re.DOTALL).match(st) is not None,
-            "filters_probes": lambda st: re.compile(r".+and ep.id in \(4,5,6\).*", re.DOTALL).match(st) is not None
+            "filters_channels": lambda st: re.compile(
+                r".+and ec.id in \(1,2,3\).*", re.DOTALL).match(
+                st) is not None,
+            "filters_probes": lambda st: re.compile(
+                r".+and ep.id in \(4,5,6\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -85,11 +89,15 @@ def __call__(self, query, *args, **kwargs):
         {"published_at": "2019-10-22"},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None,
-            "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None
+            "checks_pa_not_null": lambda st: re.compile(
+                r".+and es.published_at is not null.*", re.DOTALL).match(
+                st) is not None,
+            "checks_pa": lambda st: re.compile(
+                r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(
+                st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -98,12 +106,17 @@ def __call__(self, query, *args, **kwargs):
         {"published_at": "2019-10-22", "session_ids": [1, 2, 3]},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None,
-            "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None,
-            "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "checks_pa_not_null": lambda st: re.compile(
+                r".+and es.published_at is not null.*", re.DOTALL).match(
+                st) is not None,
+            "checks_pa": lambda st: re.compile(
+                r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(
+                st) is not None,
+            "filters_sessions": lambda st: re.compile(
+                r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
@@ -112,63 +125,81 @@ def __call__(self, query, *args, **kwargs):
         {"published_at": "2019-10-22", "session_ids": [1, 2, 3]},
         pd.DataFrame({"id": [5, 6], "something": [12, 14]}),
         {
-            "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None,
-            "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None,
-            "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "checks_pa_not_null": lambda st: re.compile(
+                r".+and es.published_at is not null.*", re.DOTALL).match(
+                st) is not None,
+            "checks_pa": lambda st: re.compile(
+                r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(
+                st) is not None,
+            "filters_sessions": lambda st: re.compile(
+                r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14]},
+            {"something": [12, 14]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
     [
         "get_sessions",
         {"published_at": "2019-10-22", "session_ids": [1, 2, 3]},
-        pd.DataFrame({"id": [5, 6], "something": [12, 14], "genotype": ["foo", np.nan]}),
+        pd.DataFrame({"id": [5, 6], "something": [12, 14],
+                      "genotype": ["foo", np.nan]}),
         {
-            "checks_pa_not_null": lambda st: re.compile(r".+and es.published_at is not null.*", re.DOTALL).match(st) is not None,
-            "checks_pa": lambda st: re.compile(r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(st) is not None,
-            "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "checks_pa_not_null": lambda st: re.compile(
+                r".+and es.published_at is not null.*", re.DOTALL).match(
+                st) is not None,
+            "checks_pa": lambda st: re.compile(
+                r".+and es.published_at <= '2019-10-22'.*", re.DOTALL).match(
+                st) is not None,
+            "filters_sessions": lambda st: re.compile(
+                r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"something": [12, 14], "genotype": ["foo", "wt"]},
+            {"something": [12, 14], "genotype": ["foo", "wt"]},
             index=pd.Index(name="id", data=[5, 6])
         )
     ],
     [
         "get_unit_analysis_metrics",
         {"ecephys_session_ids": [1, 2, 3]},
-        pd.DataFrame({"id": [5, 6], "data": [{"a": 1, "b": 2}, {"a": 3, "b": 4}], "ecephys_unit_id": [10, 11]}),
+        pd.DataFrame(
+            {"id": [5, 6], "data": [{"a": 1, "b": 2}, {"a": 3, "b": 4}],
+             "ecephys_unit_id": [10, 11]}),
         {
-            "filters_sessions": lambda st: re.compile(r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
+            "filters_sessions": lambda st: re.compile(
+                r".+and es.id in \(1,2,3\).*", re.DOTALL).match(st) is not None
         },
         pd.DataFrame(
-            {"id": [5, 6], "a": [1, 3], "b": [2, 4]},
-            index=pd.Index(name="iecephys_unit_id", data=[10, 11])
+            {"id": [5, 6], "a": [1, 3], "b": [2, 4]},
+            index=pd.Index(name="ecephys_unit_id", data=[10, 11])
         )
     ]
 ])
 def test_pg_query(method_name, kwargs, response, checks, expected):
 
     selector = MockSelector(checks, response)
 
-    with mock.patch("allensdk.internal.api.psycopg2_select", new=selector) as ptc:
-        api = epla.EcephysProjectLimsApi.default(lims_credentials=mock_lims_credentials)
+    with mock.patch("allensdk.internal.api.psycopg2_select",
+                    new=selector) as ptc:
+        api = epla.EcephysProjectLimsApi.default(
+            lims_credentials=mock_lims_credentials)
         obtained = getattr(api, method_name)(**kwargs)
-        pd.testing.assert_frame_equal(expected, obtained, check_like=True, check_dtype=False)
+        pd.testing.assert_frame_equal(expected, obtained, check_like=True,
+                                      check_dtype=False)
 
         any_checks_failed = False
         for name, result in ptc.passed.items():
             if not result:
                 print(f"check {name} failed")
                 any_checks_failed = True
 
         if any_checks_failed:
             print(ptc.query)
         assert not any_checks_failed
 
 
 WKF_ID = 12345
 
 
 class MockPgEngine:
 
     def __init__(self, query_pattern):
@@ -220,8 +251,8 @@ def stream(self, url):
     ]
 ])
 def test_file_getter(method, kwargs, query_pattern, pg_engine_cls):
-
     api = epla.EcephysProjectLimsApi(
-        postgres_engine=pg_engine_cls(query_pattern), app_engine=MockHttpEngine()
+        postgres_engine=pg_engine_cls(query_pattern),
+        app_engine=MockHttpEngine()
     )
-    getattr(api, method)(**kwargs)
+    getattr(api, method)(**kwargs)
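A note on the assert_frame_equal call above: check_like=True ignores row and column ordering, and check_dtype=False tolerates dtype drift between the mocked LIMS response and the expected frame, while index names are still compared. A small illustrative sketch (not from the commit):

```python
import pandas as pd

expected = pd.DataFrame({"something": [12, 14]},
                        index=pd.Index(name="id", data=[5, 6]))
# Same data, but the rows come back reordered and as floats.
obtained = pd.DataFrame({"something": [14.0, 12.0]},
                        index=pd.Index(name="id", data=[6, 5]))

# Passes: check_like=True reindexes before comparing, and
# check_dtype=False accepts int64 vs. float64 values.
pd.testing.assert_frame_equal(expected, obtained,
                              check_like=True, check_dtype=False)
# Index names are still checked by default, which is why the expected
# frames above need index=pd.Index(name="id", ...).
```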
allensdk/test/brain_observatory/ecephys/test_ecephys_session.py (9 additions, 9 deletions)
@@ -300,7 +300,7 @@ def test_get_stimulus_presentations(valid_stimulus_table_api):
         "stimulus_name": ['invalid_presentation',
                           'invalid_presentation', 'a', 'a_movie'],
         "phase": [np.nan, np.nan, 120.0, 180.0]
-    }, index=pd.Index(name='stimulus_presentations_id', data=[0, 1, 2, 3]))
+    }, index=pd.Index(name='stimulus_presentation_id', data=[0, 1, 2, 3]))
 
     session = EcephysSession(api=valid_stimulus_table_api)
     obtained = session.stimulus_presentations[["start_time",
@@ -323,7 +323,7 @@ def test_get_stimulus_presentations_no_invalid_times(just_stim_table_api):
         "stop_time": [1/2, 1, 3/2, 2],
         'stimulus_name': ['a', 'a', 'a', 'a_movie'],
 
-    }, index=pd.Index(name='stimulus_presentations_id', data=[0, 1, 2, 3]))
+    }, index=pd.Index(name='stimulus_presentation_id', data=[0, 1, 2, 3]))
 
     session = EcephysSession(api=just_stim_table_api)
 
@@ -468,8 +468,8 @@ def test_empty_presentationwise_spike_times(spike_times_api):
         session.stimulus_presentations.index.values,
         session.units.index.values)
 
-    assert(isinstance(obtained, pd.DataFrame))
-    assert(obtained.empty)
+    assert isinstance(obtained, pd.DataFrame)
+    assert obtained.empty
 
 
 def test_conditionwise_spike_statistics(spike_times_api):
@@ -503,11 +503,11 @@ def test_empty_conditionwise_spike_statistics(spike_times_api):
         stimulus_presentation_ids=session.stimulus_presentations.index.values,
         unit_ids=session.units.index.values
     )
-    assert(len(obtained) == 12)
-    assert(not np.any(obtained['spike_count']))  # check all spike_counts are 0
-    assert(not np.any(obtained['spike_mean']))  # spike_means are 0
-    assert(np.all(np.isnan(obtained['spike_std'])))  # std/sem is undefined
-    assert(np.all(np.isnan(obtained['spike_sem'])))
+    assert len(obtained) == 12
+    assert not np.any(obtained['spike_count'])  # check all spike_counts are 0
+    assert not np.any(obtained['spike_mean'])  # spike_means are 0
+    assert np.all(np.isnan(obtained['spike_std']))  # std/sem is undefined
+    assert np.all(np.isnan(obtained['spike_sem']))
 
 
 def test_get_stimulus_parameter_values(just_stim_table_api):
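The assert rewrites above drop the call-style parentheses. Beyond style, this guards against a classic pitfall, presumably the motivation here (the commit does not say): an assert of a parenthesized tuple is always true. A quick illustration:

```python
# Harmless: parentheses around a single expression change nothing.
assert (1 + 1 == 2)

# Pitfall: this assert can never fail, because (False, "msg") is a
# non-empty tuple and therefore truthy; CPython emits a SyntaxWarning.
assert (1 + 1 == 3, "math is broken")

# Correct two-argument form: condition, then message.
assert 1 + 1 == 2, "math is broken"
```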
allensdk/test/brain_observatory/ecephys/test_write_nwb.py (7 additions, 3 deletions)
@@ -215,7 +215,8 @@ def test_add_stimulus_presentations_color(
                    "level": [10., 9., 8., 7.],
                    "condition": ["a", "a", "b", "c"],
                    "stimulus_name": ["w", "x", "y", "z"],
-                   "duration": [0.5, 0.5, 0.5, 0.5]})),
+                   "duration": [0.5, 0.5, 0.5, 0.5]},
+                  index=pd.Index(name="id", data=[0, 1, 2, 3]))),
     (pd.DataFrame({"start_time": [0., 1., 2., 3.],
                    "stop_time": [0.5, 1.5, 2.5, 3.5],
@@ -239,6 +240,7 @@ def test_add_optotagging_table_to_nwbfile(
 
     if expected is None:
         expected = opto_table.value
+        expected.index.name = 'id'
 
     pd.testing.assert_frame_equal(obtained, expected, check_like=True)
 
@@ -1243,7 +1245,8 @@ def test_add_eye_tracking_rig_geometry_data_to_nwbfile(nwbfile,
                                 "eye_height": [6.] * 5,
                                 "eye_width": [6.] * 5,
                                 "eye_phi": [3.] * 5},
-                                index=[3., 4., 5., 6., 7.]),
+                                index=pd.Index(name="Time (s)",
+                                               data=[3., 4., 5., 6., 7.])),
     # expected_gaze_data
     pd.DataFrame({"raw_eye_area": [3., 5., 7., 9., 11.],
                   "raw_pupil_area": [2., 4., 6., 8., 10.],
@@ -1281,7 +1284,8 @@ def test_add_eye_tracking_rig_geometry_data_to_nwbfile(nwbfile,
                                          np.nan,
                                          8.,
                                          10.]},
-                           index=[3., 4., 5., 6., 7.])
+                           index=pd.Index(name="Time (s)",
+                                          data=[3., 4., 5., 6., 7.]))
     ),
 ])
 def test_add_eye_tracking_data_to_nwbfile(nwbfile,
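The eye-tracking expectations above now name their float index "Time (s)", which matters because pd.testing.assert_frame_equal compares index names by default (check_names=True). An illustrative sketch (not from the commit) of the failure an unnamed index would cause:

```python
import pandas as pd

obtained = pd.DataFrame({"raw_pupil_area": [2., 4.]},
                        index=pd.Index(name="Time (s)", data=[3., 4.]))
expected = pd.DataFrame({"raw_pupil_area": [2., 4.]},
                        index=[3., 4.])  # unnamed index

# Raises AssertionError: index names differ ("Time (s)" vs. None),
# even though the values match.
pd.testing.assert_frame_equal(obtained, expected)
```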
(diffs for the remaining two changed files were not loaded in this view)
