Move boolean column tests to queries test file

We need roughly the same test for both the registry and the new query system, so move it out of the registry tests to a place where we have access to both query systems.
lsst · Aug 9, 2024 · 94456a4 · 94456a4
1 parent 5b4498c
commit 94456a4
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 42 deletions.
diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py
@@ -4133,45 +4133,3 @@ def test_collection_summary(self) -> None:
         # Note that instrument governor resurrects here, even though there are
         # no datasets left with that governor.
         self.assertEqual(summary.governors, {"instrument": {"Cam1"}, "skymap": {"SkyMap1"}})
-
-    def test_query_where_string_boolean_expressions(self) -> None:
-        """Test that 'where' clauses for queries return the expected results
-        for boolean columns used as expressions.
-        """
-        registry = self.makeRegistry()
-        # Exposure is the only dimension that has boolean columns, and this set
-        # of data has all the pre-requisites for exposure set up.
-        self.loadData(registry, "hsc-rc2-subset.yaml")
-        base_data = {"instrument": "HSC", "physical_filter": "HSC-R", "group": "903342", "day_obs": 20130617}
-
-        TRUE_ID_1 = 1001
-        TRUE_ID_2 = 2001
-        FALSE_ID_1 = 1002
-        FALSE_ID_2 = 2002
-        records = [
-            {"id": TRUE_ID_1, "obs_id": "true-1", "can_see_sky": True},
-            {"id": TRUE_ID_2, "obs_id": "true-2", "can_see_sky": True},
-            {"id": FALSE_ID_1, "obs_id": "false-1", "can_see_sky": False},
-            {"id": FALSE_ID_2, "obs_id": "false-2", "can_see_sky": False},
-            # There is also a record ID 903342 from the YAML file with a NULL
-            # value for can_see_sky.
-        ]
-        for record in records:
-            registry.insertDimensionData("exposure", base_data | record)
-
-        def _run_query(where: str) -> list[str]:
-            result = list(registry.queryDimensionRecords("exposure", where=where, instrument="HSC"))
-            return [x.dataId["exposure"] for x in result]
-
-        # Boolean columns should be usable standalone as an expression.
-        self.assertCountEqual(_run_query("exposure.can_see_sky"), [TRUE_ID_1, TRUE_ID_2])
-
-        # You can find false values in the column with NOT.  The NOT of NULL
-        # is NULL, consistent with SQL semantics -- so records with NULL
-        # can_see_sky are not included here.
-        self.assertCountEqual(_run_query("NOT exposure.can_see_sky"), [FALSE_ID_1, FALSE_ID_2])
-
-        # Make sure the bare column composes with other expressions correctly.
-        self.assertCountEqual(
-            _run_query("exposure.can_see_sky OR exposure = 1002"), [TRUE_ID_1, TRUE_ID_2, FALSE_ID_1]
-        )
diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py
@@ -932,3 +932,67 @@ def test_column_expressions(self) -> None:
             # Error to reference tract without skymap in a WHERE clause.
             with self.assertRaises(InvalidQueryError):
                 list(query.where(_x.tract == 4).dimension_records("patch"))
+
+    def test_boolean_columns(self) -> None:
+        """Test that boolean columns work as expected when specifying
+        expressions.
+        """
+        # Exposure is the only dimension that has boolean columns, and this set
+        # of data has all the pre-requisites for exposure set up.
+        butler = self.make_butler("hsc-rc2-subset.yaml")
+
+        base_data = {"instrument": "HSC", "physical_filter": "HSC-R", "group": "903342", "day_obs": 20130617}
+
+        TRUE_ID_1 = 1001
+        TRUE_ID_2 = 2001
+        FALSE_ID_1 = 1002
+        FALSE_ID_2 = 2002
+        records = [
+            {"id": TRUE_ID_1, "obs_id": "true-1", "can_see_sky": True},
+            {"id": TRUE_ID_2, "obs_id": "true-2", "can_see_sky": True},
+            {"id": FALSE_ID_1, "obs_id": "false-1", "can_see_sky": False},
+            {"id": FALSE_ID_2, "obs_id": "false-2", "can_see_sky": False},
+            # There is also a record ID 903342 from the YAML file with a NULL
+            # value for can_see_sky.
+        ]
+        for record in records:
+            butler.registry.insertDimensionData("exposure", base_data | record)
+
+        # Go through the registry interface to cover the old query system, too.
+        # This can be removed once the old query system is removed.
+        def _run_registry_query(where: str) -> list[int]:
+            return _get_exposure_ids_from_dimension_records(
+                butler.registry.queryDimensionRecords("exposure", where=where, instrument="HSC")
+            )
+
+        def _run_query(where: str) -> list[int]:
+            with butler._query() as query:
+                return _get_exposure_ids_from_dimension_records(
+                    query.dimension_records("exposure").where(where, instrument="HSC")
+                )
+
+        for test, query_func in [("registry", _run_registry_query), ("new-query", _run_query)]:
+            with self.subTest(test):
+                # Boolean columns should be usable standalone as an expression.
+                self.assertCountEqual(query_func("exposure.can_see_sky"), [TRUE_ID_1, TRUE_ID_2])
+
+                # You can find false values in the column with NOT.  The NOT of
+                # NULL is NULL, consistent with SQL semantics -- so records
+                # with NULL can_see_sky are not included here.
+                self.assertCountEqual(query_func("NOT exposure.can_see_sky"), [FALSE_ID_1, FALSE_ID_2])
+
+                # Make sure the bare column composes with other expressions
+                # correctly.
+                self.assertCountEqual(
+                    query_func("exposure.can_see_sky OR exposure = 1002"), [TRUE_ID_1, TRUE_ID_2, FALSE_ID_1]
+                )
+
+
+def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
+    output = []
+    for rec in dimension_records:
+        id = rec.dataId["exposure"]
+        assert isinstance(id, int)
+        output.append(id)
+
+    return output