Skip to content

Commit

Permalink
Move boolean column tests to queries test file
Browse files Browse the repository at this point in the history
We need roughly the same test for both registry and the new query system, so move it out of the registry tests to a place where we have access to both query systems.
  • Loading branch information
dhirving committed Aug 9, 2024
1 parent 5b4498c commit 94456a4
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 42 deletions.
42 changes: 0 additions & 42 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -4133,45 +4133,3 @@ def test_collection_summary(self) -> None:
# Note that instrument governor resurrects here, even though there are
# no datasets left with that governor.
self.assertEqual(summary.governors, {"instrument": {"Cam1"}, "skymap": {"SkyMap1"}})

def test_query_where_string_boolean_expressions(self) -> None:
    """Test that 'where' clauses for queries return the expected results
    for boolean columns used as expressions.

    Four exposure records with explicit ``can_see_sky`` values (two `True`,
    two `False`) are inserted on top of the loaded YAML data, and string
    ``where`` expressions that use the boolean column directly are checked
    against the IDs of those records.
    """
    registry = self.makeRegistry()
    # Exposure is the only dimension that has boolean columns, and this set
    # of data has all the pre-requisites for exposure set up.
    self.loadData(registry, "hsc-rc2-subset.yaml")
    base_data = {"instrument": "HSC", "physical_filter": "HSC-R", "group": "903342", "day_obs": 20130617}

    TRUE_ID_1 = 1001
    TRUE_ID_2 = 2001
    FALSE_ID_1 = 1002
    FALSE_ID_2 = 2002
    records = [
        {"id": TRUE_ID_1, "obs_id": "true-1", "can_see_sky": True},
        {"id": TRUE_ID_2, "obs_id": "true-2", "can_see_sky": True},
        {"id": FALSE_ID_1, "obs_id": "false-1", "can_see_sky": False},
        {"id": FALSE_ID_2, "obs_id": "false-2", "can_see_sky": False},
        # There is also a record ID 903342 from the YAML file with a NULL
        # value for can_see_sky.
    ]
    for record in records:
        registry.insertDimensionData("exposure", base_data | record)

    def _run_query(where: str) -> list[int]:
        """Return the exposure IDs of the records matching ``where``."""
        # The IDs are the integer ``id`` values inserted above, so the
        # result is a list of ints. Iterate the query result directly
        # rather than copying it into a temporary list first.
        matches = registry.queryDimensionRecords("exposure", where=where, instrument="HSC")
        return [x.dataId["exposure"] for x in matches]

    # Boolean columns should be usable standalone as an expression.
    self.assertCountEqual(_run_query("exposure.can_see_sky"), [TRUE_ID_1, TRUE_ID_2])

    # You can find false values in the column with NOT. The NOT of NULL
    # is NULL, consistent with SQL semantics -- so records with NULL
    # can_see_sky are not included here.
    self.assertCountEqual(_run_query("NOT exposure.can_see_sky"), [FALSE_ID_1, FALSE_ID_2])

    # Make sure the bare column composes with other expressions correctly.
    self.assertCountEqual(
        _run_query("exposure.can_see_sky OR exposure = 1002"), [TRUE_ID_1, TRUE_ID_2, FALSE_ID_1]
    )
64 changes: 64 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,3 +932,67 @@ def test_column_expressions(self) -> None:
# Error to reference tract without skymap in a WHERE clause.
with self.assertRaises(InvalidQueryError):
list(query.where(_x.tract == 4).dimension_records("patch"))

def test_boolean_columns(self) -> None:
    """Check that boolean dimension columns can be used directly in
    ``where`` expressions.

    The same assertions are run once through the registry query interface
    and once through the new query system.
    """
    # Only the exposure dimension has boolean columns; this data file sets
    # up everything that exposure records depend on.
    butler = self.make_butler("hsc-rc2-subset.yaml")

    TRUE_ID_1, TRUE_ID_2 = 1001, 2001
    FALSE_ID_1, FALSE_ID_2 = 1002, 2002

    common_fields = {
        "instrument": "HSC",
        "physical_filter": "HSC-R",
        "group": "903342",
        "day_obs": 20130617,
    }
    # In addition to the rows inserted here, the YAML file provides record
    # ID 903342, whose can_see_sky value is NULL.
    new_rows = (
        (TRUE_ID_1, "true-1", True),
        (TRUE_ID_2, "true-2", True),
        (FALSE_ID_1, "false-1", False),
        (FALSE_ID_2, "false-2", False),
    )
    for exposure_id, obs_id, can_see_sky in new_rows:
        butler.registry.insertDimensionData(
            "exposure",
            {**common_fields, "id": exposure_id, "obs_id": obs_id, "can_see_sky": can_see_sky},
        )

    # The registry interface is exercised as well so that the old query
    # system stays covered. Drop this once the old query system is removed.
    def _query_via_registry(where: str) -> list[int]:
        found = butler.registry.queryDimensionRecords("exposure", where=where, instrument="HSC")
        return _get_exposure_ids_from_dimension_records(found)

    def _query_via_new_system(where: str) -> list[int]:
        with butler._query() as query:
            found = query.dimension_records("exposure").where(where, instrument="HSC")
            # Consume the results while the query context is still open.
            return _get_exposure_ids_from_dimension_records(found)

    query_systems = {"registry": _query_via_registry, "new-query": _query_via_new_system}
    for label, run in query_systems.items():
        with self.subTest(label):
            # A bare boolean column is a valid expression on its own.
            only_true = run("exposure.can_see_sky")
            self.assertCountEqual(only_true, [TRUE_ID_1, TRUE_ID_2])

            # NOT selects the false values. As in SQL, NOT NULL is NULL,
            # so rows with a NULL can_see_sky are excluded.
            only_false = run("NOT exposure.can_see_sky")
            self.assertCountEqual(only_false, [FALSE_ID_1, FALSE_ID_2])

            # The bare column must also combine correctly with other
            # expressions.
            combined = run("exposure.can_see_sky OR exposure = 1002")
            self.assertCountEqual(combined, [TRUE_ID_1, TRUE_ID_2, FALSE_ID_1])


def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
    """Extract the exposure ID from each dimension record.

    Parameters
    ----------
    dimension_records : `~collections.abc.Iterable` [ `DimensionRecord` ]
        Records whose ``dataId`` mapping contains an ``"exposure"`` key.

    Returns
    -------
    exposure_ids : `list` [ `int` ]
        The ``dataId["exposure"]`` value of every record, in iteration
        order.

    Raises
    ------
    AssertionError
        Raised if any record's exposure value is not an `int`.
    """
    output: list[int] = []
    for rec in dimension_records:
        # Named ``exposure_id`` rather than ``id`` to avoid shadowing the
        # builtin of the same name.
        exposure_id = rec.dataId["exposure"]
        # Checks the value at runtime and narrows its type to `int`.
        assert isinstance(exposure_id, int)
        output.append(exposure_id)

    return output

0 comments on commit 94456a4

Please sign in to comment.