diff --git a/doc/changes/DM-46430.bugfix.md b/doc/changes/DM-46430.bugfix.md
new file mode 100644
index 0000000000..8b95a9a9e1
--- /dev/null
+++ b/doc/changes/DM-46430.bugfix.md
@@ -0,0 +1 @@
+Fix an issue where `query_datasets` would sometimes fail when searching in a single run collection.
diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
index 3d3d4586ac..fe38ed42e9 100644
--- a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
+++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
@@ -674,7 +674,13 @@ def _finish_query_builder(
                 only_collection_record = collections[0]
                 sql_projection.joiner.where(collection_col == only_collection_record.key)
                 if "collection" in fields:
-                    fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name)
+                    fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name).cast(
+                        # This cast is necessary to ensure that Postgres knows the
+                        # type of this column if it is used in an aggregate
+                        # function.
+                        sqlalchemy.String
+                    )
+
             elif not collections:
                 sql_projection.joiner.where(sqlalchemy.literal(False))
                 if "collection" in fields:
@@ -710,7 +716,13 @@ def _finish_query_builder(
                 # know that if we find the dataset in that collection,
                 # then that's the datasets's run; we don't need to
                 # query for it.
-                fields_provided["run"] = sqlalchemy.literal(only_collection_record.name)
+                #
+                fields_provided["run"] = sqlalchemy.literal(only_collection_record.name).cast(
+                    # This cast is necessary to ensure that Postgres knows the
+                    # type of this column if it is used in an aggregate
+                    # function.
+                    sqlalchemy.String
+                )
             elif run_collections_only:
                 # Once again we can avoid joining to the collection table by
                 # adding a CASE statement.
diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py
index 1165e4e054..2d3d83f0eb 100644
--- a/python/lsst/daf/butler/tests/butler_queries.py
+++ b/python/lsst/daf/butler/tests/butler_queries.py
@@ -1819,6 +1819,46 @@ def test_collection_query_info(self) -> None:
         assert dataset_types is not None
         self.assertCountEqual(dataset_types, ["flat"])
 
+    def test_dataset_queries(self) -> None:
+        butler = self.make_butler("base.yaml", "spatial.yaml")
+
+        # Need a dataset with some spatial information to trigger aggregate
+        # value logic in queries.
+        butler.registry.registerDatasetType(
+            DatasetType("dt", ["visit", "detector"], "int", universe=butler.dimensions)
+        )
+        butler.collections.register("run1")
+        butler.registry.insertDatasets("dt", [{"instrument": "Cam1", "visit": 1, "detector": 1}], "run1")
+
+        # Tests for a regression of DM-46430, where invalid SQL would be
+        # generated when the list of collections is a single run collection and
+        # there is region-postprocessing logic involved. This was due to
+        # missing type information associated with the "run" dataset field.
+        result = butler.query_datasets(
+            "dt",
+            "run1",
+            where="instrument='Cam1' and skymap='SkyMap1' and visit=1 and tract=0",
+            with_dimension_records=True,
+        )
+        self.assertEqual(result[0].dataId, {"instrument": "Cam1", "visit": 1, "detector": 1})
+        self.assertEqual(result[0].run, "run1")
+
+        # A similar issue to the "run" issue above was occurring with the
+        # 'collection' dataset field.
+        with butler.query() as query:
+            rows = list(
+                query.join_dataset_search("dt", "run1")
+                .where("instrument='Cam1' and skymap='SkyMap1' and visit=1 and tract=0")
+                .general(
+                    dimensions=["visit", "detector"],
+                    dataset_fields={"dt": set(["collection"])},
+                    find_first=True,
+                )
+            )
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["visit"], 1)
+        self.assertEqual(rows[0]["dt.collection"], "run1")
+
 
 def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
     output = []
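
Note (not part of the patch): a minimal standalone sketch of the Postgres behavior that the `.cast(sqlalchemy.String)` calls above work around. The `array_agg` aggregate and the "run1" literal are illustrative stand-ins for whatever aggregate the region-postprocessing query uses and for `only_collection_record.name`; only the sqlalchemy API calls themselves are real.

import sqlalchemy
from sqlalchemy.dialects import postgresql

# Without a cast, the literal renders as a bare bind parameter.  If that
# parameter is fed to an aggregate function, Postgres can be unable to
# infer the parameter's data type and rejects the statement.
untyped = sqlalchemy.select(sqlalchemy.func.array_agg(sqlalchemy.literal("run1")))
print(untyped.compile(dialect=postgresql.dialect()))
# SELECT array_agg(%(param_1)s) AS array_agg_1

# With the cast, the rendered SQL carries an explicit CAST, so the type
# of the aggregate's argument is unambiguous.
typed = sqlalchemy.select(
    sqlalchemy.func.array_agg(sqlalchemy.literal("run1").cast(sqlalchemy.String))
)
print(typed.compile(dialect=postgresql.dialect()))
# SELECT array_agg(CAST(%(param_1)s AS VARCHAR)) AS array_agg_1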