Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-40184: fix rare bug in QG generation with warps as an overall-input #871

Merged
merged 2 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/changes/DM-40184.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a rare bug in follow-up dataset queries involving relation commutators.

This occurred when building QuantumGraphs where a "warp" dataset type was an overall input to the pipeline and present in more than one input RUN collection.
23 changes: 22 additions & 1 deletion python/lsst/daf/butler/registry/queries/find_first_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from collections.abc import Sequence, Set
from typing import final

from lsst.daf.relation import ColumnTag, Relation, RowFilter
from lsst.daf.relation import ColumnTag, Relation, RowFilter, UnaryCommutator, UnaryOperationRelation
from lsst.utils.classes import cached_getter

from ...core import DatasetColumnTag, DimensionKeyColumnTag
Expand Down Expand Up @@ -73,3 +73,24 @@
def applied_min_rows(self, target: Relation) -> int:
    # Docstring inherited.
    # A find-first search over a relation with any rows is guaranteed to
    # keep at least one; over an empty relation it keeps none.
    if target.min_rows:
        return 1
    return 0

def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
    # Docstring inherited.
    # Columns we need that the inner operation's target cannot provide;
    # for sets, ``required <= target`` fails exactly when this is nonempty.
    missing = self.columns_required - current.target.columns
    if missing:
        # Cannot move inside ``current``: its target lacks required columns.
        return UnaryCommutator(
            first=None,
            second=current.operation,
            done=False,
            messages=(
                f"{current.target} is missing columns "
                f"{set(self.columns_required - current.target.columns)}",
            ),
        )
    if current.operation.is_count_dependent:
        # Refuse to commute past an operation flagged as count-dependent.
        return UnaryCommutator(
            first=None,
            second=current.operation,
            done=False,
            messages=(f"{current.operation} is count-dependent",),
        )
    # Safe to swap: apply this find-first before ``current.operation``.
    return UnaryCommutator(self, current.operation)
46 changes: 46 additions & 0 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3428,6 +3428,52 @@ def pop_transfer(tree: Relation) -> Relation:
sql.Engine,
)

def test_query_find_datasets_drop_postprocessing(self) -> None:
    """Test that DataCoordinateQueryResults.findDatasets avoids commutator
    problems with the FindFirstDataset relation operation.
    """
    # Setup: load some visit, tract, and patch records, and insert two
    # datasets with dimensions {visit, patch}, with one in each of two
    # RUN collections.
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "spatial.yaml")
    storage_class = StorageClass("Warpy")
    registry.storageClasses.registerStorageClass(storage_class)
    dataset_type = DatasetType(
        "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
    )
    registry.registerDatasetType(dataset_type)
    # Any single {visit, patch} data ID will do for both inserts.
    (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
    registry.registerRun("run1")
    registry.registerRun("run2")
    (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
    (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
    # Query for the dataset using queryDataIds(...).findDatasets(...)
    # against only one of the two collections.  This should work even
    # though the relation returned by queryDataIds ends with
    # iteration-engine region-filtering, because we can recognize before
    # running the query that there is only one collection to search and
    # hence the (default) findFirst=True is irrelevant, and joining in the
    # dataset query commutes past the iteration-engine postprocessing.
    query1 = registry.queryDataIds(
        {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
    )
    self.assertEqual(
        set(query1.findDatasets(dataset_type.name, collections=["run1"])),
        {ref1},
    )
    # Query for the dataset using queryDataIds(...).findDatasets(...)
    # against both collections.  This can only work if the FindFirstDataset
    # operation can be commuted past the iteration-engine operations into
    # SQL.  run2 is searched first, so ref2 is the find-first winner.
    query2 = registry.queryDataIds(
        {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
    )
    self.assertEqual(
        set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
        {ref2},
    )

def test_query_empty_collections(self) -> None:
"""Test for registry query methods with empty collections. The methods
should return empty result set (or None when applicable) and provide
Expand Down
Loading