Skip to content

Commit

Permalink
Fix summary_datasets being ignored
Browse files Browse the repository at this point in the history
When attempting to add a test for the summary_datasets parameter to ButlerCollectionInfo for RemoteButler, it turned out that it was not doing anything in the DirectButler version.

This was occurring because a caching context causes this parameter to be ignored, and the cache was always enabled in query_info previously.  This cache does not have a benefit for the new implementation.
  • Loading branch information
dhirving committed Sep 10, 2024
1 parent 8c3ca9c commit cc654d1
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 44 deletions.
87 changes: 43 additions & 44 deletions python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,52 +117,51 @@ def query_info(
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
info = []
with self._registry.caching_context():
if collection_types is None:
collection_types = CollectionType.all()
elif isinstance(collection_types, CollectionType):
collection_types = {collection_types}

records = self._registry._managers.collections.resolve_wildcard(
CollectionWildcard.from_expression(expression),
collection_types=collection_types,
flatten_chains=flatten_chains,
include_chains=include_chains,
)
if collection_types is None:
collection_types = CollectionType.all()
elif isinstance(collection_types, CollectionType):
collection_types = {collection_types}

records = self._registry._managers.collections.resolve_wildcard(
CollectionWildcard.from_expression(expression),
collection_types=collection_types,
flatten_chains=flatten_chains,
include_chains=include_chains,
)

summaries: Mapping[Any, CollectionSummary] = {}
if include_summary:
summaries = self._registry._managers.datasets.fetch_summaries(records, summary_datasets)

docs: Mapping[Any, str] = {}
if include_doc:
docs = self._registry._managers.collections.get_docs(record.key for record in records)

for record in records:
doc = docs.get(record.key, "")
children: tuple[str, ...] = tuple()
if record.type == CollectionType.CHAINED:
assert isinstance(record, ChainedCollectionRecord)
children = tuple(record.children)
parents: frozenset[str] | None = None
if include_parents:
# TODO: This is non-vectorized, so expensive to do in a
# loop.
parents = frozenset(self._registry.getCollectionParentChains(record.name))
dataset_types: Set[str] | None = None
if summary := summaries.get(record.key):
dataset_types = frozenset([dt.name for dt in summary.dataset_types])

info.append(
CollectionInfo(
name=record.name,
type=record.type,
doc=doc,
parents=parents,
children=children,
dataset_types=dataset_types,
)
summaries: Mapping[Any, CollectionSummary] = {}
if include_summary:
summaries = self._registry._managers.datasets.fetch_summaries(records, summary_datasets)

docs: Mapping[Any, str] = {}
if include_doc:
docs = self._registry._managers.collections.get_docs(record.key for record in records)

for record in records:
doc = docs.get(record.key, "")
children: tuple[str, ...] = tuple()
if record.type == CollectionType.CHAINED:
assert isinstance(record, ChainedCollectionRecord)
children = tuple(record.children)
parents: frozenset[str] | None = None
if include_parents:
# TODO: This is non-vectorized, so expensive to do in a
# loop.
parents = frozenset(self._registry.getCollectionParentChains(record.name))
dataset_types: Set[str] | None = None
if summary := summaries.get(record.key):
dataset_types = frozenset([dt.name for dt in summary.dataset_types])

info.append(
CollectionInfo(
name=record.name,
type=record.type,
doc=doc,
parents=parents,
children=children,
dataset_types=dataset_types,
)
)

return info

Expand Down
16 changes: 16 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1804,6 +1804,22 @@ def test_calibration_join_queries(self) -> None:
],
)

# Regression test: summary_datasets was previously ignored by DirectButler
# because the caching context in query_info discarded the parameter.
def test_collection_query_info(self) -> None:
    butler = self.make_butler("base.yaml", "datasets.yaml")

    # With no summary_datasets restriction the summary reports every
    # dataset type present in the collection.
    results = butler.collections.query_info("imported_g", include_summary=True)
    self.assertEqual(len(results), 1)
    types_found = results[0].dataset_types
    assert types_found is not None
    self.assertCountEqual(types_found, ["flat", "bias"])

    # Passing summary_datasets narrows the summary to the named types.
    results = butler.collections.query_info(
        "imported_g", include_summary=True, summary_datasets=["flat"]
    )
    self.assertEqual(len(results), 1)
    types_found = results[0].dataset_types
    assert types_found is not None
    self.assertCountEqual(types_found, ["flat"])


def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
output = []
Expand Down

0 comments on commit cc654d1

Please sign in to comment.