diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py
index 9c9ff6a8bf..75de1d3b49 100644
--- a/python/lsst/daf/butler/_butler_collections.py
+++ b/python/lsst/daf/butler/_butler_collections.py
@@ -210,7 +210,6 @@ def remove_from_chain(
         """
         raise NotImplementedError()
 
-    @abstractmethod
     def x_query(
         self,
         expression: str | Iterable[str],
@@ -220,6 +219,8 @@ def x_query(
     ) -> Sequence[str]:
         """Query the butler for collections matching an expression.
 
+        **This is an experimental interface that can change at any time.**
+
         Parameters
         ----------
         expression : `str` or `~collections.abc.Iterable` [ `str` ]
@@ -240,6 +241,61 @@ def x_query(
         collections : `~collections.abc.Sequence` [ `str` ]
             The names of collections that match ``expression``.
 
+        Notes
+        -----
+        The order in which collections are returned is unspecified, except that
+        the children of a `~CollectionType.CHAINED` collection are guaranteed
+        to be in the order in which they are searched. When multiple parent
+        `~CollectionType.CHAINED` collections match the same criteria, the
+        order in which the two lists appear is unspecified, and the lists of
+        children may be incomplete if a child has multiple parents.
+
+        The default implementation is a wrapper around `x_query_info`.
+        """
+        collections_info = self.x_query_info(
+            expression,
+            collection_types=collection_types,
+            flatten_chains=flatten_chains,
+            include_chains=include_chains,
+        )
+        return [info.name for info in collections_info]
+
+    @abstractmethod
+    def x_query_info(
+        self,
+        expression: str | Iterable[str],
+        collection_types: Set[CollectionType] | CollectionType | None = None,
+        flatten_chains: bool = False,
+        include_chains: bool | None = None,
+        include_parents: bool = False,
+    ) -> Sequence[CollectionInfo]:
+        """Query the butler for collections matching an expression and
+        return detailed information about those collections.
+
+        **This is an experimental interface that can change at any time.**
+
+        Parameters
+        ----------
+        expression : `str` or `~collections.abc.Iterable` [ `str` ]
+            One or more collection names or globs to include in the search.
+        collection_types : `set` [`CollectionType`], `CollectionType` or `None`
+            Restrict the types of collections to be searched. If `None` all
+            collection types are searched.
+        flatten_chains : `bool`, optional
+            If `True` (`False` is default), recursively yield the child
+            collections of matching `~CollectionType.CHAINED` collections.
+        include_chains : `bool` or `None`, optional
+            If `True`, yield records for matching `~CollectionType.CHAINED`
+            collections. Default is the opposite of ``flatten_chains``:
+            include either CHAINED collections or their children, but not both.
+        include_parents : `bool`, optional
+            Whether the returned information includes parents.
+
+        Returns
+        -------
+        collections : `~collections.abc.Sequence` [ `CollectionInfo` ]
+            The names of collections that match ``expression``.
+
         Notes
         -----
         The order in which collections are returned is unspecified, except that
@@ -302,6 +358,8 @@ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: st
     def x_remove(self, name: str) -> None:
         """Remove the given collection from the registry.
 
+        **This is an experimental interface that can change at any time.**
+
         Parameters
         ----------
         name : `str`
diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
index ab4b0665bc..10b438c824 100644
--- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
+++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
@@ -88,6 +88,8 @@ def x_query(
     ) -> Sequence[str]:
         if collection_types is None:
             collection_types = CollectionType.all()
+        # Do not use base implementation for now to avoid the additional
+        # unused queries.
         return self._registry.queryCollections(
             expression,
             collectionTypes=collection_types,
@@ -95,6 +97,27 @@ def x_query(
             includeChains=include_chains,
         )
 
+    def x_query_info(
+        self,
+        expression: str | Iterable[str],
+        collection_types: Set[CollectionType] | CollectionType | None = None,
+        flatten_chains: bool = False,
+        include_chains: bool | None = None,
+        include_parents: bool = False,
+    ) -> Sequence[CollectionInfo]:
+        info = []
+        with self._registry.caching_context():
+            if collection_types is None:
+                collection_types = CollectionType.all()
+            for name in self._registry.queryCollections(
+                expression,
+                collectionTypes=collection_types,
+                flattenChains=flatten_chains,
+                includeChains=include_chains,
+            ):
+                info.append(self.get_info(name, include_parents=include_parents))
+        return info
+
     def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo:
         record = self._registry.get_collection_record(name)
         doc = ""
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
index 3d052b9aa7..1e62d9c3e9 100644
--- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
@@ -71,21 +71,27 @@ def remove_from_chain(
     ) -> None:
         raise NotImplementedError("Not yet available")
 
-    def x_query(
+    def x_query_info(
         self,
         expression: str | Iterable[str],
         collection_types: Set[CollectionType] | CollectionType | None = None,
         flatten_chains: bool = False,
         include_chains: bool | None = None,
-    ) -> Sequence[str]:
+        include_parents: bool = False,
+    ) -> Sequence[CollectionInfo]:
+        # This should become a single call on the server in the future.
         if collection_types is None:
             collection_types = CollectionType.all()
-        return self._registry.queryCollections(
+
+        info = []
+        for name in self._registry.queryCollections(
             expression,
             collectionTypes=collection_types,
             flattenChains=flatten_chains,
             includeChains=include_chains,
-        )
+        ):
+            info.append(self.get_info(name, include_parents=include_parents))
+        return info
 
     def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo:
         info = self._registry._get_collection_info(
diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py
index cddd0f3cfd..4581082d19 100644
--- a/python/lsst/daf/butler/script/exportCalibs.py
+++ b/python/lsst/daf/butler/script/exportCalibs.py
@@ -35,6 +35,7 @@
 from astropy.table import Table
 
 from .._butler import Butler
+from .._butler_collections import CollectionInfo
 from .._collection_type import CollectionType
 
 if TYPE_CHECKING:
@@ -44,7 +45,7 @@
 
 
 def find_calibration_datasets(
-    butler: Butler, collection: str, datasetTypes: Iterable[DatasetType]
+    butler: Butler, collection: CollectionInfo, datasetTypes: Iterable[DatasetType]
 ) -> list[DatasetRef]:
     """Search a calibration collection for calibration datasets.
 
@@ -52,7 +53,7 @@ def find_calibration_datasets(
     ----------
     butler : `lsst.daf.butler.Butler`
         Butler to use.
-    collection : `str`
+    collection : `CollectionInfo`
         Collection to search. This should be a CALIBRATION
         collection.
     datasetTypes : `list` [`lsst.daf.Butler.DatasetType`]
@@ -68,18 +69,18 @@ def find_calibration_datasets(
     RuntimeError
         Raised if the collection to search is not a CALIBRATION collection.
     """
-    if butler.collections.get_info(collection).type != CollectionType.CALIBRATION:
-        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")
+    if collection.type != CollectionType.CALIBRATION:
+        raise RuntimeError(f"Collection {collection.name} is not a CALIBRATION collection.")
 
     exportDatasets = []
     for calibType in datasetTypes:
         with butler._query() as query:
-            results = query.datasets(calibType, collections=collection, find_first=False)
+            results = query.datasets(calibType, collections=collection.name, find_first=False)
 
             try:
                 refs = list(results.with_dimension_records())
             except Exception as e:
-                e.add_note(f"Error from querying dataset type {calibType} and collection {collection}")
+                e.add_note(f"Error from querying dataset type {calibType} and collection {collection.name}")
                 raise
 
             exportDatasets.extend(refs)
@@ -133,18 +134,17 @@ def exportCalibs(
     collectionsToExport = []
     datasetsToExport = []
 
-    for collection in butler.collections.x_query(
+    for collection in butler.collections.x_query_info(
         collections_query,
         flatten_chains=True,
         include_chains=True,
         collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
     ):
-        log.info("Checking collection: %s", collection)
+        log.info("Checking collection: %s", collection.name)
 
         # Get collection information.
-        collectionsToExport.append(collection)
-        info = butler.collections.get_info(collection)
-        if info.type == CollectionType.CALIBRATION:
+        collectionsToExport.append(collection.name)
+        if collection.type == CollectionType.CALIBRATION:
             exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
             datasetsToExport.extend(exportDatasets)
 
diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py
index 43cee5d2ab..6f1b1ef856 100644
--- a/python/lsst/daf/butler/script/queryCollections.py
+++ b/python/lsst/daf/butler/script/queryCollections.py
@@ -32,6 +32,7 @@
 from astropy.table import Table
 
 from .._butler import Butler
+from .._butler_collections import CollectionInfo
 from .._collection_type import CollectionType
 
 
@@ -71,34 +72,36 @@ def _getTable(
         dtype=(str, str, str),
     )
     butler = Butler.from_config(repo)
-    names = sorted(butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type)))
+    collections = sorted(
+        butler.collections.x_query_info(
+            glob or "*", collection_types=frozenset(collection_type), include_parents=inverse
+        )
+    )
     if inverse:
-        for name in names:
-            info = butler.collections.get_info(name, include_parents=True)
+        for info in collections:
             if info.parents:
                 first = True
                 for parentName in sorted(info.parents):
-                    table.add_row((name if first else "", info.type.name if first else "", parentName))
+                    table.add_row((info.name if first else "", info.type.name if first else "", parentName))
                     first = False
             else:
-                table.add_row((name, info.type.name, ""))
+                table.add_row((info.name, info.type.name, ""))
         # If none of the datasets has a parent dataset then remove the
        # description column.
         if not any(c for c in table[descriptionCol]):
             del table[descriptionCol]
     else:
-        for name in names:
-            info = butler.collections.get_info(name)
+        for info in collections:
             if info.type == CollectionType.CHAINED:
                 if info.children:
                     first = True
                     for child in info.children:
-                        table.add_row((name if first else "", info.type.name if first else "", child))
+                        table.add_row((info.name if first else "", info.type.name if first else "", child))
                         first = False
                 else:
-                    table.add_row((name, info.type.name, ""))
+                    table.add_row((info.name, info.type.name, ""))
             else:
-                table.add_row((name, info.type.name, ""))
+                table.add_row((info.name, info.type.name, ""))
         # If there aren't any CHAINED datasets in the results then remove the
         # description column.
         if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]):
@@ -142,18 +145,21 @@ def _getTree(
     )
     butler = Butler.from_config(repo, without_datastore=True)
 
-    def addCollection(name: str, level: int = 0) -> None:
-        info = butler.collections.get_info(name, include_parents=inverse)
-        table.add_row((" " * level + name, info.type.name))
+    def addCollection(info: CollectionInfo, level: int = 0) -> None:
+        table.add_row((" " * level + info.name, info.type.name))
         if inverse:
             for pname in sorted(info.parents):
-                addCollection(pname, level + 1)
+                pinfo = butler.collections.get_info(pname, include_parents=inverse)
+                addCollection(pinfo, level + 1)
         else:
             if info.type == CollectionType.CHAINED:
                 for name in info.children:
-                    addCollection(name, level + 1)
+                    cinfo = butler.collections.get_info(name)
+                    addCollection(cinfo, level + 1)
 
-    collections = butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type))
+    collections = butler.collections.x_query_info(
+        glob or "*", collection_types=frozenset(collection_type), include_parents=inverse
+    )
     for collection in sorted(collections):
         addCollection(collection)
     return table
@@ -165,14 +171,14 @@ def _getFlatten(
     collection_type: Iterable[CollectionType],
 ) -> Table:
     butler = Butler.from_config(repo)
-    collectionNames = list(
-        butler.collections.x_query(
+    collections = list(
+        butler.collections.x_query_info(
             glob or "*", collection_types=frozenset(collection_type), flatten_chains=True
         )
     )
-
-    collectionTypes = [butler.collections.get_info(c).type.name for c in collectionNames]
-    return Table((collectionNames, collectionTypes), names=("Name", "Type"))
+    names = [c.name for c in collections]
+    types = [c.type.name for c in collections]
+    return Table((names, types), names=("Name", "Type"))
 
 
 def queryCollections(
diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py
index 1ae062cac1..747abaf1dc 100644
--- a/python/lsst/daf/butler/script/removeCollections.py
+++ b/python/lsst/daf/butler/script/removeCollections.py
@@ -85,18 +85,17 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo, without_datastore=True)
     try:
-        names = sorted(butler.collections.x_query(collection, include_chains=True))
+        collections_info = sorted(butler.collections.x_query_info(collection, include_chains=True))
     except MissingCollectionError:
         # Hide the error and act like no collections should be removed.
-        names = []
+        collections_info = []
     collections = Table(names=("Collection", "Collection Type"), dtype=(str, str))
     runCollections = Table(names=("Collection",), dtype=(str,))
-    for name in names:
-        collection_info = butler.collections.get_info(name)
+    for collection_info in collections_info:
         if collection_info.type == CollectionType.RUN:
-            runCollections.add_row((name,))
+            runCollections.add_row((collection_info.name,))
         else:
-            collections.add_row((name, collection_info.type.name))
+            collections.add_row((collection_info.name, collection_info.type.name))
 
     return CollectionInfo(collections, runCollections)
 
diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py
index bf17c2f1d5..dc1cbe1c94 100644
--- a/python/lsst/daf/butler/script/removeRuns.py
+++ b/python/lsst/daf/butler/script/removeRuns.py
@@ -87,20 +87,21 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo)
     try:
-        collectionNames = butler.collections.x_query(collection, CollectionType.RUN, include_chains=False)
+        collections = butler.collections.x_query_info(
+            collection, CollectionType.RUN, include_chains=False, include_parents=True
+        )
     except MissingCollectionError:
         # Act as if no collections matched.
-        collectionNames = []
+        collections = []
     dataset_types = butler.registry.queryDatasetTypes(...)
     runs = []
     datasets: dict[str, int] = defaultdict(int)
-    for collectionName in collectionNames:
-        collection_info = butler.collections.get_info(collectionName, include_parents=True)
+    for collection_info in collections:
         assert collection_info.type == CollectionType.RUN
-        runs.append(RemoveRun(collectionName, list(collection_info.parents)))
+        runs.append(RemoveRun(collection_info.name, list(collection_info.parents)))
         with butler._query() as query:
             for dt in dataset_types:
-                results = query.datasets(dt, collections=collectionName)
+                results = query.datasets(dt, collections=collection_info.name)
                 count = results.count(exact=False)
                 if count:
                     datasets[dt.name] += count
diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py
index b89fd30f10..1a0875e1c8 100644
--- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py
+++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py
@@ -77,18 +77,20 @@ def remove_from_chain(
             parent_collection_name, child_collection_names
         )
 
-    def x_query(
+    def x_query_info(
         self,
         expression: str | Iterable[str],
         collection_types: Set[CollectionType] | CollectionType | None = None,
         flatten_chains: bool = False,
         include_chains: bool | None = None,
-    ) -> Sequence[str]:
-        return self._hybrid._remote_butler.collections.x_query(
+        include_parents: bool = False,
+    ) -> Sequence[CollectionInfo]:
+        return self._hybrid._remote_butler.collections.x_query_info(
             expression,
             collection_types=collection_types,
             flatten_chains=flatten_chains,
             include_chains=include_chains,
+            include_parents=include_parents,
         )
 
     def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo:
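Taken together, the hunks above replace the two-step x_query() + get_info() pattern used by the command-line scripts with a single x_query_info() call that returns CollectionInfo records. A minimal usage sketch of the migration follows; the repository path "/repo/main" and the "HSC/calib*" glob are illustrative placeholders only, and both methods carry the experimental "x_" prefix, so the interface can change at any time.

from lsst.daf.butler import Butler, CollectionType

butler = Butler.from_config("/repo/main")  # hypothetical repository path

# Old pattern: query for collection names, then fetch details one at a time.
for name in butler.collections.x_query("HSC/calib*", collection_types=CollectionType.CALIBRATION):
    info = butler.collections.get_info(name, include_parents=True)
    print(name, info.type.name, info.parents)

# New pattern: a single experimental call returns CollectionInfo records,
# so the per-collection get_info() lookups (and the extra queries they
# trigger, particularly through RemoteButler) are no longer needed.
for info in butler.collections.x_query_info(
    "HSC/calib*",
    collection_types=CollectionType.CALIBRATION,
    include_parents=True,
):
    print(info.name, info.type.name, info.parents)

This is the same migration applied to exportCalibs.py, queryCollections.py, removeCollections.py and removeRuns.py in the diff.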