diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index da457d34f5..0b756f5757 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -27,12 +27,24 @@ from __future__ import annotations -__all__ = ("ButlerCollections",) +__all__ = ("ButlerCollections", "CollectionType", "CollectionInfo") from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from typing import Any, overload +from pydantic import BaseModel + +from ._collection_type import CollectionType + + +class CollectionInfo(BaseModel): + name: str + type: CollectionType + doc: str = "" + children: tuple[str, ...] = tuple() + parents: frozenset = frozenset() + class ButlerCollections(ABC, Sequence): """Methods for working with collections stored in the Butler.""" @@ -190,3 +202,64 @@ def remove_from_chain( transactions short. """ raise NotImplementedError() + + @abstractmethod + def query( + self, + expression: str | Iterable[str], + collection_types: set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + """Query the butler for collections matching an expression. + + Parameters + ---------- + expression : `str` or `~collections.abc.Iterable` [ `str` ] + One or more collection names or globs to include in the search. + collection_types : `set` [`CollectionType`], `CollectionType` or `None` + Restrict the types of collections to be searched. If `None` all + collection types are searched. + flatten_chains : `bool`, optional + If `True` (`False` is default), recursively yield the child + collections of matching `~CollectionType.CHAINED` collections. + include_chains : `bool` or `None`, optional + If `True`, yield records for matching `~CollectionType.CHAINED` + collections. Default is the opposite of ``flatten_chains``: + include either CHAINED collections or their children, but not both. + + Returns + ------- + collections : `~collections.abc.Sequence` [ `str` ] + The names of collections that match ``expression``. + + Notes + ----- + The order in which collections are returned is unspecified, except that + the children of a `~CollectionType.CHAINED` collection are guaranteed + to be in the order in which they are searched. When multiple parent + `~CollectionType.CHAINED` collections match the same criteria, the + order in which the two lists appear is unspecified, and the lists of + children may be incomplete if a child has multiple parents. + """ + raise NotImplementedError() + + @abstractmethod + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + """Obtain information for a specific collection. + + Parameters + ---------- + name : `str` + The name of the collection of interest. + include_doc : `bool`, optional + If `True` any documentation about this collection will be included. + include_parents : `bool`, optional + If `True` any parents of this collection will be included. + + Returns + ------- + info : `CollectionInfo` + Information on the requested collection. + """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 224d734132..1696a30df3 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -33,7 +33,9 @@ from lsst.utils.iteration import ensure_iterable -from .._butler_collections import ButlerCollections +from .._butler_collections import ButlerCollections, CollectionInfo +from ..registry import CollectionType +from ..registry.interfaces import ChainedCollectionRecord from ..registry.sql_registry import SqlRegistry @@ -76,3 +78,33 @@ def remove_from_chain( return self._registry._managers.collections.remove_from_collection_chain( parent_collection_name, list(ensure_iterable(child_collection_names)) ) + + def query( + self, + expression: str | Iterable[str], + collection_types: set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + if collection_types is None: + collection_types = CollectionType.all() + return self._registry.queryCollections( + expression, + collectionTypes=collection_types, + flattenChains=flatten_chains, + includeChains=include_chains, + ) + + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + record = self._registry.get_collection_record(name) + doc = "" + if include_doc: + doc = self._registry.getCollectionDocumentation(name) or "" + children: tuple[str, ...] = tuple() + if record.type == CollectionType.CHAINED: + assert isinstance(record, ChainedCollectionRecord) + children = tuple(record.children) + parents: set[str] = set() + if include_parents: + parents = self._registry.getCollectionParentChains(name) + return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py new file mode 100644 index 0000000000..56b136471c --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -0,0 +1,94 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ("RemoteButlerCollections",) + +from collections.abc import Iterable, Sequence + +from .._butler_collections import ButlerCollections, CollectionInfo +from .._collection_type import CollectionType +from ._registry import RemoteButlerRegistry + + +class RemoteButlerCollections(ButlerCollections): + """Implementation of ButlerCollections for RemoteButler. + + Parameters + ---------- + registry : `~lsst.daf.butler.registry.sql_registry.SqlRegistry` + Registry object used to work with the collections database. + """ + + def __init__(self, registry: RemoteButlerRegistry): + self._registry = registry + + @property + def defaults(self) -> Sequence[str]: + return self._registry.defaults.collections + + def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + raise NotImplementedError("Not yet available") + + def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + raise NotImplementedError("Not yet available") + + def redefine_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + raise NotImplementedError("Not yet available") + + def remove_from_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + raise NotImplementedError("Not yet available") + + def query( + self, + expression: str | Iterable[str], + collection_types: set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + if collection_types is None: + collection_types = CollectionType.all() + return self._registry.queryCollections( + expression, + collectionTypes=collection_types, + flattenChains=flatten_chains, + includeChains=include_chains, + ) + + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + info = self._registry._get_collection_info( + name, include_doc=include_doc, include_parents=include_parents + ) + doc = info.doc or "" + children = info.children or () + parents = info.parents or set() + return CollectionInfo(name=name, type=info.type, doc=doc, parents=parents, children=children) diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 2951080c98..82d2b6f1ce 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -71,38 +71,34 @@ def _getTable( dtype=(str, str, str), ) butler = Butler.from_config(repo) - names = sorted( - butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...) - ) + names = sorted(butler.collections.query(glob or "*", collection_types=frozenset(collection_type))) if inverse: for name in names: - type = butler.registry.getCollectionType(name) - parentNames = butler.registry.getCollectionParentChains(name) - if parentNames: + info = butler.collections.get_info(name, include_parents=True) + if info.parents: first = True - for parentName in sorted(parentNames): - table.add_row((name if first else "", type.name if first else "", parentName)) + for parentName in sorted(info.parents): + table.add_row((name if first else "", info.type.name if first else "", parentName)) first = False else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) # If none of the datasets has a parent dataset then remove the # description column. if not any(c for c in table[descriptionCol]): del table[descriptionCol] else: for name in names: - type = butler.registry.getCollectionType(name) - if type == CollectionType.CHAINED: - children = butler.registry.getCollectionChain(name) - if children: + info = butler.collections.get_info(name) + if info.type == CollectionType.CHAINED: + if info.children: first = True - for child in children: - table.add_row((name if first else "", type.name if first else "", child)) + for child in info.children: + table.add_row((name if first else "", info.type.name if first else "", child)) first = False else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) # If there aren't any CHAINED datasets in the results then remove the # description column. if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]): @@ -147,21 +143,17 @@ def _getTree( butler = Butler.from_config(repo, without_datastore=True) def addCollection(name: str, level: int = 0) -> None: - collectionType = butler.registry.getCollectionType(name) - table.add_row((" " * level + name, collectionType.name)) + info = butler.collections.get_info(name, include_parents=inverse) + table.add_row((" " * level + name, info.type.name)) if inverse: - parentNames = butler.registry.getCollectionParentChains(name) - for pname in sorted(parentNames): + for pname in sorted(info.parents): addCollection(pname, level + 1) else: - if collectionType == CollectionType.CHAINED: - childNames = butler.registry.getCollectionChain(name) - for name in childNames: + if info.type == CollectionType.CHAINED: + for name in info.children: addCollection(name, level + 1) - collections = butler.registry.queryCollections( - collectionTypes=frozenset(collection_type), expression=glob or ... - ) + collections = butler.collections.query(glob or "*", collection_types=frozenset(collection_type)) for collection in sorted(collections): addCollection(collection) return table @@ -174,12 +166,12 @@ def _getFlatten( ) -> Table: butler = Butler.from_config(repo) collectionNames = list( - butler.registry.queryCollections( - collectionTypes=frozenset(collection_type), flattenChains=True, expression=glob or ... + butler.collections.query( + glob or "*", collection_types=frozenset(collection_type), flatten_chains=True ) ) - collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames] + collectionTypes = [butler.collections.get_info(c).type.name for c in collectionNames] return Table((collectionNames, collectionTypes), names=("Name", "Type"))