diff --git a/doc/changes/DM-45738.feature.rst b/doc/changes/DM-45738.feature.rst
new file mode 100644
index 0000000000..df6ddf1f53
--- /dev/null
+++ b/doc/changes/DM-45738.feature.rst
@@ -0,0 +1,4 @@
+* Added ``--collections`` option to ``butler query-dataset-types`` so that the resulting dataset types can be constrained to those used by specific collections.
+* Changed the ``Butler.collections`` property to be a ``ButlerCollections`` instance.
+ This object can still act as a sequence equivalent to ``ButlerCollections.defaults`` but adds new APIs for querying and manipulating collections.
+ Methods whose names start with ``x_`` are experimental APIs that may change in the future.
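+
+ A hedged usage sketch (the collection glob is illustrative)::
+
+     butler.collections.defaults          # default collection search path
+     butler.collections.x_query("HSC/*")  # experimental collection query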
diff --git a/doc/changes/DM-45738.removal.rst b/doc/changes/DM-45738.removal.rst
new file mode 100644
index 0000000000..f165b57d13
--- /dev/null
+++ b/doc/changes/DM-45738.removal.rst
@@ -0,0 +1,2 @@
+The ``Butler.collection_chains`` property is now deprecated.
+Please use ``Butler.collections`` instead.
diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 2c16478aa9..621be17b17 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -1422,15 +1422,17 @@ def collection_chains(self) -> ButlerCollections:
"""Object with methods for modifying collection chains
(`~lsst.daf.butler.ButlerCollections`).
- Use of this object is preferred over `registry` wherever possible.
+ Deprecated. Replaced with ``collections`` property.
"""
raise NotImplementedError()
@property
@abstractmethod
- def collections(self) -> Sequence[str]:
- """The collections to search by default, in order
- (`~collections.abc.Sequence` [ `str` ]).
+ def collections(self) -> ButlerCollections:
+ """Object with methods for modifying and querying collections
+ (`~lsst.daf.butler.ButlerCollections`).
+
+ Use of this object is preferred over `registry` wherever possible.
"""
raise NotImplementedError()
diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py
index e6032b5f2d..8a3d2e2d63 100644
--- a/python/lsst/daf/butler/_butler_collections.py
+++ b/python/lsst/daf/butler/_butler_collections.py
@@ -27,15 +27,67 @@
from __future__ import annotations
-__all__ = ("ButlerCollections",)
+__all__ = ("ButlerCollections", "CollectionInfo")
from abc import ABC, abstractmethod
-from collections.abc import Iterable
+from collections.abc import Iterable, Sequence, Set
+from typing import Any, overload
+from pydantic import BaseModel
-class ButlerCollections(ABC):
+from ._collection_type import CollectionType
+
+
+class CollectionInfo(BaseModel):
+ """Information about a single Butler collection."""
+
+ name: str
+ """Name of the collection."""
+ type: CollectionType
+ """Type of the collection."""
+ doc: str = ""
+ """Documentation string associated with this collection."""
+ children: tuple[str, ...] = tuple()
+ """Children of this collection (only if CHAINED)."""
+ parents: frozenset[str] | None = None
+ """Any parents of this collection.
+
+ `None` if the parents were not requested.
+ """
+ dataset_types: frozenset[str] | None = None
+ """Names of any dataset types associated with datasets in this collection.
+
+ `None` if no dataset type information was requested.
+ """
+
+ def __lt__(self, other: Any) -> bool:
+ """Compare objects by collection name."""
+ if not isinstance(other, type(self)):
+ return NotImplemented
+ return self.name < other.name
+
+
+class ButlerCollections(ABC, Sequence):
"""Methods for working with collections stored in the Butler."""
+ @overload
+ def __getitem__(self, index: int) -> str: ...
+
+ @overload
+ def __getitem__(self, index: slice) -> Sequence[str]: ...
+
+ def __getitem__(self, index: int | slice) -> str | Sequence[str]:
+ return self.defaults[index]
+
+ def __len__(self) -> int:
+ return len(self.defaults)
+
+ @property
+ @abstractmethod
+ def defaults(self) -> Sequence[str]:
+ """Collection defaults associated with this butler."""
+ raise NotImplementedError("Defaults must be implemented by a subclass")
+
@abstractmethod
def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
"""Add children to the end of a CHAINED collection.
@@ -165,3 +217,201 @@ def remove_from_chain(
transactions short.
"""
raise NotImplementedError()
+
+ def x_query(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ ) -> Sequence[str]:
+ """Query the butler for collections matching an expression.
+
+ **This is an experimental interface that can change at any time.**
+
+ Parameters
+ ----------
+ expression : `str` or `~collections.abc.Iterable` [ `str` ]
+ One or more collection names or globs to include in the search.
+ collection_types : `set` [`CollectionType`], `CollectionType` or `None`
+ Restrict the types of collections to be searched. If `None` all
+ collection types are searched.
+ flatten_chains : `bool`, optional
+ If `True` (`False` is default), recursively yield the child
+ collections of matching `~CollectionType.CHAINED` collections.
+ include_chains : `bool` or `None`, optional
+ If `True`, yield records for matching `~CollectionType.CHAINED`
+ collections. Default is the opposite of ``flatten_chains``:
+ include either CHAINED collections or their children, but not both.
+
+ Returns
+ -------
+ collections : `~collections.abc.Sequence` [ `str` ]
+ The names of collections that match ``expression``.
+
+ Notes
+ -----
+ The order in which collections are returned is unspecified, except that
+ the children of a `~CollectionType.CHAINED` collection are guaranteed
+ to be in the order in which they are searched. When multiple parent
+ `~CollectionType.CHAINED` collections match the same criteria, the
+ order in which the two lists appear is unspecified, and the lists of
+ children may be incomplete if a child has multiple parents.
+
+ The default implementation is a wrapper around `x_query_info`.
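+
+ Examples
+ --------
+ A hedged sketch; the glob and collection type are illustrative::
+
+     chained = butler.collections.x_query(
+         "HSC/runs/*", collection_types=CollectionType.CHAINED
+     )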
+ """
+ collections_info = self.x_query_info(
+ expression,
+ collection_types=collection_types,
+ flatten_chains=flatten_chains,
+ include_chains=include_chains,
+ )
+ return [info.name for info in collections_info]
+
+ @abstractmethod
+ def x_query_info(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ include_parents: bool = False,
+ include_summary: bool = False,
+ ) -> Sequence[CollectionInfo]:
+ """Query the butler for collections matching an expression and
+ return detailed information about those collections.
+
+ **This is an experimental interface that can change at any time.**
+
+ Parameters
+ ----------
+ expression : `str` or `~collections.abc.Iterable` [ `str` ]
+ One or more collection names or globs to include in the search.
+ collection_types : `set` [`CollectionType`], `CollectionType` or `None`
+ Restrict the types of collections to be searched. If `None` all
+ collection types are searched.
+ flatten_chains : `bool`, optional
+ If `True` (`False` is default), recursively yield the child
+ collections of matching `~CollectionType.CHAINED` collections.
+ include_chains : `bool` or `None`, optional
+ If `True`, yield records for matching `~CollectionType.CHAINED`
+ collections. Default is the opposite of ``flatten_chains``:
+ include either CHAINED collections or their children, but not both.
+ include_parents : `bool`, optional
+ Whether the returned information includes parents.
+ include_summary : `bool`, optional
+ Whether the returned information includes dataset type and
+ governor information for the collections.
+
+ Returns
+ -------
+ collections : `~collections.abc.Sequence` [ `CollectionInfo` ]
+ Detailed information about the collections that match ``expression``.
+
+ Notes
+ -----
+ The order in which collections are returned is unspecified, except that
+ the children of a `~CollectionType.CHAINED` collection are guaranteed
+ to be in the order in which they are searched. When multiple parent
+ `~CollectionType.CHAINED` collections match the same criteria, the
+ order in which the two lists appear is unspecified, and the lists of
+ children may be incomplete if a child has multiple parents.
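+
+ Examples
+ --------
+ A hedged sketch (the glob is illustrative); ``include_summary=True``
+ populates ``CollectionInfo.dataset_types``::
+
+     for info in butler.collections.x_query_info("HSC/*", include_summary=True):
+         print(info.name, sorted(info.dataset_types or ()))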
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def get_info(
+ self, name: str, include_parents: bool = False, include_summary: bool = False
+ ) -> CollectionInfo:
+ """Obtain information for a specific collection.
+
+ Parameters
+ ----------
+ name : `str`
+ The name of the collection of interest.
+ include_parents : `bool`, optional
+ If `True` any parents of this collection will be included.
+ include_summary : `bool`, optional
+ If `True` dataset type names and governor dimensions of datasets
+ stored in this collection will be included in the result.
+
+ Returns
+ -------
+ info : `CollectionInfo`
+ Information on the requested collection.
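+
+ Examples
+ --------
+ A hedged sketch (the collection name is illustrative)::
+
+     info = butler.collections.get_info("HSC/defaults", include_parents=True)
+     if info.type is CollectionType.CHAINED:
+         print(info.children)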
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
+ """Add a new collection if one with the given name does not exist.
+
+ Parameters
+ ----------
+ name : `str`
+ The name of the collection to create.
+ type : `CollectionType`, optional
+ Enum value indicating the type of collection to create. Default
+ is to create a RUN collection.
+ doc : `str`, optional
+ Documentation string for the collection.
+
+ Returns
+ -------
+ registered : `bool`
+ `True` if the collection was created by this call, `False` if it
+ already existed.
+
+ Notes
+ -----
+ This method cannot be called within transactions, as it needs to be
+ able to perform its own transaction to be concurrent.
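+
+ Examples
+ --------
+ A hedged sketch (the collection name and doc are illustrative)::
+
+     created = butler.collections.register(
+         "u/someuser/calib", CollectionType.CALIBRATION, doc="Example calibs."
+     )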
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def x_remove(self, name: str) -> None:
+ """Remove the given collection from the registry.
+
+ **This is an experimental interface that can change at any time.**
+
+ Parameters
+ ----------
+ name : `str`
+ The name of the collection to remove.
+
+ Raises
+ ------
+ lsst.daf.butler.registry.MissingCollectionError
+ Raised if no collection with the given name exists.
+ lsst.daf.butler.registry.OrphanedRecordError
+ Raised if the database rows associated with the collection are
+ still referenced by some other table, such as a dataset in a
+ datastore (for `~CollectionType.RUN` collections only) or a
+ `~CollectionType.CHAINED` collection of which this collection is
+ a child.
+
+ Notes
+ -----
+ If this is a `~CollectionType.RUN` collection, all datasets and quanta
+ in it will be removed from the `Registry` database. This requires that
+ those datasets be removed (or at least trashed) from any datastores
+ that hold them first.
+
+ A collection may not be deleted as long as it is referenced by a
+ `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
+ be deleted or redefined first.
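+
+ Examples
+ --------
+ A hedged sketch (the run name is illustrative)::
+
+     butler.collections.x_remove("u/someuser/scratch_run")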
+ """
+ raise NotImplementedError()
+
+ def _filter_dataset_types(
+ self, dataset_types: Iterable[str], collections: Iterable[CollectionInfo]
+ ) -> Iterable[str]:
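+ # Keep only the dataset types that appear in the dataset-type summaries
+ # of the given collections; the collection info must have been obtained
+ # with include_summary=True or a RuntimeError is raised below.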
+ dataset_types_set = set(dataset_types)
+ collection_dataset_types: set[str] = set()
+ for info in collections:
+ if info.dataset_types is None:
+ raise RuntimeError("Can only filter by collections if include_summary was True")
+ collection_dataset_types.update(info.dataset_types)
+ dataset_types_set = dataset_types_set.intersection(collection_dataset_types)
+ return dataset_types_set
diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py
index 98810481c6..03b2325956 100644
--- a/python/lsst/daf/butler/cli/cmd/commands.py
+++ b/python/lsst/daf/butler/cli/cmd/commands.py
@@ -439,6 +439,11 @@ def query_collections(*args: Any, **kwargs: Any) -> None:
"dataset types to return."
)
@verbose_option(help="Include dataset type name, dimensions, and storage class in output.")
+@collections_option(
+ help="Constrain the resulting dataset types by these collections. "
+ "This constraint does not say that a dataset of this type is definitely present, "
+ "solely that one may have been present at some point."
+)
@options_file_option()
def query_dataset_types(*args: Any, **kwargs: Any) -> None:
"""Get the dataset types in a repository."""
diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py
index e1a3a73fd6..8a2f67212b 100644
--- a/python/lsst/daf/butler/direct_butler/_direct_butler.py
+++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py
@@ -46,6 +46,7 @@
from collections.abc import Iterable, Iterator, MutableMapping, Sequence
from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast
+from deprecated.sphinx import deprecated
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_class_of
from lsst.utils.logging import VERBOSE, getLogger
@@ -296,7 +297,7 @@ def __reduce__(self) -> tuple:
DirectButler._unpickle,
(
self._config,
- self.collections,
+ self.collections.defaults,
self.run,
dict(self._registry.defaults.dataId.required),
self._registry.isWriteable(),
@@ -1975,7 +1976,7 @@ def transfer_from(
if registry := getattr(source_butler, "registry", None):
run_doc = registry.getCollectionDocumentation(run)
if not dry_run:
- registered = self._registry.registerRun(run, doc=run_doc)
+ registered = self.collections.register(run, doc=run_doc)
else:
registered = True
handled_collections.add(run)
@@ -2149,21 +2150,19 @@ def validateConfiguration(
raise ValidationError(";\n".join(messages))
@property
+ @deprecated(
+ "Please use 'collections' instead. collection_chains will be removed after v28.",
+ version="v28",
+ category=FutureWarning,
+ )
def collection_chains(self) -> DirectButlerCollections:
"""Object with methods for modifying collection chains."""
return DirectButlerCollections(self._registry)
@property
- def collections(self) -> Sequence[str]:
- """The collections to search by default, in order
- (`~collections.abc.Sequence` [ `str` ]).
-
- This is an alias for ``self.registry.defaults.collections``. It cannot
- be set directly in isolation, but all defaults may be changed together
- by assigning a new `RegistryDefaults` instance to
- ``self.registry.defaults``.
- """
- return self._registry.defaults.collections
+ def collections(self) -> DirectButlerCollections:
+ """Object with methods for modifying and inspecting collections."""
+ return DirectButlerCollections(self._registry)
@property
def run(self) -> str | None:
diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
index 1f5ac270b4..8a4b7c70e8 100644
--- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
+++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py
@@ -29,11 +29,15 @@
__all__ = ("DirectButlerCollections",)
-from collections.abc import Iterable
+from collections.abc import Iterable, Sequence, Set
+import sqlalchemy
from lsst.utils.iteration import ensure_iterable
-from .._butler_collections import ButlerCollections
+from .._butler_collections import ButlerCollections, CollectionInfo
+from .._collection_type import CollectionType
+from ..registry._exceptions import OrphanedRecordError
+from ..registry.interfaces import ChainedCollectionRecord
from ..registry.sql_registry import SqlRegistry
@@ -49,6 +53,10 @@ class DirectButlerCollections(ButlerCollections):
def __init__(self, registry: SqlRegistry):
self._registry = registry
+ @property
+ def defaults(self) -> Sequence[str]:
+ return self._registry.defaults.collections
+
def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
return self._registry._managers.collections.extend_collection_chain(
parent_collection_name, list(ensure_iterable(child_collection_names))
@@ -72,3 +80,80 @@ def remove_from_chain(
return self._registry._managers.collections.remove_from_collection_chain(
parent_collection_name, list(ensure_iterable(child_collection_names))
)
+
+ def x_query(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ ) -> Sequence[str]:
+ if collection_types is None:
+ collection_types = CollectionType.all()
+ # Do not use base implementation for now to avoid the additional
+ # unused queries.
+ return self._registry.queryCollections(
+ expression,
+ collectionTypes=collection_types,
+ flattenChains=flatten_chains,
+ includeChains=include_chains,
+ )
+
+ def x_query_info(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ include_parents: bool = False,
+ include_summary: bool = False,
+ ) -> Sequence[CollectionInfo]:
+ info = []
+ with self._registry.caching_context():
+ if collection_types is None:
+ collection_types = CollectionType.all()
+ for name in self._registry.queryCollections(
+ expression,
+ collectionTypes=collection_types,
+ flattenChains=flatten_chains,
+ includeChains=include_chains,
+ ):
+ info.append(
+ self.get_info(name, include_parents=include_parents, include_summary=include_summary)
+ )
+ return info
+
+ def get_info(
+ self, name: str, include_parents: bool = False, include_summary: bool = False
+ ) -> CollectionInfo:
+ record = self._registry.get_collection_record(name)
+ doc = self._registry.getCollectionDocumentation(name) or ""
+ children: tuple[str, ...] = tuple()
+ if record.type == CollectionType.CHAINED:
+ assert isinstance(record, ChainedCollectionRecord)
+ children = tuple(record.children)
+ parents: set[str] | None = None
+ if include_parents:
+ parents = self._registry.getCollectionParentChains(name)
+ dataset_types: Set[str] | None = None
+ if include_summary:
+ summary = self._registry.getCollectionSummary(name)
+ dataset_types = frozenset([dt.name for dt in summary.dataset_types])
+
+ return CollectionInfo(
+ name=name,
+ type=record.type,
+ doc=doc,
+ parents=parents,
+ children=children,
+ dataset_types=dataset_types,
+ )
+
+ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
+ return self._registry.registerCollection(name, type, doc)
+
+ def x_remove(self, name: str) -> None:
+ try:
+ self._registry.removeCollection(name)
+ except sqlalchemy.exc.IntegrityError as e:
+ raise OrphanedRecordError(f"Datasets in run {name} are still referenced elsewhere.") from e
diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py
index 0732e03556..0a66d7797b 100644
--- a/python/lsst/daf/butler/registry/tests/_registry.py
+++ b/python/lsst/daf/butler/registry/tests/_registry.py
@@ -1060,7 +1060,7 @@ def testNestedTransaction(self):
# be committed).
registry.insertDimensionData(dimension, dataId2)
checkpointReached = True
- # This should conflict and raise, triggerring a rollback
+ # This should conflict and raise, triggering a rollback
# of the previous insertion within the same transaction
# context, but not the original insertion in the outer
# block.
@@ -2968,7 +2968,7 @@ def testQueryResultSummaries(self):
# Second query should yield no results, which we should see when
# we attempt to expand the data ID.
query2 = registry.queryDataIds(["physical_filter"], band="h")
- # There's no execute=False, exact=Fals test here because the behavior
+ # There's no execute=False, exact=False test here because the behavior
# not something we want to guarantee in this case (and exact=False
# says either answer is legal).
self.assertFalse(query2.any(execute=True, exact=False))
@@ -3684,7 +3684,7 @@ def test_skypix_constraint_queries(self) -> None:
else:
raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
# New query system does not support non-common skypix constraints
- # and we are deprecating it to replace with region-based constrints.
+ # and we are deprecating it to replace with region-based constraints.
# TODO: Drop this tests once we remove support for non-common skypix.
with contextlib.suppress(NotImplementedError):
self.assertEqual(
@@ -3895,7 +3895,7 @@ def test_query_find_datasets_drop_postprocessing(self) -> None:
# against only one of the two collections. This should work even
# though the relation returned by queryDataIds ends with
# iteration-engine region-filtering, because we can recognize before
- # running the query that there is only one collecton to search and
+ # running the query that there is only one collection to search and
# hence the (default) findFirst=True is irrelevant, and joining in the
# dataset query commutes past the iteration-engine postprocessing.
query1 = registry.queryDataIds(
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py
index 3392fc8791..f09a1a62bc 100644
--- a/python/lsst/daf/butler/remote_butler/_remote_butler.py
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -35,6 +35,7 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, TextIO, cast
+from deprecated.sphinx import deprecated
from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import (
determine_destination_for_retrieved_artifact,
)
@@ -61,6 +62,7 @@
from ._collection_args import convert_collection_arg_to_glob_string_list
from ._query_driver import RemoteQueryDriver
from ._ref_utils import apply_storage_class_override, normalize_dataset_type_name, simplify_dataId
+from ._remote_butler_collections import RemoteButlerCollections
from .server_models import (
CollectionList,
FindDatasetRequestModel,
@@ -143,9 +145,23 @@ def isWriteable(self) -> bool:
return False
@property
+ @deprecated(
+ "Please use 'collections' instead. collection_chains will be removed after v28.",
+ version="v28",
+ category=FutureWarning,
+ )
def collection_chains(self) -> ButlerCollections:
"""Object with methods for modifying collection chains."""
- raise NotImplementedError()
+ from ._registry import RemoteButlerRegistry
+
+ return RemoteButlerCollections(cast(RemoteButlerRegistry, self._registry))
+
+ @property
+ def collections(self) -> ButlerCollections:
+ """Object with methods for modifying and querying collections."""
+ from ._registry import RemoteButlerRegistry
+
+ return RemoteButlerCollections(cast(RemoteButlerRegistry, self._registry))
@property
def dimensions(self) -> DimensionUniverse:
@@ -510,11 +526,6 @@ def validateConfiguration(
# Docstring inherited.
raise NotImplementedError()
- @property
- def collections(self) -> Sequence[str]:
- # Docstring inherited.
- return self._registry_defaults.collections
-
@property
def run(self) -> str | None:
# Docstring inherited.
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
new file mode 100644
index 0000000000..f021781a05
--- /dev/null
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
@@ -0,0 +1,120 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("RemoteButlerCollections",)
+
+from collections.abc import Iterable, Sequence, Set
+from typing import TYPE_CHECKING
+
+from .._butler_collections import ButlerCollections, CollectionInfo
+from .._collection_type import CollectionType
+
+if TYPE_CHECKING:
+ from ._registry import RemoteButlerRegistry
+
+
+class RemoteButlerCollections(ButlerCollections):
+ """Implementation of ButlerCollections for RemoteButler.
+
+ Parameters
+ ----------
+ registry : `RemoteButlerRegistry`
+ Registry object used to work with the collections database.
+ """
+
+ def __init__(self, registry: RemoteButlerRegistry):
+ self._registry = registry
+
+ @property
+ def defaults(self) -> Sequence[str]:
+ return self._registry.defaults.collections
+
+ def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
+ raise NotImplementedError("Not yet available")
+
+ def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
+ raise NotImplementedError("Not yet available")
+
+ def redefine_chain(
+ self, parent_collection_name: str, child_collection_names: str | Iterable[str]
+ ) -> None:
+ raise NotImplementedError("Not yet available")
+
+ def remove_from_chain(
+ self, parent_collection_name: str, child_collection_names: str | Iterable[str]
+ ) -> None:
+ raise NotImplementedError("Not yet available")
+
+ def x_query_info(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ include_parents: bool = False,
+ include_summary: bool = False,
+ ) -> Sequence[CollectionInfo]:
+ # This should become a single call on the server in the future.
+ if collection_types is None:
+ collection_types = CollectionType.all()
+
+ info = []
+ for name in self._registry.queryCollections(
+ expression,
+ collectionTypes=collection_types,
+ flattenChains=flatten_chains,
+ includeChains=include_chains,
+ ):
+ info.append(self.get_info(name, include_parents=include_parents, include_summary=include_summary))
+ return info
+
+ def get_info(
+ self, name: str, include_parents: bool = False, include_summary: bool = False
+ ) -> CollectionInfo:
+ info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents)
+ doc = info.doc or ""
+ children = info.children or ()
+ dataset_types: Set[str] | None = None
+ if include_summary:
+ summary = self._registry.getCollectionSummary(name)
+ dataset_types = frozenset([dt.name for dt in summary.dataset_types])
+ return CollectionInfo(
+ name=name,
+ type=info.type,
+ doc=doc,
+ parents=info.parents,
+ children=children,
+ dataset_types=dataset_types,
+ )
+
+ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
+ raise NotImplementedError("Not yet available.")
+
+ def x_remove(self, name: str) -> None:
+ raise NotImplementedError("Not yet available.")
diff --git a/python/lsst/daf/butler/script/_associate.py b/python/lsst/daf/butler/script/_associate.py
index 74a4bb5baa..3e1f16e743 100644
--- a/python/lsst/daf/butler/script/_associate.py
+++ b/python/lsst/daf/butler/script/_associate.py
@@ -60,7 +60,7 @@ def associate(
"""
butler = Butler.from_config(repo, writeable=True, without_datastore=True)
- butler.registry.registerCollection(collection, CollectionType.TAGGED)
+ butler.collections.register(collection, CollectionType.TAGGED)
results = QueryDatasets(
butler=butler,
diff --git a/python/lsst/daf/butler/script/_pruneDatasets.py b/python/lsst/daf/butler/script/_pruneDatasets.py
index 1c8caf2fd5..f3a7a155c8 100644
--- a/python/lsst/daf/butler/script/_pruneDatasets.py
+++ b/python/lsst/daf/butler/script/_pruneDatasets.py
@@ -223,8 +223,8 @@ def pruneDatasets(
# If purging, verify that the collection to purge is RUN type collection.
if purge_run:
butler = Butler.from_config(repo, without_datastore=True)
- collectionType = butler.registry.getCollectionType(purge_run)
- if collectionType is not CollectionType.RUN:
+ collection_info = butler.collections.get_info(purge_run)
+ if collection_info.type is not CollectionType.RUN:
return PruneDatasetsResult(
state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run)
)
diff --git a/python/lsst/daf/butler/script/certifyCalibrations.py b/python/lsst/daf/butler/script/certifyCalibrations.py
index cec8142aed..0eb2dfbe69 100644
--- a/python/lsst/daf/butler/script/certifyCalibrations.py
+++ b/python/lsst/daf/butler/script/certifyCalibrations.py
@@ -70,13 +70,14 @@ def certifyCalibrations(
collection, instead of just the most recent one.
"""
butler = Butler.from_config(repo, writeable=True, without_datastore=True)
- registry = butler.registry
timespan = Timespan(
begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None,
end=astropy.time.Time(end_date, scale="tai") if end_date is not None else None,
)
- if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED:
- input_collection = next(iter(registry.getCollectionChain(input_collection)))
+ if not search_all_inputs:
+ collection_info = butler.collections.get_info(input_collection)
+ if collection_info.type is CollectionType.CHAINED:
+ input_collection = collection_info.children[0]
with butler._query() as query:
results = query.datasets(dataset_type_name, collections=input_collection)
@@ -86,5 +87,5 @@ def certifyCalibrations(
raise RuntimeError(
f"No inputs found for dataset {dataset_type_name} in {input_collection}. {explanation}"
)
- registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
- registry.certify(output_collection, refs, timespan)
+ butler.collections.register(output_collection, type=CollectionType.CALIBRATION)
+ butler.registry.certify(output_collection, refs, timespan)
diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py
index 36a7b2b138..1857507f57 100644
--- a/python/lsst/daf/butler/script/collectionChain.py
+++ b/python/lsst/daf/butler/script/collectionChain.py
@@ -32,7 +32,6 @@
from .._butler import Butler
from .._collection_type import CollectionType
from ..registry import MissingCollectionError
-from ..registry.wildcards import CollectionWildcard
def collectionChain(
@@ -80,13 +79,13 @@ def collectionChain(
raise RuntimeError(f"Must provide children when defining a collection chain in mode {mode}.")
try:
- butler.registry.getCollectionType(parent)
+ butler.collections.get_info(parent)
except MissingCollectionError:
# Create it -- but only if mode can work with empty chain.
if mode in ("redefine", "extend", "prepend"):
if not doc:
doc = None
- butler.registry.registerCollection(parent, CollectionType.CHAINED, doc)
+ butler.collections.register(parent, CollectionType.CHAINED, doc)
else:
raise RuntimeError(
f"Mode '{mode}' requires that the collection exists "
@@ -96,26 +95,25 @@ def collectionChain(
if flatten:
if mode not in ("redefine", "prepend", "extend"):
raise RuntimeError(f"'flatten' flag is not allowed for {mode}")
- wildcard = CollectionWildcard.from_names(children)
- children = butler.registry.queryCollections(wildcard, flattenChains=True)
+ children = butler.collections.x_query(children, flatten_chains=True)
_modify_collection_chain(butler, mode, parent, children)
- return tuple(butler.registry.getCollectionChain(parent))
+ return butler.collections.get_info(parent).children
def _modify_collection_chain(butler: Butler, mode: str, parent: str, children: Iterable[str]) -> None:
if mode == "prepend":
- butler.collection_chains.prepend_chain(parent, children)
+ butler.collections.prepend_chain(parent, children)
elif mode == "redefine":
- butler.collection_chains.redefine_chain(parent, children)
+ butler.collections.redefine_chain(parent, children)
elif mode == "remove":
- butler.collection_chains.remove_from_chain(parent, children)
+ butler.collections.remove_from_chain(parent, children)
elif mode == "pop":
children_to_pop = _find_children_to_pop(butler, parent, children)
- butler.collection_chains.remove_from_chain(parent, children_to_pop)
+ butler.collections.remove_from_chain(parent, children_to_pop)
elif mode == "extend":
- butler.collection_chains.extend_chain(parent, children)
+ butler.collections.extend_chain(parent, children)
else:
raise ValueError(f"Unrecognized update mode: '{mode}'")
@@ -125,7 +123,8 @@ def _find_children_to_pop(butler: Butler, parent: str, children: Iterable[str])
the given indexes.
"""
children = list(children)
- current = butler.registry.getCollectionChain(parent)
+ collection_info = butler.collections.get_info(parent)
+ current = collection_info.children
n_current = len(current)
if children:
diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py
index 25573f0c7b..4581082d19 100644
--- a/python/lsst/daf/butler/script/exportCalibs.py
+++ b/python/lsst/daf/butler/script/exportCalibs.py
@@ -35,6 +35,7 @@
from astropy.table import Table
from .._butler import Butler
+from .._butler_collections import CollectionInfo
from .._collection_type import CollectionType
if TYPE_CHECKING:
@@ -44,7 +45,7 @@
def find_calibration_datasets(
- butler: Butler, collection: str, datasetTypes: Iterable[DatasetType]
+ butler: Butler, collection: CollectionInfo, datasetTypes: Iterable[DatasetType]
) -> list[DatasetRef]:
"""Search a calibration collection for calibration datasets.
@@ -52,7 +53,7 @@ def find_calibration_datasets(
----------
butler : `lsst.daf.butler.Butler`
Butler to use.
- collection : `str`
+ collection : `CollectionInfo`
Collection to search. This should be a CALIBRATION
collection.
datasetTypes : `list` [`lsst.daf.Butler.DatasetType`]
@@ -68,18 +69,18 @@ def find_calibration_datasets(
RuntimeError
Raised if the collection to search is not a CALIBRATION collection.
"""
- if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION:
- raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")
+ if collection.type != CollectionType.CALIBRATION:
+ raise RuntimeError(f"Collection {collection.name} is not a CALIBRATION collection.")
exportDatasets = []
for calibType in datasetTypes:
with butler._query() as query:
- results = query.datasets(calibType, collections=collection, find_first=False)
+ results = query.datasets(calibType, collections=collection.name, find_first=False)
try:
refs = list(results.with_dimension_records())
except Exception as e:
- e.add_note(f"Error from querying dataset type {calibType} and collection {collection}")
+ e.add_note(f"Error from querying dataset type {calibType} and collection {collection.name}")
raise
exportDatasets.extend(refs)
@@ -122,7 +123,7 @@ def exportCalibs(
butler = Butler.from_config(repo, writeable=False)
dataset_type_query = dataset_type or ...
- collections_query = collections or ...
+ collections_query = collections or "*"
calibTypes = [
datasetType
@@ -133,18 +134,17 @@ def exportCalibs(
collectionsToExport = []
datasetsToExport = []
- for collection in butler.registry.queryCollections(
+ for collection in butler.collections.x_query_info(
collections_query,
- flattenChains=True,
- includeChains=True,
- collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
+ flatten_chains=True,
+ include_chains=True,
+ collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
):
- log.info("Checking collection: %s", collection)
+ log.info("Checking collection: %s", collection.name)
# Get collection information.
- collectionsToExport.append(collection)
- collectionType = butler.registry.getCollectionType(collection)
- if collectionType == CollectionType.CALIBRATION:
+ collectionsToExport.append(collection.name)
+ if collection.type == CollectionType.CALIBRATION:
exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
datasetsToExport.extend(exportDatasets)
diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py
index 2951080c98..f80b8e71cf 100644
--- a/python/lsst/daf/butler/script/queryCollections.py
+++ b/python/lsst/daf/butler/script/queryCollections.py
@@ -32,6 +32,7 @@
from astropy.table import Table
from .._butler import Butler
+from .._butler_collections import CollectionInfo
from .._collection_type import CollectionType
@@ -71,38 +72,36 @@ def _getTable(
dtype=(str, str, str),
)
butler = Butler.from_config(repo)
- names = sorted(
- butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...)
+ collections = sorted(
+ butler.collections.x_query_info(
+ glob or "*", collection_types=frozenset(collection_type), include_parents=inverse
+ )
)
if inverse:
- for name in names:
- type = butler.registry.getCollectionType(name)
- parentNames = butler.registry.getCollectionParentChains(name)
- if parentNames:
+ for info in collections:
+ if info.parents:
first = True
- for parentName in sorted(parentNames):
- table.add_row((name if first else "", type.name if first else "", parentName))
+ for parentName in sorted(info.parents):
+ table.add_row((info.name if first else "", info.type.name if first else "", parentName))
first = False
else:
- table.add_row((name, type.name, ""))
+ table.add_row((info.name, info.type.name, ""))
# If none of the datasets has a parent dataset then remove the
# description column.
if not any(c for c in table[descriptionCol]):
del table[descriptionCol]
else:
- for name in names:
- type = butler.registry.getCollectionType(name)
- if type == CollectionType.CHAINED:
- children = butler.registry.getCollectionChain(name)
- if children:
+ for info in collections:
+ if info.type == CollectionType.CHAINED:
+ if info.children:
first = True
- for child in children:
- table.add_row((name if first else "", type.name if first else "", child))
+ for child in info.children:
+ table.add_row((info.name if first else "", info.type.name if first else "", child))
first = False
else:
- table.add_row((name, type.name, ""))
+ table.add_row((info.name, info.type.name, ""))
else:
- table.add_row((name, type.name, ""))
+ table.add_row((info.name, info.type.name, ""))
# If there aren't any CHAINED datasets in the results then remove the
# description column.
if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]):
@@ -146,21 +145,21 @@ def _getTree(
)
butler = Butler.from_config(repo, without_datastore=True)
- def addCollection(name: str, level: int = 0) -> None:
- collectionType = butler.registry.getCollectionType(name)
- table.add_row((" " * level + name, collectionType.name))
+ def addCollection(info: CollectionInfo, level: int = 0) -> None:
+ table.add_row((" " * level + info.name, info.type.name))
if inverse:
- parentNames = butler.registry.getCollectionParentChains(name)
- for pname in sorted(parentNames):
- addCollection(pname, level + 1)
+ assert info.parents is not None # For mypy.
+ for pname in sorted(info.parents):
+ pinfo = butler.collections.get_info(pname, include_parents=inverse)
+ addCollection(pinfo, level + 1)
else:
- if collectionType == CollectionType.CHAINED:
- childNames = butler.registry.getCollectionChain(name)
- for name in childNames:
- addCollection(name, level + 1)
+ if info.type == CollectionType.CHAINED:
+ for name in info.children:
+ cinfo = butler.collections.get_info(name)
+ addCollection(cinfo, level + 1)
- collections = butler.registry.queryCollections(
- collectionTypes=frozenset(collection_type), expression=glob or ...
+ collections = butler.collections.x_query_info(
+ glob or "*", collection_types=frozenset(collection_type), include_parents=inverse
)
for collection in sorted(collections):
addCollection(collection)
@@ -173,14 +172,14 @@ def _getFlatten(
collection_type: Iterable[CollectionType],
) -> Table:
butler = Butler.from_config(repo)
- collectionNames = list(
- butler.registry.queryCollections(
- collectionTypes=frozenset(collection_type), flattenChains=True, expression=glob or ...
+ collections = list(
+ butler.collections.x_query_info(
+ glob or "*", collection_types=frozenset(collection_type), flatten_chains=True
)
)
-
- collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
- return Table((collectionNames, collectionTypes), names=("Name", "Type"))
+ names = [c.name for c in collections]
+ types = [c.type.name for c in collections]
+ return Table((names, types), names=("Name", "Type"))
def queryCollections(
diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py
index c6053b5127..41bbfd4d6d 100644
--- a/python/lsst/daf/butler/script/queryDataIds.py
+++ b/python/lsst/daf/butler/script/queryDataIds.py
@@ -168,12 +168,23 @@ def queryDataIds(
with butler._query() as query:
if datasets:
# Need to constrain results based on dataset type and collection.
- query_collections = collections or ...
-
- expanded_collections = butler.registry.queryCollections(query_collections)
-
- sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
- for dt in dataset_types:
+ query_collections = collections or "*"
+ collections_info = butler.collections.x_query_info(query_collections, include_summary=True)
+ expanded_collections = [info.name for info in collections_info]
+ filtered_dataset_types = list(
+ butler.collections._filter_dataset_types([dt.name for dt in dataset_types], collections_info)
+ )
+ if not filtered_dataset_types:
+ return (
+ None,
+ f"No datasets of type {datasets!r} existed in the specified "
+ f"collections {','.join(expanded_collections)}.",
+ )
+
+ sub_query = query.join_dataset_search(
+ filtered_dataset_types.pop(0), collections=expanded_collections
+ )
+ for dt in filtered_dataset_types:
sub_query = sub_query.join_dataset_search(dt, collections=expanded_collections)
results = sub_query.data_ids(dimensions)
diff --git a/python/lsst/daf/butler/script/queryDatasetTypes.py b/python/lsst/daf/butler/script/queryDatasetTypes.py
index 58cf68b693..8d657e69bf 100644
--- a/python/lsst/daf/butler/script/queryDatasetTypes.py
+++ b/python/lsst/daf/butler/script/queryDatasetTypes.py
@@ -34,7 +34,9 @@
from .._butler import Butler
-def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str]) -> Table:
+def queryDatasetTypes(
+ repo: str, verbose: bool, glob: Iterable[str], collections: Iterable[str] | None = None
+) -> Table:
"""Get the dataset types in a repository.
Parameters
@@ -48,16 +50,27 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str]) -> Table:
glob : iterable [`str`]
A list of glob-style search string that fully or partially identify
the dataset type names to search for.
+ collections : iterable [`str`] or `None`, optional
+ Constrains the resulting dataset types such that only dataset types
+ found (at some point) in these collections will be returned.
Returns
-------
- collections : `astropy.table.Table`
- A dict whose key is "datasetTypes" and whose value is a list of
- collection names.
+ dataset_types_table : `astropy.table.Table`
+ A table of the matching dataset types.
"""
butler = Butler.from_config(repo, without_datastore=True)
expression = glob or ...
datasetTypes = butler.registry.queryDatasetTypes(expression=expression)
+
+ if collections:
+ collections_info = butler.collections.x_query_info(collections, include_summary=True)
+ filtered_dataset_types = set(
+ butler.collections._filter_dataset_types([d.name for d in datasetTypes], collections_info)
+ )
+ datasetTypes = [d for d in datasetTypes if d.name in filtered_dataset_types]
+
if verbose:
table = Table(
array(
diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py
index 73dde6b96a..5403a44f5b 100644
--- a/python/lsst/daf/butler/script/queryDatasets.py
+++ b/python/lsst/daf/butler/script/queryDatasets.py
@@ -27,9 +27,9 @@
from __future__ import annotations
import dataclasses
+import logging
from collections import defaultdict
from collections.abc import Iterable, Iterator
-from types import EllipsisType
from typing import TYPE_CHECKING
import numpy as np
@@ -43,6 +43,9 @@
from lsst.resources import ResourcePath
+_LOG = logging.getLogger(__name__)
+
+
@dataclasses.dataclass(frozen=True)
class _RefInfo:
datasetRef: DatasetRef
@@ -214,16 +218,36 @@ def getDatasets(self) -> Iterator[DatasetRef]:
Dataset references matching the given query criteria.
"""
datasetTypes = self._dataset_type_glob or ...
- query_collections: Iterable[str] | EllipsisType = self._collections_wildcard or ...
+ query_collections: Iterable[str] = self._collections_wildcard or ["*"]
# Currently need to use old interface to get all the matching
# dataset types and loop over the dataset types executing a new
# query each time.
- dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes)
+ dataset_types: set[str] = {d.name for d in self.butler.registry.queryDatasetTypes(datasetTypes)}
+ n_dataset_types = len(dataset_types)
with self.butler._query() as query:
- query_collections = self.butler.registry.queryCollections(query_collections)
+ # Expand the collections query and include summary information.
+ query_collections_info = self.butler.collections.x_query_info(
+ query_collections, include_summary=True
+ )
+ query_collections = [c.name for c in query_collections_info]
+
+ # Only iterate over dataset types that are relevant for the query.
+ dataset_types = set(
+ self.butler.collections._filter_dataset_types(dataset_types, query_collections_info)
+ )
+
+ if (n_filtered := len(dataset_types)) != n_dataset_types:
+ _LOG.info("Filtered %d dataset types down to %d", n_dataset_types, n_filtered)
+ elif n_dataset_types == 0:
+ _LOG.info("The given dataset type, %s, is not known to this butler.", datasetTypes)
+ else:
+ _LOG.info(
+ "Processing %d dataset type%s", n_dataset_types, "" if n_dataset_types == 1 else "s"
+ )
+
# Accumulate over dataset types.
- for dt in dataset_types:
+ for dt in sorted(dataset_types):
results = query.datasets(dt, collections=query_collections, find_first=self._find_first)
if self._where:
results = results.where(self._where)
diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py
index ee8ad09995..60ee77c506 100644
--- a/python/lsst/daf/butler/script/queryDimensionRecords.py
+++ b/python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -79,9 +79,14 @@ def queryDimensionRecords(
with butler._query() as query:
if datasets:
- query_collections = collections or ...
- expanded_collections = butler.registry.queryCollections(query_collections)
- dataset_types = list(butler.registry.queryDatasetTypes(datasets))
+ query_collections = collections or "*"
+ collections_info = butler.collections.x_query_info(query_collections, include_summary=True)
+ expanded_collections = [info.name for info in collections_info]
+ dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(datasets)]
+ dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections_info))
+
+ if not dataset_types:
+ return None
sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
for dt in dataset_types:
diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py
index 0b8a1ec87e..71606a5461 100644
--- a/python/lsst/daf/butler/script/removeCollections.py
+++ b/python/lsst/daf/butler/script/removeCollections.py
@@ -41,7 +41,7 @@
class RemoveCollectionResult:
"""Container to return to the cli command; holds tables describing the
collections that will be removed, as well as any found RUN collections
- which can not be removed by this command. Also holds the callback funciton
+ which can not be removed by this command. Also holds the callback function
to execute the remove upon user confirmation.
"""
@@ -73,7 +73,7 @@ def _getCollectionInfo(
Parameters
----------
repo : `str`
- The URI to the repostiory.
+ The URI to the repository.
collection : `str`
The collection string to search for. Same as the `expression`
argument to `registry.queryCollections`.
@@ -85,30 +85,17 @@ def _getCollectionInfo(
"""
butler = Butler.from_config(repo, without_datastore=True)
try:
- names = sorted(
- butler.registry.queryCollections(
- collectionTypes=frozenset(
- (
- CollectionType.RUN,
- CollectionType.TAGGED,
- CollectionType.CHAINED,
- CollectionType.CALIBRATION,
- )
- ),
- expression=collection,
- includeChains=True,
- )
- )
+ collections_info = sorted(butler.collections.x_query_info(collection, include_chains=True))
except MissingCollectionError:
- names = []
+ # Hide the error and act like no collections should be removed.
+ collections_info = []
collections = Table(names=("Collection", "Collection Type"), dtype=(str, str))
runCollections = Table(names=("Collection",), dtype=(str,))
- for name in names:
- collectionType = butler.registry.getCollectionType(name).name
- if collectionType == "RUN":
- runCollections.add_row((name,))
+ for collection_info in collections_info:
+ if collection_info.type == CollectionType.RUN:
+ runCollections.add_row((collection_info.name,))
else:
- collections.add_row((name, collectionType))
+ collections.add_row((collection_info.name, collection_info.type.name))
return CollectionInfo(collections, runCollections)
@@ -138,7 +125,7 @@ def _doRemove(collections: Table) -> None:
"""Perform the prune collection step."""
butler = Butler.from_config(repo, writeable=True, without_datastore=True)
for name in collections["Collection"]:
- butler.registry.removeCollection(name)
+ butler.collections.x_remove(name)
result = RemoveCollectionResult(
onConfirmation=partial(_doRemove, collectionInfo.nonRunCollections),
diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py
index efd93e4a9d..94e85ce26d 100644
--- a/python/lsst/daf/butler/script/removeRuns.py
+++ b/python/lsst/daf/butler/script/removeRuns.py
@@ -87,29 +87,26 @@ def _getCollectionInfo(
"""
butler = Butler.from_config(repo)
try:
- collectionNames = list(
- butler.registry.queryCollections(
- collectionTypes=frozenset((CollectionType.RUN,)),
- expression=collection,
- includeChains=False,
- )
+ collections = butler.collections.x_query_info(
+ collection, CollectionType.RUN, include_chains=False, include_parents=True, include_summary=True
)
except MissingCollectionError:
- collectionNames = []
+ # Act as if no collections matched.
+ collections = []
+ dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(...)]
+ dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections))
- dataset_types = butler.registry.queryDatasetTypes(...)
runs = []
datasets: dict[str, int] = defaultdict(int)
- for collectionName in collectionNames:
- assert butler.registry.getCollectionType(collectionName).name == "RUN"
- parents = butler.registry.getCollectionParentChains(collectionName)
- runs.append(RemoveRun(collectionName, list(parents)))
+ for collection_info in collections:
+ assert collection_info.type == CollectionType.RUN and collection_info.parents is not None
+ runs.append(RemoveRun(collection_info.name, list(collection_info.parents)))
with butler._query() as query:
for dt in dataset_types:
- results = query.datasets(dt, collections=collectionName)
+ results = query.datasets(dt, collections=collection_info.name)
count = results.count(exact=False)
if count:
- datasets[dt.name] += count
+ datasets[dt] += count
return runs, {k: datasets[k] for k in sorted(datasets.keys())}
@@ -140,7 +137,7 @@ def _doRemove(runs: Sequence[RemoveRun]) -> None:
with butler.transaction():
for run in runs:
for parent in run.parents:
- butler.collection_chains.remove_from_chain(parent, run.name)
+ butler.collections.remove_from_chain(parent, run.name)
butler.removeRuns([r.name for r in runs], unstore=True)
result = RemoveRunsResult(
diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py
index bbfc74f95d..066c8b6d56 100644
--- a/python/lsst/daf/butler/script/retrieveArtifacts.py
+++ b/python/lsst/daf/butler/script/retrieveArtifacts.py
@@ -30,7 +30,6 @@
__all__ = ("retrieveArtifacts",)
import logging
-from types import EllipsisType
from typing import TYPE_CHECKING
from .._butler import Butler
@@ -85,16 +84,18 @@ def retrieveArtifacts(
The destination URIs of every transferred artifact.
"""
query_types = dataset_type or ...
- query_collections: tuple[str, ...] | EllipsisType = collections or ...
+ query_collections: tuple[str, ...] = collections or ("*",)
butler = Butler.from_config(repo, writeable=False)
# Need to store in list so we can count the number to give some feedback
# to caller.
- dataset_types = butler.registry.queryDatasetTypes(query_types)
+ dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(query_types)]
refs: list[DatasetRef] = []
with butler._query() as query:
- expanded_collections = butler.registry.queryCollections(query_collections)
+ collections_info = butler.collections.x_query_info(query_collections, include_summary=True)
+ expanded_collections = [info.name for info in collections_info]
+ dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections_info))
for dt in dataset_types:
results = query.datasets(dt, collections=expanded_collections, find_first=find_first)
if where:
diff --git a/python/lsst/daf/butler/script/transferDatasets.py b/python/lsst/daf/butler/script/transferDatasets.py
index 5b9eeab9ec..9839212d2f 100644
--- a/python/lsst/daf/butler/script/transferDatasets.py
+++ b/python/lsst/daf/butler/script/transferDatasets.py
@@ -29,7 +29,6 @@
__all__ = ("transferDatasets",)
import logging
-from types import EllipsisType
from lsst.daf.butler import DatasetRef
@@ -79,12 +78,18 @@ def transferDatasets(
dest_butler = Butler.from_config(dest, writeable=True)
dataset_type_expr = dataset_type or ...
- collections_expr: tuple[str, ...] | EllipsisType = collections or ...
+ collections_expr: tuple[str, ...] = collections or ("*",)
- dataset_types = source_butler.registry.queryDatasetTypes(dataset_type_expr)
+ dataset_types = [dt.name for dt in source_butler.registry.queryDatasetTypes(dataset_type_expr)]
source_refs: list[DatasetRef] = []
with source_butler._query() as query:
- query_collections = source_butler.registry.queryCollections(collections_expr)
+ query_collections_info = source_butler.collections.x_query_info(
+ collections_expr, include_summary=True
+ )
+ query_collections = [info.name for info in query_collections_info]
+ dataset_types = list(
+ source_butler.collections._filter_dataset_types(dataset_types, query_collections_info)
+ )
# Loop over dataset types and accumulate.
for dt in dataset_types:
results = query.datasets(dt, collections=query_collections, find_first=find_first)
diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py
index b641bafb84..f39cd89830 100644
--- a/python/lsst/daf/butler/tests/butler_queries.py
+++ b/python/lsst/daf/butler/tests/butler_queries.py
@@ -434,9 +434,9 @@ def test_dataset_constrained_record_query(self) -> None:
"""
butler = self.make_butler("base.yaml", "datasets.yaml")
butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
- butler.registry.registerCollection("empty", CollectionType.RUN)
- butler.registry.registerCollection("chain", CollectionType.CHAINED)
- butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"])
+ butler.collections.register("empty", CollectionType.RUN)
+ butler.collections.register("chain", CollectionType.CHAINED)
+ butler.collections.redefine_chain("chain", ["imported_g", "empty", "imported_r"])
with butler._query() as query:
# No collections here or in defaults is an error.
with self.assertRaises(NoDefaultCollectionError):
diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py
index c64d2a4ad3..86c9c05c74 100644
--- a/python/lsst/daf/butler/tests/hybrid_butler.py
+++ b/python/lsst/daf/butler/tests/hybrid_butler.py
@@ -57,6 +57,7 @@
from ..registry import Registry
from ..remote_butler import RemoteButler
from ..transfers import RepoExportContext
+from .hybrid_butler_collections import HybridButlerCollections
from .hybrid_butler_registry import HybridButlerRegistry
@@ -321,10 +322,6 @@ def validateConfiguration(
) -> None:
return self._direct_butler.validateConfiguration(logFailures, datasetTypeNames, ignore)
- @property
- def collections(self) -> Sequence[str]:
- return self._remote_butler.collections
-
@property
def run(self) -> str | None:
return self._remote_butler.run
@@ -458,4 +455,8 @@ def _extract_all_dimension_records_from_data_ids(
@property
def collection_chains(self) -> ButlerCollections:
- return self._direct_butler.collection_chains
+ return HybridButlerCollections(self)
+
+ @property
+ def collections(self) -> ButlerCollections:
+ return HybridButlerCollections(self)
diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py
new file mode 100644
index 0000000000..6c9e438e79
--- /dev/null
+++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py
@@ -0,0 +1,109 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("HybridButlerCollections",)
+
+from collections.abc import Iterable, Sequence, Set
+from typing import TYPE_CHECKING
+
+from .._butler_collections import ButlerCollections, CollectionInfo
+from .._collection_type import CollectionType
+
+if TYPE_CHECKING:
+ from .hybrid_butler import HybridButler
+
+
+class HybridButlerCollections(ButlerCollections):
+ """Implementation of ButlerCollections for HybridButler.
+
+ Parameters
+ ----------
+ butler : `~lsst.daf.butler.tests.hybrid_butler.HybridButler`
+ Hybrid butler to use.
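+
+ Notes
+ -----
+ Mutating operations (chain edits, ``register``, ``x_remove``) are
+ forwarded to the wrapped ``DirectButler``, while read-only queries
+ (``defaults``, ``x_query_info``, ``get_info``) go to the wrapped
+ ``RemoteButler``, mirroring how ``HybridButler`` splits its work.
+
+ Examples
+ --------
+ A minimal sketch, assuming ``butler`` is a ``HybridButler`` (not run
+ as a doctest):
+
+ >>> collections = HybridButlerCollections(butler)  # doctest: +SKIP
+ >>> collections.register("u/someone/run")  # doctest: +SKIP
+ True
+ >>> collections.get_info("u/someone/run").name  # doctest: +SKIP
+ 'u/someone/run'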
+ """
+
+ def __init__(self, butler: HybridButler):
+ self._hybrid = butler
+
+ @property
+ def defaults(self) -> Sequence[str]:
+ return self._hybrid._remote_butler.collections.defaults
+
+ def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
+ return self._hybrid._direct_butler.collections.extend_chain(
+ parent_collection_name, child_collection_names
+ )
+
+ def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:
+ return self._hybrid._direct_butler.collections.prepend_chain(
+ parent_collection_name, child_collection_names
+ )
+
+ def redefine_chain(
+ self, parent_collection_name: str, child_collection_names: str | Iterable[str]
+ ) -> None:
+ self._hybrid._direct_butler.collections.redefine_chain(parent_collection_name, child_collection_names)
+
+ def remove_from_chain(
+ self, parent_collection_name: str, child_collection_names: str | Iterable[str]
+ ) -> None:
+ return self._hybrid._direct_butler.collections.remove_from_chain(
+ parent_collection_name, child_collection_names
+ )
+
+ def x_query_info(
+ self,
+ expression: str | Iterable[str],
+ collection_types: Set[CollectionType] | CollectionType | None = None,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ include_parents: bool = False,
+ include_summary: bool = False,
+ ) -> Sequence[CollectionInfo]:
+ return self._hybrid._remote_butler.collections.x_query_info(
+ expression,
+ collection_types=collection_types,
+ flatten_chains=flatten_chains,
+ include_chains=include_chains,
+ include_parents=include_parents,
+ include_summary=include_summary,
+ )
+
+ def get_info(
+ self, name: str, include_parents: bool = False, include_summary: bool = False
+ ) -> CollectionInfo:
+ return self._hybrid._remote_butler.collections.get_info(
+ name, include_parents=include_parents, include_summary=include_summary
+ )
+
+ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool:
+ return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc)
+
+ def x_remove(self, name: str) -> None:
+ self._hybrid._direct_butler.collections.x_remove(name)
diff --git a/python/lsst/daf/butler/tests/utils.py b/python/lsst/daf/butler/tests/utils.py
index 8efbe446e5..3e74b82db5 100644
--- a/python/lsst/daf/butler/tests/utils.py
+++ b/python/lsst/daf/butler/tests/utils.py
@@ -183,7 +183,7 @@ def assertAstropyTablesEqual(
original_max = self.maxDiff
self.maxDiff = None # This is required to get the full diff.
try:
- self.assertEqual(table1, expected1)
+ self.assertEqual(table1, expected1, f"Table:\n{table}\n\nvs Expected:\n{expected}")
finally:
self.maxDiff = original_max
@@ -281,7 +281,7 @@ def _do_init(self, butler: Butler, butlerConfigFile: str) -> None:
# New datasets will be added to run and tag, but we will only look in
# tag when looking up datasets.
- self.butler.registry.registerCollection(self._DEFAULT_TAG, CollectionType.TAGGED)
+ self.butler.collections.register(self._DEFAULT_TAG, CollectionType.TAGGED)
# Create and register a DatasetType
self.datasetType = addDatasetType(
@@ -351,7 +351,7 @@ def addDataset(
A reference to the added dataset.
"""
if run:
- self.butler.registry.registerCollection(run, type=CollectionType.RUN)
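+ # register() defaults to creating a RUN collection.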
+ self.butler.collections.register(run)
else:
run = self._DEFAULT_RUN
metric = self._makeExampleMetrics()
diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py
index 37af23a58d..af1a80d551 100644
--- a/python/lsst/daf/butler/transfers/_context.py
+++ b/python/lsst/daf/butler/transfers/_context.py
@@ -358,11 +358,10 @@ def _computeDatasetAssociations(self) -> dict[str, list[DatasetAssociation]]:
collectionTypes = {CollectionType.TAGGED}
if datasetType.isCalibration():
collectionTypes.add(CollectionType.CALIBRATION)
- resolved_collections = self._butler._registry.queryCollections(
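+ # x_query takes no per-dataset-type constraint; the dataset type
+ # is instead applied below via join_dataset_search.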
+ resolved_collections = self._butler.collections.x_query(
self._collections.keys(),
- datasetType=datasetType,
- collectionTypes=collectionTypes,
- flattenChains=False,
+ collection_types=collectionTypes,
+ flatten_chains=False,
)
with self._butler._query() as query:
query = query.join_dataset_search(datasetType, resolved_collections)
diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py
index dc9741bc6c..4911201bd5 100644
--- a/python/lsst/daf/butler/transfers/_yaml.py
+++ b/python/lsst/daf/butler/transfers/_yaml.py
@@ -301,7 +301,7 @@ class YamlRepoImportBackend(RepoImportBackend):
stream : `io.IO`
A readable file-like object.
registry : `SqlRegistry`
- The registry datasets will be imported into. Only used to retreive
- dataset types during construction; all write happen in `register`
+ The registry datasets will be imported into. Only used to retrieve
+ dataset types during construction; all writes happen in `register`
and `load`.
"""
@@ -440,7 +440,7 @@ def __init__(self, stream: IO, registry: SqlRegistry):
# Must create the visit_system_membership records.
# But first create empty list for visits since other
# logic in this file depends on self.dimensions being
- # populated in an order consisteny with primary keys.
+ # populated in an order consistent with primary keys.
self.dimensions[self.registry.dimensions["visit"]] = []
element = self.registry.dimensions["visit_system_membership"]
RecordClass = element.RecordClass
@@ -509,7 +509,7 @@ def __init__(self, stream: IO, registry: SqlRegistry):
if not isinstance(child, str):
warnings.warn(
f"CHAINED collection {data['name']} includes restrictions on child "
- "collection searches, which are no longer suppored and will be ignored.",
+ "collection searches, which are no longer supported and will be ignored.",
stacklevel=find_outside_stacklevel("lsst.daf.butler"),
)
# Old form with dataset type restrictions only,
diff --git a/tests/test_butler.py b/tests/test_butler.py
index 5dc0ae6967..92604ada75 100644
--- a/tests/test_butler.py
+++ b/tests/test_butler.py
@@ -225,7 +225,7 @@ def create_butler(
"""
butler = self.create_empty_butler(run=run)
- collections = set(butler.registry.queryCollections())
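+ # x_query("*") matches every collection and returns the matching names.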
+ collections = set(butler.collections.x_query("*"))
self.assertEqual(collections, {run})
# Create and register a DatasetType
dimensions = butler.dimensions.conform(["instrument", "visit"])
@@ -301,7 +301,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But
# this by using a distinct run collection each time
counter += 1
this_run = f"put_run_{counter}"
- butler.registry.registerCollection(this_run, type=CollectionType.RUN)
+ butler.collections.register(this_run)
expected_collections.update({this_run})
with self.subTest(args=args):
@@ -418,7 +418,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But
# Do explicit registry removal since we know they are
# empty
- butler.registry.removeCollection(this_run)
+ butler.collections.x_remove(this_run)
expected_collections.remove(this_run)
# Create DatasetRef for put using default run.
@@ -502,7 +502,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But
butler.get(ref, parameters={"unsupported": True})
# Check we have a collection
- collections = set(butler.registry.queryCollections())
+ collections = set(butler.collections.x_query("*"))
self.assertEqual(collections, expected_collections)
# Clean up to check that we can remove something that may have
@@ -575,9 +575,9 @@ def testDeferredCollectionPassing(self) -> None:
metric = makeExampleMetrics()
# Register a new run and put dataset.
run = "deferred"
- self.assertTrue(butler.registry.registerRun(run))
+ self.assertTrue(butler.collections.register(run))
# Second time it will be allowed but indicate no-op
- self.assertFalse(butler.registry.registerRun(run))
+ self.assertFalse(butler.collections.register(run))
ref = butler.put(metric, datasetType, dataId, run=run)
# Putting with no run should fail with TypeError.
with self.assertRaises(CollectionError):
@@ -594,7 +594,7 @@ def testDeferredCollectionPassing(self) -> None:
with self.assertRaises(CollectionError):
butler.get(datasetType, dataId)
# Associate the dataset with a different collection.
- butler.registry.registerCollection("tagged")
+ butler.collections.register("tagged", type=CollectionType.TAGGED)
butler.registry.associate("tagged", [ref])
# Deleting the dataset from the new collection should make it findable
# in the original collection.
@@ -642,7 +642,7 @@ def testConstructor(self) -> None:
butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
self.assertIsInstance(butler, Butler)
- collections = set(butler.registry.queryCollections())
+ collections = set(butler.collections.x_query("*"))
self.assertEqual(collections, {self.default_run})
# Check that some special characters can be included in run name.
@@ -652,7 +652,7 @@ def testConstructor(self) -> None:
self.assertEqual(collections, {special_run})
butler2 = Butler.from_config(butler=butler, collections=["other"])
- self.assertEqual(butler2.collections, ("other",))
+ self.assertEqual(butler2.collections.defaults, ("other",))
self.assertIsNone(butler2.run)
self.assertEqual(type(butler._datastore), type(butler2._datastore))
self.assertEqual(butler._datastore.config, butler2._datastore.config)
@@ -1092,7 +1092,7 @@ def testPickle(self) -> None:
butlerOut = pickle.loads(pickle.dumps(butler))
self.assertIsInstance(butlerOut, Butler)
self.assertEqual(butlerOut._config, butler._config)
- self.assertEqual(butlerOut.collections, butler.collections)
+ self.assertEqual(list(butlerOut.collections.defaults), list(butler.collections.defaults))
self.assertEqual(butlerOut.run, butler.run)
def testGetDatasetTypes(self) -> None:
@@ -1390,103 +1390,103 @@ def testGetDatasetCollectionCaching(self):
def testCollectionChainRedefine(self):
butler = self._setup_to_test_collection_chain()
- butler.collection_chains.redefine_chain("chain", "a")
+ butler.collections.redefine_chain("chain", "a")
self._check_chain(butler, ["a"])
# Duplicates are removed from the list of children
- butler.collection_chains.redefine_chain("chain", ["c", "b", "c"])
+ butler.collections.redefine_chain("chain", ["c", "b", "c"])
self._check_chain(butler, ["c", "b"])
# Empty list clears the chain
- butler.collection_chains.redefine_chain("chain", [])
+ butler.collections.redefine_chain("chain", [])
self._check_chain(butler, [])
- self._test_common_chain_functionality(butler, butler.collection_chains.redefine_chain)
+ self._test_common_chain_functionality(butler, butler.collections.redefine_chain)
def testCollectionChainPrepend(self):
butler = self._setup_to_test_collection_chain()
# Duplicates are removed from the list of children
- butler.collection_chains.prepend_chain("chain", ["c", "b", "c"])
+ butler.collections.prepend_chain("chain", ["c", "b", "c"])
self._check_chain(butler, ["c", "b"])
# Prepend goes on the front of existing chain
- butler.collection_chains.prepend_chain("chain", ["a"])
+ butler.collections.prepend_chain("chain", ["a"])
self._check_chain(butler, ["a", "c", "b"])
# Empty prepend does nothing
- butler.collection_chains.prepend_chain("chain", [])
+ butler.collections.prepend_chain("chain", [])
self._check_chain(butler, ["a", "c", "b"])
# Prepending children that already exist in the chain removes them from
# their current position.
- butler.collection_chains.prepend_chain("chain", ["d", "b", "c"])
+ butler.collections.prepend_chain("chain", ["d", "b", "c"])
self._check_chain(butler, ["d", "b", "c", "a"])
- self._test_common_chain_functionality(butler, butler.collection_chains.prepend_chain)
+ self._test_common_chain_functionality(butler, butler.collections.prepend_chain)
def testCollectionChainExtend(self):
butler = self._setup_to_test_collection_chain()
# Duplicates are removed from the list of children
- butler.collection_chains.extend_chain("chain", ["c", "b", "c"])
+ butler.collections.extend_chain("chain", ["c", "b", "c"])
self._check_chain(butler, ["c", "b"])
# Extend goes on the end of existing chain
- butler.collection_chains.extend_chain("chain", ["a"])
+ butler.collections.extend_chain("chain", ["a"])
self._check_chain(butler, ["c", "b", "a"])
# Empty extend does nothing
- butler.collection_chains.extend_chain("chain", [])
+ butler.collections.extend_chain("chain", [])
self._check_chain(butler, ["c", "b", "a"])
# Extending children that already exist in the chain removes them from
# their current position.
- butler.collection_chains.extend_chain("chain", ["d", "b", "c"])
+ butler.collections.extend_chain("chain", ["d", "b", "c"])
self._check_chain(butler, ["a", "d", "b", "c"])
- self._test_common_chain_functionality(butler, butler.collection_chains.extend_chain)
+ self._test_common_chain_functionality(butler, butler.collections.extend_chain)
def testCollectionChainRemove(self) -> None:
butler = self._setup_to_test_collection_chain()
- butler.registry.setCollectionChain("chain", ["a", "b", "c", "d"])
+ butler.collections.redefine_chain("chain", ["a", "b", "c", "d"])
- butler.collection_chains.remove_from_chain("chain", "c")
+ butler.collections.remove_from_chain("chain", "c")
self._check_chain(butler, ["a", "b", "d"])
# Duplicates are allowed in the list of children
- butler.collection_chains.remove_from_chain("chain", ["b", "b", "a"])
+ butler.collections.remove_from_chain("chain", ["b", "b", "a"])
self._check_chain(butler, ["d"])
# Empty remove does nothing
- butler.collection_chains.remove_from_chain("chain", [])
+ butler.collections.remove_from_chain("chain", [])
self._check_chain(butler, ["d"])
# Removing children that aren't in the chain does nothing
- butler.collection_chains.remove_from_chain("chain", ["a", "chain"])
+ butler.collections.remove_from_chain("chain", ["a", "chain"])
self._check_chain(butler, ["d"])
self._test_common_chain_functionality(
- butler, butler.collection_chains.remove_from_chain, skip_cycle_check=True
+ butler, butler.collections.remove_from_chain, skip_cycle_check=True
)
def _setup_to_test_collection_chain(self) -> Butler:
butler = self.create_empty_butler(writeable=True)
- butler.registry.registerCollection("chain", CollectionType.CHAINED)
+ butler.collections.register("chain", CollectionType.CHAINED)
runs = ["a", "b", "c", "d"]
for run in runs:
- butler.registry.registerCollection(run)
+ butler.collections.register(run)
- butler.registry.registerCollection("staticchain", CollectionType.CHAINED)
- butler.registry.setCollectionChain("staticchain", ["a", "b"])
+ butler.collections.register("staticchain", CollectionType.CHAINED)
+ butler.collections.redefine_chain("staticchain", ["a", "b"])
return butler
def _check_chain(self, butler: Butler, expected: list[str]) -> None:
- children = butler.registry.getCollectionChain("chain")
+ children = butler.collections.get_info("chain").children
self.assertEqual(expected, list(children))
def _test_common_chain_functionality(
@@ -1504,14 +1504,14 @@ def _test_common_chain_functionality(
# Prevent collection cycles
if not skip_cycle_check:
- butler.registry.registerCollection("chain2", CollectionType.CHAINED)
+ butler.collections.register("chain2", CollectionType.CHAINED)
func("chain2", "chain")
with self.assertRaises(CollectionCycleError):
func("chain", "chain2")
# Make sure none of the earlier operations interfered with unrelated
# chains.
- self.assertEqual(["a", "b"], list(butler.registry.getCollectionChain("staticchain")))
+ self.assertEqual(["a", "b"], list(butler.collections.get_info("staticchain").children))
with butler._caching_context():
with self.assertRaisesRegex(RuntimeError, "Chained collection modification not permitted"):
@@ -1706,9 +1706,9 @@ def testRemoveRuns(self) -> None:
butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
# Add some RUN-type collection.
run1 = "run1"
- butler.registry.registerRun(run1)
+ butler.collections.register(run1)
run2 = "run2"
- butler.registry.registerRun(run2)
+ butler.collections.register(run2)
# put a dataset in each
metric = makeExampleMetrics()
dimensions = butler.dimensions.conform(["instrument", "physical_filter"])
@@ -1729,9 +1729,9 @@ def testRemoveRuns(self) -> None:
# Should be nothing in registry for either one, and datastore should
# not think either exists.
with self.assertRaises(MissingCollectionError):
- butler.registry.getCollectionType(run1)
+ butler.collections.get_info(run1)
with self.assertRaises(MissingCollectionError):
- butler.registry.getCollectionType(run2)
+ butler.collections.get_info(run2)
self.assertFalse(butler.stored(ref1))
self.assertFalse(butler.stored(ref2))
# The ref we unstored should be gone according to the URI, but the
@@ -1767,9 +1767,9 @@ def testPruneDatasets(self) -> None:
butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
# Add some RUN-type collections.
run1 = "run1"
- butler.registry.registerRun(run1)
+ butler.collections.register(run1)
run2 = "run2"
- butler.registry.registerRun(run2)
+ butler.collections.register(run2)
# put some datasets. ref1 and ref2 have the same data ID, and are in
# different runs. ref3 has a different data ID.
metric = makeExampleMetrics()
@@ -2464,7 +2464,7 @@ def _absolute_transfer(self, transfer: str) -> None:
storageClass = self.storageClassFactory.getStorageClass(storageClassName)
datasetTypeName = "random_data"
run = "run1"
- self.source_butler.registry.registerCollection(run, CollectionType.RUN)
+ self.source_butler.collections.register(run)
dimensions = self.source_butler.dimensions.conform(())
datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
@@ -2513,7 +2513,7 @@ def assertButlerTransfers(
# Create the run collections in the source butler.
for run in runs:
- self.source_butler.registry.registerCollection(run, CollectionType.RUN)
+ self.source_butler.collections.register(run)
# Create dimensions in source butler.
n_exposures = 30
@@ -2746,7 +2746,7 @@ def assertButlerTransfers(
# Now prune run2 collection and create instead a CHAINED collection.
# This should block the transfer.
self.target_butler.removeRuns(["run2"], unstore=True)
- self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
+ self.target_butler.collections.register("run2", CollectionType.CHAINED)
with self.assertRaises(CollectionTypeError):
# Re-importing the run1 datasets can be problematic if they
# use integer IDs so filter those out.
@@ -2797,8 +2797,8 @@ def test_fallback(self) -> None:
self.assertIsInstance(butler._datastore, NullDatastore)
# Check that registry is working.
- butler.registry.registerRun("MYRUN")
- collections = butler.registry.queryCollections(...)
+ butler.collections.register("MYRUN")
+ collections = butler.collections.x_query("*")
self.assertIn("MYRUN", set(collections))
# Create a ref.
diff --git a/tests/test_cliCmdPruneDatasets.py b/tests/test_cliCmdPruneDatasets.py
index e816deedee..03f3e8ab38 100644
--- a/tests/test_cliCmdPruneDatasets.py
+++ b/tests/test_cliCmdPruneDatasets.py
@@ -35,7 +35,7 @@
import lsst.daf.butler.registry.sql_registry
import lsst.daf.butler.script
from astropy.table import Table
-from lsst.daf.butler import CollectionType
+from lsst.daf.butler import CollectionInfo, CollectionType
from lsst.daf.butler.cli.butler import cli as butlerCli
from lsst.daf.butler.cli.cmd.commands import (
pruneDatasets_askContinueMsg,
@@ -400,9 +400,9 @@ def test_purgeNoOp(self):
)
@patch.object(
- lsst.daf.butler.registry.sql_registry.SqlRegistry,
- "getCollectionType",
- side_effect=lambda x: CollectionType.RUN,
+ lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+ "get_info",
+ side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
)
def test_purgeImpliedArgs(self, mockGetCollectionType):
"""Verify the arguments implied by --purge.
@@ -432,9 +432,9 @@ def test_purgeImpliedArgs(self, mockGetCollectionType):
)
@patch.object(
- lsst.daf.butler.registry.sql_registry.SqlRegistry,
- "getCollectionType",
- side_effect=lambda x: CollectionType.RUN,
+ lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+ "get_info",
+ side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
)
def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
"""Verify the arguments implied by --purge, with a COLLECTIONS."""
@@ -457,9 +457,9 @@ def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
)
@patch.object(
- lsst.daf.butler.registry.sql_registry.SqlRegistry,
- "getCollectionType",
- side_effect=lambda x: CollectionType.TAGGED,
+ lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+ "get_info",
+ side_effect=lambda x: CollectionInfo(name="myTaggedCollection", type=CollectionType.TAGGED),
)
def test_purgeOnNonRunCollection(self, mockGetCollectionType):
"""Verify calling run on a non-run collection fails with expected
@@ -472,7 +472,7 @@ def test_purgeOnNonRunCollection(self, mockGetCollectionType):
exPruneDatasetsCallArgs=None,
exQueryDatasetsCallArgs=None,
exGetTablesCalled=False,
- exMsgs=(pruneDatasets_errPruneOnNotRun.format(collection=collectionName)),
+ exMsgs=(pruneDatasets_errPruneOnNotRun.format(collection=collectionName),),
exPruneDatasetsExitCode=1,
)
diff --git a/tests/test_cliCmdQueryDatasetTypes.py b/tests/test_cliCmdQueryDatasetTypes.py
index c656f4cbe3..8ce01e923c 100644
--- a/tests/test_cliCmdQueryDatasetTypes.py
+++ b/tests/test_cliCmdQueryDatasetTypes.py
@@ -46,7 +46,7 @@ class QueryDatasetTypesCmdTest(CliCmdTestBase, unittest.TestCase):
@staticmethod
def defaultExpected():
- return dict(repo=None, verbose=False, glob=())
+ return dict(repo=None, verbose=False, glob=(), collections=())
@staticmethod
def command():
diff --git a/tests/test_cliCmdQueryDatasets.py b/tests/test_cliCmdQueryDatasets.py
index e253f66f94..3670f5cd96 100644
--- a/tests/test_cliCmdQueryDatasets.py
+++ b/tests/test_cliCmdQueryDatasets.py
@@ -250,6 +250,10 @@ def testGlobDatasetType(self):
tables = self._queryDatasets(repo=self.repoDir)
expectedTables = (
+ AstropyTable(
+ array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")),
+ names=("type", "run", "instrument", "visit", "band", "physical_filter"),
+ ),
AstropyTable(
array(
(
@@ -259,10 +263,6 @@ def testGlobDatasetType(self):
),
names=("type", "run", "instrument", "visit", "band", "physical_filter"),
),
- AstropyTable(
- array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")),
- names=("type", "run", "instrument", "visit", "band", "physical_filter"),
- ),
)
self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)
diff --git a/tests/test_query_remote.py b/tests/test_query_remote.py
index 2557243d27..27e90e2e42 100644
--- a/tests/test_query_remote.py
+++ b/tests/test_query_remote.py
@@ -38,13 +38,16 @@
try:
from lsst.daf.butler.tests.server import create_test_server
-except ImportError:
+
+ reason_text = ""
+except ImportError as e:
create_test_server = None
+ reason_text = str(e)
TESTDIR = os.path.abspath(os.path.dirname(__file__))
-@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
+@unittest.skipIf(create_test_server is None, f"Server dependencies not installed: {reason_text}")
class RemoteButlerQueryTests(ButlerQueryTests, unittest.TestCase):
"""Test query system using client/server butler."""
diff --git a/tests/test_remote_butler.py b/tests/test_remote_butler.py
index 41b1a119ff..b9df899567 100644
--- a/tests/test_remote_butler.py
+++ b/tests/test_remote_butler.py
@@ -42,20 +42,28 @@
try:
import httpx
from lsst.daf.butler.remote_butler import ButlerServerError, RemoteButler
-except ImportError:
+
+ remote_butler_import_fail_message = ""
+except ImportError as e:
# httpx is not available in rubin-env yet, so skip these tests if it's not
# available
RemoteButler = None
+ remote_butler_import_fail_message = str(e)
try:
from lsst.daf.butler.tests.server import create_test_server
-except ImportError:
+
+ server_import_fail_message = ""
+except ImportError as e:
create_test_server = None
+ server_import_fail_message = str(e)
TESTDIR = os.path.abspath(os.path.dirname(__file__))
-@unittest.skipIf(RemoteButler is None, "httpx is not installed")
+@unittest.skipIf(
+ RemoteButler is None, f"Remote butler cannot be imported: {remote_butler_import_fail_message}"
+)
class RemoteButlerConfigTests(unittest.TestCase):
"""Test construction of RemoteButler via Butler()"""
@@ -64,7 +72,9 @@ def test_bad_config(self):
Butler({"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": "!"}})
-@unittest.skipIf(create_test_server is None, "Server dependencies not installed")
+@unittest.skipIf(
+ create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}"
+)
class RemoteButlerErrorHandlingTests(unittest.TestCase):
"""Test RemoteButler error handling."""
@@ -200,7 +210,9 @@ def test_query_projection_drop_postprocessing(self):
pass
-@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
+@unittest.skipIf(
+ create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}"
+)
class RemoteButlerSqliteRegistryTests(RemoteButlerRegistryTests, unittest.TestCase):
"""Tests for RemoteButler's registry shim, with a SQLite DB backing the
server.
@@ -209,7 +221,9 @@ class RemoteButlerSqliteRegistryTests(RemoteButlerRegistryTests, unittest.TestCa
postgres = None
-@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
+@unittest.skipIf(
+ create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}"
+)
class RemoteButlerPostgresRegistryTests(RemoteButlerRegistryTests, unittest.TestCase):
"""Tests for RemoteButler's registry shim, with a Postgres DB backing the
server.
diff --git a/tests/test_server.py b/tests/test_server.py
index c3fb6c31a3..90c0e8c14c 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -41,8 +41,11 @@
from lsst.daf.butler.remote_butler.server import create_app
from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
from lsst.daf.butler.tests.server import TEST_REPOSITORY_NAME, UnhandledServerError, create_test_server
-except ImportError:
+
+ reason_text = ""
+except ImportError as e:
create_test_server = None
+ reason_text = str(e)
from unittest.mock import NonCallableMock, patch
@@ -66,7 +69,7 @@
TESTDIR = os.path.abspath(os.path.dirname(__file__))
-@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
+@unittest.skipIf(create_test_server is None, f"Server dependencies not installed: {reason_text}")
class ButlerClientServerTestCase(unittest.TestCase):
"""Test for Butler client/server."""
@@ -219,7 +222,7 @@ def override_read(http_resource_path):
)
self.assertIsInstance(butler, RemoteButler)
self.assertEqual(butler._connection.server_url, server_url)
- self.assertEqual(butler.collections, ("collection1", "collection2"))
+ self.assertEqual(butler.collections.defaults, ("collection1", "collection2"))
self.assertEqual(butler.run, "collection2")
butler_factory = LabeledButlerFactory({"server": server_url})