From d8444aa59390e9448a8d75a04b9d506c37f0424c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 14 Aug 2024 16:15:19 -0700 Subject: [PATCH 01/26] Fix test so that it is using a tuple not iterating over characters --- tests/test_cliCmdPruneDatasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cliCmdPruneDatasets.py b/tests/test_cliCmdPruneDatasets.py index e816deedee..137359d5d1 100644 --- a/tests/test_cliCmdPruneDatasets.py +++ b/tests/test_cliCmdPruneDatasets.py @@ -472,7 +472,7 @@ def test_purgeOnNonRunCollection(self, mockGetCollectionType): exPruneDatasetsCallArgs=None, exQueryDatasetsCallArgs=None, exGetTablesCalled=False, - exMsgs=(pruneDatasets_errPruneOnNotRun.format(collection=collectionName)), + exMsgs=(pruneDatasets_errPruneOnNotRun.format(collection=collectionName),), exPruneDatasetsExitCode=1, ) From 212d620e9fae083967f195d78d2e98b6164a3c64 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 11:12:43 -0700 Subject: [PATCH 02/26] Provide explicit reason when client/server tests are skipped Sometimes it's not missing Safir or httpx but a circular import. 
--- tests/test_query_remote.py | 7 +++++-- tests/test_remote_butler.py | 26 ++++++++++++++++++++------ tests/test_server.py | 7 +++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/tests/test_query_remote.py b/tests/test_query_remote.py index 2557243d27..27e90e2e42 100644 --- a/tests/test_query_remote.py +++ b/tests/test_query_remote.py @@ -38,13 +38,16 @@ try: from lsst.daf.butler.tests.server import create_test_server -except ImportError: + + reason_text = "" +except ImportError as e: create_test_server = None + reason_text = str(e) TESTDIR = os.path.abspath(os.path.dirname(__file__)) -@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") +@unittest.skipIf(create_test_server is None, f"Server dependencies not installed: {reason_text}") class RemoteButlerQueryTests(ButlerQueryTests, unittest.TestCase): """Test query system using client/server butler.""" diff --git a/tests/test_remote_butler.py b/tests/test_remote_butler.py index 41b1a119ff..b9df899567 100644 --- a/tests/test_remote_butler.py +++ b/tests/test_remote_butler.py @@ -42,20 +42,28 @@ try: import httpx from lsst.daf.butler.remote_butler import ButlerServerError, RemoteButler -except ImportError: + + remote_butler_import_fail_message = "" +except ImportError as e: # httpx is not available in rubin-env yet, so skip these tests if it's not # available RemoteButler = None + remote_butler_import_fail_message = str(e) try: from lsst.daf.butler.tests.server import create_test_server -except ImportError: + + server_import_fail_message = "" +except ImportError as e: create_test_server = None + server_import_fail_message = str(e) TESTDIR = os.path.abspath(os.path.dirname(__file__)) -@unittest.skipIf(RemoteButler is None, "httpx is not installed") +@unittest.skipIf( + RemoteButler is None, f"Remote butler can not be imported: {remote_butler_import_fail_message}" +) class RemoteButlerConfigTests(unittest.TestCase): """Test construction of RemoteButler via Butler()""" 
@@ -64,7 +72,9 @@ def test_bad_config(self): Butler({"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": "!"}}) -@unittest.skipIf(create_test_server is None, "Server dependencies not installed") +@unittest.skipIf( + create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}" +) class RemoteButlerErrorHandlingTests(unittest.TestCase): """Test RemoteButler error handling.""" @@ -200,7 +210,9 @@ def test_query_projection_drop_postprocessing(self): pass -@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") +@unittest.skipIf( + create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}" +) class RemoteButlerSqliteRegistryTests(RemoteButlerRegistryTests, unittest.TestCase): """Tests for RemoteButler's registry shim, with a SQLite DB backing the server. @@ -209,7 +221,9 @@ class RemoteButlerSqliteRegistryTests(RemoteButlerRegistryTests, unittest.TestCa postgres = None -@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") +@unittest.skipIf( + create_test_server is None, f"Server dependencies not installed: {server_import_fail_message}" +) class RemoteButlerPostgresRegistryTests(RemoteButlerRegistryTests, unittest.TestCase): """Tests for RemoteButler's registry shim, with a Postgres DB backing the server. 
diff --git a/tests/test_server.py b/tests/test_server.py index c3fb6c31a3..2c6c846608 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -41,8 +41,11 @@ from lsst.daf.butler.remote_butler.server import create_app from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency from lsst.daf.butler.tests.server import TEST_REPOSITORY_NAME, UnhandledServerError, create_test_server -except ImportError: + + reason_text = "" +except ImportError as e: create_test_server = None + reason_text = str(e) from unittest.mock import NonCallableMock, patch @@ -66,7 +69,7 @@ TESTDIR = os.path.abspath(os.path.dirname(__file__)) -@unittest.skipIf(create_test_server is None, "Server dependencies not installed.") +@unittest.skipIf(create_test_server is None, f"Server dependencies not installed: {reason_text}") class ButlerClientServerTestCase(unittest.TestCase): """Test for Butler client/server.""" From ac61b4d274d207446133f114a19a8a9e591ba3ef Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 13 Aug 2024 14:53:48 -0700 Subject: [PATCH 03/26] Change butler.collections to be a ButlerCollections sequence --- python/lsst/daf/butler/_butler.py | 10 ++++--- python/lsst/daf/butler/_butler_collections.py | 29 +++++++++++++++++-- .../butler/direct_butler/_direct_butler.py | 15 +++------- .../_direct_butler_collections.py | 6 +++- .../butler/remote_butler/_remote_butler.py | 13 +++++---- python/lsst/daf/butler/tests/hybrid_butler.py | 8 ++--- tests/test_butler.py | 2 +- 7 files changed, 54 insertions(+), 29 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 2c16478aa9..621be17b17 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1422,15 +1422,17 @@ def collection_chains(self) -> ButlerCollections: """Object with methods for modifying collection chains (`~lsst.daf.butler.ButlerCollections`). - Use of this object is preferred over `registry` wherever possible. 
+ Deprecated. Replaced with ``collections`` property. """ raise NotImplementedError() @property @abstractmethod - def collections(self) -> Sequence[str]: - """The collections to search by default, in order - (`~collections.abc.Sequence` [ `str` ]). + def collections(self) -> ButlerCollections: + """Object with methods for modifying and querying collections + (`~lsst.daf.butler.ButlerCollections`). + + Use of this object is preferred over `registry` wherever possible. """ raise NotImplementedError() diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index e6032b5f2d..da457d34f5 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -30,12 +30,37 @@ __all__ = ("ButlerCollections",) from abc import ABC, abstractmethod -from collections.abc import Iterable +from collections.abc import Iterable, Sequence +from typing import Any, overload -class ButlerCollections(ABC): +class ButlerCollections(ABC, Sequence): """Methods for working with collections stored in the Butler.""" + @overload + def __getitem__(self, index: int) -> str: ... + + @overload + def __getitem__(self, index: slice) -> Sequence[str]: ... + + def __getitem__(self, index: int | slice) -> str | Sequence[str]: + return self.defaults[index] + + def __len__(self) -> int: + return len(self.defaults) + + def __eq__(self, other: Any) -> bool: + # Do not try to compare registry instances. + if not isinstance(other, type(self)): + return False + return self.defaults == other.defaults + + @property + @abstractmethod + def defaults(self) -> Sequence[str]: + """Collection defaults associated with this butler.""" + raise NotImplementedError("Defaults must be implemented by a subclass") + @abstractmethod def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: """Add children to the end of a CHAINED collection. 
diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index e1a3a73fd6..71e22d045b 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -296,7 +296,7 @@ def __reduce__(self) -> tuple: DirectButler._unpickle, ( self._config, - self.collections, + self.collections.defaults, self.run, dict(self._registry.defaults.dataId.required), self._registry.isWriteable(), @@ -2154,16 +2154,9 @@ def collection_chains(self) -> DirectButlerCollections: return DirectButlerCollections(self._registry) @property - def collections(self) -> Sequence[str]: - """The collections to search by default, in order - (`~collections.abc.Sequence` [ `str` ]). - - This is an alias for ``self.registry.defaults.collections``. It cannot - be set directly in isolation, but all defaults may be changed together - by assigning a new `RegistryDefaults` instance to - ``self.registry.defaults``. 
- """ - return self._registry.defaults.collections + def collections(self) -> DirectButlerCollections: + """Object with methods for modifying and inspecting collections.""" + return DirectButlerCollections(self._registry) @property def run(self) -> str | None: diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 1f5ac270b4..224d734132 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -29,7 +29,7 @@ __all__ = ("DirectButlerCollections",) -from collections.abc import Iterable +from collections.abc import Iterable, Sequence from lsst.utils.iteration import ensure_iterable @@ -49,6 +49,10 @@ class DirectButlerCollections(ButlerCollections): def __init__(self, registry: SqlRegistry): self._registry = registry + @property + def defaults(self) -> Sequence[str]: + return self._registry.defaults.collections + def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: return self._registry._managers.collections.extend_collection_chain( parent_collection_name, list(ensure_iterable(child_collection_names)) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 3392fc8791..7a9cf40dbe 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -61,6 +61,7 @@ from ._collection_args import convert_collection_arg_to_glob_string_list from ._query_driver import RemoteQueryDriver from ._ref_utils import apply_storage_class_override, normalize_dataset_type_name, simplify_dataId +from ._remote_butler_collections import RemoteButlerCollections from .server_models import ( CollectionList, FindDatasetRequestModel, @@ -145,7 +146,12 @@ def isWriteable(self) -> bool: @property def 
collection_chains(self) -> ButlerCollections: """Object with methods for modifying collection chains.""" - raise NotImplementedError() + return RemoteButlerCollections(self._registry) + + @property + def collections(self) -> ButlerCollections: + """Object with methods for modifying collection chains.""" + return RemoteButlerCollections(self._registry) @property def dimensions(self) -> DimensionUniverse: @@ -510,11 +516,6 @@ def validateConfiguration( # Docstring inherited. raise NotImplementedError() - @property - def collections(self) -> Sequence[str]: - # Docstring inherited. - return self._registry_defaults.collections - @property def run(self) -> str | None: # Docstring inherited. diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index c64d2a4ad3..9c66f1af1b 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -321,10 +321,6 @@ def validateConfiguration( ) -> None: return self._direct_butler.validateConfiguration(logFailures, datasetTypeNames, ignore) - @property - def collections(self) -> Sequence[str]: - return self._remote_butler.collections - @property def run(self) -> str | None: return self._remote_butler.run @@ -459,3 +455,7 @@ def _extract_all_dimension_records_from_data_ids( @property def collection_chains(self) -> ButlerCollections: return self._direct_butler.collection_chains + + @property + def collections(self) -> ButlerCollections: + return self._remote_butler.collection_chains diff --git a/tests/test_butler.py b/tests/test_butler.py index 5dc0ae6967..f49173dcd3 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -652,7 +652,7 @@ def testConstructor(self) -> None: self.assertEqual(collections, {special_run}) butler2 = Butler.from_config(butler=butler, collections=["other"]) - self.assertEqual(butler2.collections, ("other",)) + self.assertEqual(butler2.collections.defaults, ("other",)) self.assertIsNone(butler2.run) 
self.assertEqual(type(butler._datastore), type(butler2._datastore)) self.assertEqual(butler._datastore.config, butler2._datastore.config) From 44602597494381cdb10ae1d6cf9f4252033bf9f3 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 13 Aug 2024 15:10:36 -0700 Subject: [PATCH 04/26] Initial work on extending butler collections API --- python/lsst/daf/butler/_butler_collections.py | 84 +++++++++++++++- .../_direct_butler_collections.py | 36 ++++++- .../butler/remote_butler/_remote_butler.py | 8 +- .../_remote_butler_collections.py | 97 +++++++++++++++++++ .../daf/butler/script/queryCollections.py | 52 +++++----- tests/test_server.py | 2 +- 6 files changed, 242 insertions(+), 37 deletions(-) create mode 100644 python/lsst/daf/butler/remote_butler/_remote_butler_collections.py diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index da457d34f5..c2e1df1120 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -27,12 +27,31 @@ from __future__ import annotations -__all__ = ("ButlerCollections",) +__all__ = ("ButlerCollections", "CollectionInfo") from abc import ABC, abstractmethod -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Sequence, Set from typing import Any, overload +from pydantic import BaseModel + +from ._collection_type import CollectionType + + +class CollectionInfo(BaseModel): + """Information about a single Butler collection.""" + + name: str + """Name of the collection.""" + type: CollectionType + """Type of the collection.""" + doc: str = "" + """Documentation string associated with this collection.""" + children: tuple[str, ...] 
= tuple() + """Children of this collection (only if CHAINED).""" + parents: frozenset[str] = frozenset() + """Any parents of this collection.""" + class ButlerCollections(ABC, Sequence): """Methods for working with collections stored in the Butler.""" @@ -190,3 +209,64 @@ def remove_from_chain( transactions short. """ raise NotImplementedError() + + @abstractmethod + def query( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + """Query the butler for collections matching an expression. + + Parameters + ---------- + expression : `str` or `~collections.abc.Iterable` [ `str` ] + One or more collection names or globs to include in the search. + collection_types : `set` [`CollectionType`], `CollectionType` or `None` + Restrict the types of collections to be searched. If `None` all + collection types are searched. + flatten_chains : `bool`, optional + If `True` (`False` is default), recursively yield the child + collections of matching `~CollectionType.CHAINED` collections. + include_chains : `bool` or `None`, optional + If `True`, yield records for matching `~CollectionType.CHAINED` + collections. Default is the opposite of ``flatten_chains``: + include either CHAINED collections or their children, but not both. + + Returns + ------- + collections : `~collections.abc.Sequence` [ `str` ] + The names of collections that match ``expression``. + + Notes + ----- + The order in which collections are returned is unspecified, except that + the children of a `~CollectionType.CHAINED` collection are guaranteed + to be in the order in which they are searched. When multiple parent + `~CollectionType.CHAINED` collections match the same criteria, the + order in which the two lists appear is unspecified, and the lists of + children may be incomplete if a child has multiple parents. 
+ """ + raise NotImplementedError() + + @abstractmethod + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + """Obtain information for a specific collection. + + Parameters + ---------- + name : `str` + The name of the collection of interest. + include_doc : `bool`, optional + If `True` any documentation about this collection will be included. + include_parents : `bool`, optional + If `True` any parents of this collection will be included. + + Returns + ------- + info : `CollectionInfo` + Information on the requested collection. + """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 224d734132..3705e2d3e8 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -29,11 +29,13 @@ __all__ = ("DirectButlerCollections",) -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Sequence, Set from lsst.utils.iteration import ensure_iterable -from .._butler_collections import ButlerCollections +from .._butler_collections import ButlerCollections, CollectionInfo +from .._collection_type import CollectionType +from ..registry.interfaces import ChainedCollectionRecord from ..registry.sql_registry import SqlRegistry @@ -76,3 +78,33 @@ def remove_from_chain( return self._registry._managers.collections.remove_from_collection_chain( parent_collection_name, list(ensure_iterable(child_collection_names)) ) + + def query( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + if collection_types is None: + collection_types = CollectionType.all() + return self._registry.queryCollections( + expression, + 
collectionTypes=collection_types, + flattenChains=flatten_chains, + includeChains=include_chains, + ) + + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + record = self._registry.get_collection_record(name) + doc = "" + if include_doc: + doc = self._registry.getCollectionDocumentation(name) or "" + children: tuple[str, ...] = tuple() + if record.type == CollectionType.CHAINED: + assert isinstance(record, ChainedCollectionRecord) + children = tuple(record.children) + parents: set[str] = set() + if include_parents: + parents = self._registry.getCollectionParentChains(name) + return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 7a9cf40dbe..fbabfd0f02 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -146,12 +146,16 @@ def isWriteable(self) -> bool: @property def collection_chains(self) -> ButlerCollections: """Object with methods for modifying collection chains.""" - return RemoteButlerCollections(self._registry) + from ._registry import RemoteButlerRegistry + + return RemoteButlerCollections(cast(RemoteButlerRegistry, self._registry)) @property def collections(self) -> ButlerCollections: """Object with methods for modifying collection chains.""" - return RemoteButlerCollections(self._registry) + from ._registry import RemoteButlerRegistry + + return RemoteButlerCollections(cast(RemoteButlerRegistry, self._registry)) @property def dimensions(self) -> DimensionUniverse: diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py new file mode 100644 index 0000000000..ae572e701c --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -0,0 
+1,97 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ("RemoteButlerCollections",) + +from collections.abc import Iterable, Sequence, Set +from typing import TYPE_CHECKING + +from .._butler_collections import ButlerCollections, CollectionInfo +from .._collection_type import CollectionType + +if TYPE_CHECKING: + from ._registry import RemoteButlerRegistry + + +class RemoteButlerCollections(ButlerCollections): + """Implementation of ButlerCollections for RemoteButler. + + Parameters + ---------- + registry : `~lsst.daf.butler.registry.sql_registry.SqlRegistry` + Registry object used to work with the collections database. 
+ """ + + def __init__(self, registry: RemoteButlerRegistry): + self._registry = registry + + @property + def defaults(self) -> Sequence[str]: + return self._registry.defaults.collections + + def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + raise NotImplementedError("Not yet available") + + def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + raise NotImplementedError("Not yet available") + + def redefine_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + raise NotImplementedError("Not yet available") + + def remove_from_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + raise NotImplementedError("Not yet available") + + def query( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + if collection_types is None: + collection_types = CollectionType.all() + return self._registry.queryCollections( + expression, + collectionTypes=collection_types, + flattenChains=flatten_chains, + includeChains=include_chains, + ) + + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + info = self._registry._get_collection_info( + name, include_doc=include_doc, include_parents=include_parents + ) + doc = info.doc or "" + children = info.children or () + parents = info.parents or set() + return CollectionInfo(name=name, type=info.type, doc=doc, parents=parents, children=children) diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 2951080c98..82d2b6f1ce 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -71,38 +71,34 @@ def 
_getTable( dtype=(str, str, str), ) butler = Butler.from_config(repo) - names = sorted( - butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...) - ) + names = sorted(butler.collections.query(glob or "*", collection_types=frozenset(collection_type))) if inverse: for name in names: - type = butler.registry.getCollectionType(name) - parentNames = butler.registry.getCollectionParentChains(name) - if parentNames: + info = butler.collections.get_info(name, include_parents=True) + if info.parents: first = True - for parentName in sorted(parentNames): - table.add_row((name if first else "", type.name if first else "", parentName)) + for parentName in sorted(info.parents): + table.add_row((name if first else "", info.type.name if first else "", parentName)) first = False else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) # If none of the datasets has a parent dataset then remove the # description column. if not any(c for c in table[descriptionCol]): del table[descriptionCol] else: for name in names: - type = butler.registry.getCollectionType(name) - if type == CollectionType.CHAINED: - children = butler.registry.getCollectionChain(name) - if children: + info = butler.collections.get_info(name) + if info.type == CollectionType.CHAINED: + if info.children: first = True - for child in children: - table.add_row((name if first else "", type.name if first else "", child)) + for child in info.children: + table.add_row((name if first else "", info.type.name if first else "", child)) first = False else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) else: - table.add_row((name, type.name, "")) + table.add_row((name, info.type.name, "")) # If there aren't any CHAINED datasets in the results then remove the # description column. 
if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]): @@ -147,21 +143,17 @@ def _getTree( butler = Butler.from_config(repo, without_datastore=True) def addCollection(name: str, level: int = 0) -> None: - collectionType = butler.registry.getCollectionType(name) - table.add_row((" " * level + name, collectionType.name)) + info = butler.collections.get_info(name, include_parents=inverse) + table.add_row((" " * level + name, info.type.name)) if inverse: - parentNames = butler.registry.getCollectionParentChains(name) - for pname in sorted(parentNames): + for pname in sorted(info.parents): addCollection(pname, level + 1) else: - if collectionType == CollectionType.CHAINED: - childNames = butler.registry.getCollectionChain(name) - for name in childNames: + if info.type == CollectionType.CHAINED: + for name in info.children: addCollection(name, level + 1) - collections = butler.registry.queryCollections( - collectionTypes=frozenset(collection_type), expression=glob or ... - ) + collections = butler.collections.query(glob or "*", collection_types=frozenset(collection_type)) for collection in sorted(collections): addCollection(collection) return table @@ -174,12 +166,12 @@ def _getFlatten( ) -> Table: butler = Butler.from_config(repo) collectionNames = list( - butler.registry.queryCollections( - collectionTypes=frozenset(collection_type), flattenChains=True, expression=glob or ... 
+ butler.collections.query( + glob or "*", collection_types=frozenset(collection_type), flatten_chains=True ) ) - collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames] + collectionTypes = [butler.collections.get_info(c).type.name for c in collectionNames] return Table((collectionNames, collectionTypes), names=("Name", "Type")) diff --git a/tests/test_server.py b/tests/test_server.py index 2c6c846608..90c0e8c14c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -222,7 +222,7 @@ def override_read(http_resource_path): ) self.assertIsInstance(butler, RemoteButler) self.assertEqual(butler._connection.server_url, server_url) - self.assertEqual(butler.collections, ("collection1", "collection2")) + self.assertEqual(butler.collections.defaults, ("collection1", "collection2")) self.assertEqual(butler.run, "collection2") butler_factory = LabeledButlerFactory({"server": server_url}) From 1ad0ab1effe2bdb7d0f933309b3036b2cd026cc4 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 14 Aug 2024 16:16:49 -0700 Subject: [PATCH 05/26] Use new collections API in more scripts --- .../lsst/daf/butler/script/_pruneDatasets.py | 4 ++-- .../daf/butler/script/certifyCalibrations.py | 11 +++++----- .../lsst/daf/butler/script/collectionChain.py | 11 +++++----- python/lsst/daf/butler/script/exportCalibs.py | 16 +++++++------- python/lsst/daf/butler/script/queryDataIds.py | 4 ++-- .../lsst/daf/butler/script/queryDatasets.py | 5 ++--- .../butler/script/queryDimensionRecords.py | 4 ++-- .../daf/butler/script/removeCollections.py | 22 +++++-------------- python/lsst/daf/butler/script/removeRuns.py | 16 +++++--------- .../daf/butler/script/retrieveArtifacts.py | 5 ++--- .../daf/butler/script/transferDatasets.py | 5 ++--- tests/test_cliCmdPruneDatasets.py | 20 ++++++++--------- 12 files changed, 51 insertions(+), 72 deletions(-) diff --git a/python/lsst/daf/butler/script/_pruneDatasets.py b/python/lsst/daf/butler/script/_pruneDatasets.py index 
1c8caf2fd5..f3a7a155c8 100644 --- a/python/lsst/daf/butler/script/_pruneDatasets.py +++ b/python/lsst/daf/butler/script/_pruneDatasets.py @@ -223,8 +223,8 @@ def pruneDatasets( # If purging, verify that the collection to purge is RUN type collection. if purge_run: butler = Butler.from_config(repo, without_datastore=True) - collectionType = butler.registry.getCollectionType(purge_run) - if collectionType is not CollectionType.RUN: + collection_info = butler.collections.get_info(purge_run) + if collection_info.type is not CollectionType.RUN: return PruneDatasetsResult( state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run) ) diff --git a/python/lsst/daf/butler/script/certifyCalibrations.py b/python/lsst/daf/butler/script/certifyCalibrations.py index cec8142aed..77759badf7 100644 --- a/python/lsst/daf/butler/script/certifyCalibrations.py +++ b/python/lsst/daf/butler/script/certifyCalibrations.py @@ -70,13 +70,14 @@ def certifyCalibrations( collection, instead of just the most recent one. """ butler = Butler.from_config(repo, writeable=True, without_datastore=True) - registry = butler.registry timespan = Timespan( begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None, end=astropy.time.Time(end_date, scale="tai") if end_date is not None else None, ) - if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED: - input_collection = next(iter(registry.getCollectionChain(input_collection))) + if not search_all_inputs: + collection_info = butler.collections.get_info(input_collection) + if collection_info.type is CollectionType.CHAINED: + input_collection = collection_info.children[0] with butler._query() as query: results = query.datasets(dataset_type_name, collections=input_collection) @@ -86,5 +87,5 @@ def certifyCalibrations( raise RuntimeError( f"No inputs found for dataset {dataset_type_name} in {input_collection}. 
{explanation}" ) - registry.registerCollection(output_collection, type=CollectionType.CALIBRATION) - registry.certify(output_collection, refs, timespan) + butler.registry.registerCollection(output_collection, type=CollectionType.CALIBRATION) + butler.registry.certify(output_collection, refs, timespan) diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index 36a7b2b138..03054a22bb 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -32,7 +32,6 @@ from .._butler import Butler from .._collection_type import CollectionType from ..registry import MissingCollectionError -from ..registry.wildcards import CollectionWildcard def collectionChain( @@ -80,7 +79,7 @@ def collectionChain( raise RuntimeError(f"Must provide children when defining a collection chain in mode {mode}.") try: - butler.registry.getCollectionType(parent) + butler.collections.get_info(parent) except MissingCollectionError: # Create it -- but only if mode can work with empty chain. if mode in ("redefine", "extend", "prepend"): @@ -96,12 +95,11 @@ def collectionChain( if flatten: if mode not in ("redefine", "prepend", "extend"): raise RuntimeError(f"'flatten' flag is not allowed for {mode}") - wildcard = CollectionWildcard.from_names(children) - children = butler.registry.queryCollections(wildcard, flattenChains=True) + children = butler.collections.query(children, flatten_chains=True) _modify_collection_chain(butler, mode, parent, children) - return tuple(butler.registry.getCollectionChain(parent)) + return butler.collections.get_info(parent).children def _modify_collection_chain(butler: Butler, mode: str, parent: str, children: Iterable[str]) -> None: @@ -125,7 +123,8 @@ def _find_children_to_pop(butler: Butler, parent: str, children: Iterable[str]) the given indexes. 
""" children = list(children) - current = butler.registry.getCollectionChain(parent) + collection_info = butler.collections.get_info(parent) + current = collection_info.children n_current = len(current) if children: diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py index 25573f0c7b..6d2a9e8233 100644 --- a/python/lsst/daf/butler/script/exportCalibs.py +++ b/python/lsst/daf/butler/script/exportCalibs.py @@ -68,7 +68,7 @@ def find_calibration_datasets( RuntimeError Raised if the collection to search is not a CALIBRATION collection. """ - if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION: + if butler.collections.get_info(collection).type != CollectionType.CALIBRATION: raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.") exportDatasets = [] @@ -122,7 +122,7 @@ def exportCalibs( butler = Butler.from_config(repo, writeable=False) dataset_type_query = dataset_type or ... - collections_query = collections or ... + collections_query = collections or "*" calibTypes = [ datasetType @@ -133,18 +133,18 @@ def exportCalibs( collectionsToExport = [] datasetsToExport = [] - for collection in butler.registry.queryCollections( + for collection in butler.collections.query( collections_query, - flattenChains=True, - includeChains=True, - collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED}, + flatten_chains=True, + include_chains=True, + collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED}, ): log.info("Checking collection: %s", collection) # Get collection information. 
collectionsToExport.append(collection) - collectionType = butler.registry.getCollectionType(collection) - if collectionType == CollectionType.CALIBRATION: + info = butler.collections.get_info(collection) + if info.type == CollectionType.CALIBRATION: exportDatasets = find_calibration_datasets(butler, collection, calibTypes) datasetsToExport.extend(exportDatasets) diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index c6053b5127..dd75eb4824 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -168,9 +168,9 @@ def queryDataIds( with butler._query() as query: if datasets: # Need to constrain results based on dataset type and collection. - query_collections = collections or ... + query_collections = collections or "*" - expanded_collections = butler.registry.queryCollections(query_collections) + expanded_collections = butler.collections.query(query_collections) sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) for dt in dataset_types: diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index 73dde6b96a..325d2bb8e4 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -29,7 +29,6 @@ import dataclasses from collections import defaultdict from collections.abc import Iterable, Iterator -from types import EllipsisType from typing import TYPE_CHECKING import numpy as np @@ -214,14 +213,14 @@ def getDatasets(self) -> Iterator[DatasetRef]: Dataset references matching the given query criteria. """ datasetTypes = self._dataset_type_glob or ... - query_collections: Iterable[str] | EllipsisType = self._collections_wildcard or ... 
+ query_collections: Iterable[str] = self._collections_wildcard or ["*"] # Currently need to use old interface to get all the matching # dataset types and loop over the dataset types executing a new # query each time. dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes) with self.butler._query() as query: - query_collections = self.butler.registry.queryCollections(query_collections) + query_collections = self.butler.collections.query(query_collections) # Accumulate over dataset types. for dt in dataset_types: results = query.datasets(dt, collections=query_collections, find_first=self._find_first) diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index ee8ad09995..074efac915 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -79,8 +79,8 @@ def queryDimensionRecords( with butler._query() as query: if datasets: - query_collections = collections or ... 
- expanded_collections = butler.registry.queryCollections(query_collections) + query_collections = collections or "*" + expanded_collections = butler.collections.query(query_collections) dataset_types = list(butler.registry.queryDatasetTypes(datasets)) sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index 0b8a1ec87e..df3f41b106 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -85,30 +85,18 @@ def _getCollectionInfo( """ butler = Butler.from_config(repo, without_datastore=True) try: - names = sorted( - butler.registry.queryCollections( - collectionTypes=frozenset( - ( - CollectionType.RUN, - CollectionType.TAGGED, - CollectionType.CHAINED, - CollectionType.CALIBRATION, - ) - ), - expression=collection, - includeChains=True, - ) - ) + names = sorted(butler.collections.query(collection, include_chains=True)) except MissingCollectionError: + # Hide the error and act like no collections should be removed. 
names = [] collections = Table(names=("Collection", "Collection Type"), dtype=(str, str)) runCollections = Table(names=("Collection",), dtype=(str,)) for name in names: - collectionType = butler.registry.getCollectionType(name).name - if collectionType == "RUN": + collection_info = butler.collections.get_info(name) + if collection_info.type == CollectionType.RUN: runCollections.add_row((name,)) else: - collections.add_row((name, collectionType)) + collections.add_row((name, collection_info.type.name)) return CollectionInfo(collections, runCollections) diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index efd93e4a9d..50ad5b9b2b 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -87,23 +87,17 @@ def _getCollectionInfo( """ butler = Butler.from_config(repo) try: - collectionNames = list( - butler.registry.queryCollections( - collectionTypes=frozenset((CollectionType.RUN,)), - expression=collection, - includeChains=False, - ) - ) + collectionNames = butler.collections.query(collection, CollectionType.RUN, include_chains=False) except MissingCollectionError: + # Act as if no collections matched. collectionNames = [] - dataset_types = butler.registry.queryDatasetTypes(...) 
runs = [] datasets: dict[str, int] = defaultdict(int) for collectionName in collectionNames: - assert butler.registry.getCollectionType(collectionName).name == "RUN" - parents = butler.registry.getCollectionParentChains(collectionName) - runs.append(RemoveRun(collectionName, list(parents))) + collection_info = butler.collections.get_info(collectionName, include_parents=True) + assert collection_info.type == CollectionType.RUN + runs.append(RemoveRun(collectionName, list(collection_info.parents))) with butler._query() as query: for dt in dataset_types: results = query.datasets(dt, collections=collectionName) diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py index bbfc74f95d..ca25999344 100644 --- a/python/lsst/daf/butler/script/retrieveArtifacts.py +++ b/python/lsst/daf/butler/script/retrieveArtifacts.py @@ -30,7 +30,6 @@ __all__ = ("retrieveArtifacts",) import logging -from types import EllipsisType from typing import TYPE_CHECKING from .._butler import Butler @@ -85,7 +84,7 @@ def retrieveArtifacts( The destination URIs of every transferred artifact. """ query_types = dataset_type or ... - query_collections: tuple[str, ...] | EllipsisType = collections or ... + query_collections: tuple[str, ...] 
= collections or ("*",) butler = Butler.from_config(repo, writeable=False) @@ -94,7 +93,7 @@ def retrieveArtifacts( dataset_types = butler.registry.queryDatasetTypes(query_types) refs: list[DatasetRef] = [] with butler._query() as query: - expanded_collections = butler.registry.queryCollections(query_collections) + expanded_collections = butler.collections.query(query_collections) for dt in dataset_types: results = query.datasets(dt, collections=expanded_collections, find_first=find_first) if where: diff --git a/python/lsst/daf/butler/script/transferDatasets.py b/python/lsst/daf/butler/script/transferDatasets.py index 5b9eeab9ec..e5e390bc88 100644 --- a/python/lsst/daf/butler/script/transferDatasets.py +++ b/python/lsst/daf/butler/script/transferDatasets.py @@ -29,7 +29,6 @@ __all__ = ("transferDatasets",) import logging -from types import EllipsisType from lsst.daf.butler import DatasetRef @@ -79,12 +78,12 @@ def transferDatasets( dest_butler = Butler.from_config(dest, writeable=True) dataset_type_expr = dataset_type or ... - collections_expr: tuple[str, ...] | EllipsisType = collections or ... + collections_expr: tuple[str, ...] = collections or ("*",) dataset_types = source_butler.registry.queryDatasetTypes(dataset_type_expr) source_refs: list[DatasetRef] = [] with source_butler._query() as query: - query_collections = source_butler.registry.queryCollections(collections_expr) + query_collections = source_butler.collections.query(collections_expr) # Loop over dataset types and accumulate. 
for dt in dataset_types: results = query.datasets(dt, collections=query_collections, find_first=find_first) diff --git a/tests/test_cliCmdPruneDatasets.py b/tests/test_cliCmdPruneDatasets.py index 137359d5d1..03f3e8ab38 100644 --- a/tests/test_cliCmdPruneDatasets.py +++ b/tests/test_cliCmdPruneDatasets.py @@ -35,7 +35,7 @@ import lsst.daf.butler.registry.sql_registry import lsst.daf.butler.script from astropy.table import Table -from lsst.daf.butler import CollectionType +from lsst.daf.butler import CollectionInfo, CollectionType from lsst.daf.butler.cli.butler import cli as butlerCli from lsst.daf.butler.cli.cmd.commands import ( pruneDatasets_askContinueMsg, @@ -400,9 +400,9 @@ def test_purgeNoOp(self): ) @patch.object( - lsst.daf.butler.registry.sql_registry.SqlRegistry, - "getCollectionType", - side_effect=lambda x: CollectionType.RUN, + lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections, + "get_info", + side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN), ) def test_purgeImpliedArgs(self, mockGetCollectionType): """Verify the arguments implied by --purge. 
@@ -432,9 +432,9 @@ def test_purgeImpliedArgs(self, mockGetCollectionType): ) @patch.object( - lsst.daf.butler.registry.sql_registry.SqlRegistry, - "getCollectionType", - side_effect=lambda x: CollectionType.RUN, + lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections, + "get_info", + side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN), ) def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType): """Verify the arguments implied by --purge, with a COLLECTIONS.""" @@ -457,9 +457,9 @@ def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType): ) @patch.object( - lsst.daf.butler.registry.sql_registry.SqlRegistry, - "getCollectionType", - side_effect=lambda x: CollectionType.TAGGED, + lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections, + "get_info", + side_effect=lambda x: CollectionInfo(name="myTaggedCollection", type=CollectionType.TAGGED), ) def test_purgeOnNonRunCollection(self, mockGetCollectionType): """Verify calling run on a non-run collection fails with expected From 3c3d3b799c5967778590d7e210f2ee4cd71981ca Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 11:13:25 -0700 Subject: [PATCH 06/26] Replace queryCollections call in export --- python/lsst/daf/butler/transfers/_context.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py index 37af23a58d..0c293c830d 100644 --- a/python/lsst/daf/butler/transfers/_context.py +++ b/python/lsst/daf/butler/transfers/_context.py @@ -358,11 +358,10 @@ def _computeDatasetAssociations(self) -> dict[str, list[DatasetAssociation]]: collectionTypes = {CollectionType.TAGGED} if datasetType.isCalibration(): collectionTypes.add(CollectionType.CALIBRATION) - resolved_collections = self._butler._registry.queryCollections( + resolved_collections = self._butler.collections.query( self._collections.keys(), - 
datasetType=datasetType, - collectionTypes=collectionTypes, - flattenChains=False, + collection_types=collectionTypes, + flatten_chains=False, ) with self._butler._query() as query: query = query.join_dataset_search(datasetType, resolved_collections) From 077b5e521e6ab45599d03ed32b1d041dc4d9ef6a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 12:55:16 -0700 Subject: [PATCH 07/26] Add butler.collections.register and use it --- python/lsst/daf/butler/_butler_collections.py | 27 +++++++++++++++++++ .../butler/direct_butler/_direct_butler.py | 2 +- .../_direct_butler_collections.py | 3 +++ .../_remote_butler_collections.py | 3 +++ python/lsst/daf/butler/script/_associate.py | 2 +- .../daf/butler/script/certifyCalibrations.py | 2 +- .../lsst/daf/butler/script/collectionChain.py | 2 +- python/lsst/daf/butler/tests/utils.py | 4 +-- 8 files changed, 39 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index c2e1df1120..68c91aa919 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -270,3 +270,30 @@ def get_info(self, name: str, include_doc: bool = False, include_parents: bool = Information on the requested collection. """ raise NotImplementedError() + + @abstractmethod + def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: + """Add a new collection if one with the given name does not exist. + + Parameters + ---------- + name : `str` + The name of the collection to create. + type : `CollectionType`, optional + Enum value indicating the type of collection to create. Default + is to create a RUN collection. + doc : `str`, optional + Documentation string for the collection. + + Returns + ------- + registered : `bool` + Boolean indicating whether the collection was already registered + or was created by this call. 
+ + Notes + ----- + This method cannot be called within transactions, as it needs to be + able to perform its own transaction to be concurrent + """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index 71e22d045b..2465a026ba 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -1975,7 +1975,7 @@ def transfer_from( if registry := getattr(source_butler, "registry", None): run_doc = registry.getCollectionDocumentation(run) if not dry_run: - registered = self._registry.registerRun(run, doc=run_doc) + registered = self.collections.register(run, doc=run_doc) else: registered = True handled_collections.add(run) diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 3705e2d3e8..a2804a0c47 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -108,3 +108,6 @@ def get_info(self, name: str, include_doc: bool = False, include_parents: bool = if include_parents: parents = self._registry.getCollectionParentChains(name) return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) + + def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: + return self._registry.registerCollection(name, type, doc) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index ae572e701c..13baf3a12d 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -95,3 +95,6 @@ def get_info(self, name: str, include_doc: bool = False, include_parents: 
bool = children = info.children or () parents = info.parents or set() return CollectionInfo(name=name, type=info.type, doc=doc, parents=parents, children=children) + + def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: + raise NotImplementedError("Not yet available.") diff --git a/python/lsst/daf/butler/script/_associate.py b/python/lsst/daf/butler/script/_associate.py index 74a4bb5baa..3e1f16e743 100644 --- a/python/lsst/daf/butler/script/_associate.py +++ b/python/lsst/daf/butler/script/_associate.py @@ -60,7 +60,7 @@ def associate( """ butler = Butler.from_config(repo, writeable=True, without_datastore=True) - butler.registry.registerCollection(collection, CollectionType.TAGGED) + butler.collections.register(collection, CollectionType.TAGGED) results = QueryDatasets( butler=butler, diff --git a/python/lsst/daf/butler/script/certifyCalibrations.py b/python/lsst/daf/butler/script/certifyCalibrations.py index 77759badf7..0eb2dfbe69 100644 --- a/python/lsst/daf/butler/script/certifyCalibrations.py +++ b/python/lsst/daf/butler/script/certifyCalibrations.py @@ -87,5 +87,5 @@ def certifyCalibrations( raise RuntimeError( f"No inputs found for dataset {dataset_type_name} in {input_collection}. 
{explanation}" ) - butler.registry.registerCollection(output_collection, type=CollectionType.CALIBRATION) + butler.collections.register(output_collection, type=CollectionType.CALIBRATION) butler.registry.certify(output_collection, refs, timespan) diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index 03054a22bb..8ccc9271ac 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -85,7 +85,7 @@ def collectionChain( if mode in ("redefine", "extend", "prepend"): if not doc: doc = None - butler.registry.registerCollection(parent, CollectionType.CHAINED, doc) + butler.collections.register(parent, CollectionType.CHAINED, doc) else: raise RuntimeError( f"Mode '{mode}' requires that the collection exists " diff --git a/python/lsst/daf/butler/tests/utils.py b/python/lsst/daf/butler/tests/utils.py index 8efbe446e5..81a898f5a7 100644 --- a/python/lsst/daf/butler/tests/utils.py +++ b/python/lsst/daf/butler/tests/utils.py @@ -281,7 +281,7 @@ def _do_init(self, butler: Butler, butlerConfigFile: str) -> None: # New datasets will be added to run and tag, but we will only look in # tag when looking up datasets. - self.butler.registry.registerCollection(self._DEFAULT_TAG, CollectionType.TAGGED) + self.butler.collections.register(self._DEFAULT_TAG, CollectionType.TAGGED) # Create and register a DatasetType self.datasetType = addDatasetType( @@ -351,7 +351,7 @@ def addDataset( A reference to the added dataset. 
""" if run: - self.butler.registry.registerCollection(run, type=CollectionType.RUN) + self.butler.collections.register(run) else: run = self._DEFAULT_RUN metric = self._makeExampleMetrics() From 8c7236b160000d03f594bb82b7d77f3e02fc65ed Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 14:21:37 -0700 Subject: [PATCH 08/26] Change collections.query to collections.x_query for "experimental" --- python/lsst/daf/butler/_butler_collections.py | 2 +- .../daf/butler/direct_butler/_direct_butler_collections.py | 2 +- .../daf/butler/remote_butler/_remote_butler_collections.py | 2 +- python/lsst/daf/butler/script/collectionChain.py | 2 +- python/lsst/daf/butler/script/exportCalibs.py | 2 +- python/lsst/daf/butler/script/queryCollections.py | 6 +++--- python/lsst/daf/butler/script/queryDataIds.py | 2 +- python/lsst/daf/butler/script/queryDatasets.py | 2 +- python/lsst/daf/butler/script/queryDimensionRecords.py | 2 +- python/lsst/daf/butler/script/removeCollections.py | 2 +- python/lsst/daf/butler/script/removeRuns.py | 2 +- python/lsst/daf/butler/script/retrieveArtifacts.py | 2 +- python/lsst/daf/butler/script/transferDatasets.py | 2 +- python/lsst/daf/butler/transfers/_context.py | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 68c91aa919..814c4635a8 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -211,7 +211,7 @@ def remove_from_chain( raise NotImplementedError() @abstractmethod - def query( + def x_query( self, expression: str | Iterable[str], collection_types: Set[CollectionType] | CollectionType | None = None, diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index a2804a0c47..9ce20a3efa 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ 
b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -79,7 +79,7 @@ def remove_from_chain( parent_collection_name, list(ensure_iterable(child_collection_names)) ) - def query( + def x_query( self, expression: str | Iterable[str], collection_types: Set[CollectionType] | CollectionType | None = None, diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 13baf3a12d..4e4d195e62 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -71,7 +71,7 @@ def remove_from_chain( ) -> None: raise NotImplementedError("Not yet available") - def query( + def x_query( self, expression: str | Iterable[str], collection_types: Set[CollectionType] | CollectionType | None = None, diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index 8ccc9271ac..463fc35aa1 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -95,7 +95,7 @@ def collectionChain( if flatten: if mode not in ("redefine", "prepend", "extend"): raise RuntimeError(f"'flatten' flag is not allowed for {mode}") - children = butler.collections.query(children, flatten_chains=True) + children = butler.collections.x_query(children, flatten_chains=True) _modify_collection_chain(butler, mode, parent, children) diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py index 6d2a9e8233..cddd0f3cfd 100644 --- a/python/lsst/daf/butler/script/exportCalibs.py +++ b/python/lsst/daf/butler/script/exportCalibs.py @@ -133,7 +133,7 @@ def exportCalibs( collectionsToExport = [] datasetsToExport = [] - for collection in butler.collections.query( + for collection in butler.collections.x_query( collections_query, flatten_chains=True, include_chains=True, diff 
--git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 82d2b6f1ce..43cee5d2ab 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -71,7 +71,7 @@ def _getTable( dtype=(str, str, str), ) butler = Butler.from_config(repo) - names = sorted(butler.collections.query(glob or "*", collection_types=frozenset(collection_type))) + names = sorted(butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type))) if inverse: for name in names: info = butler.collections.get_info(name, include_parents=True) @@ -153,7 +153,7 @@ def addCollection(name: str, level: int = 0) -> None: for name in info.children: addCollection(name, level + 1) - collections = butler.collections.query(glob or "*", collection_types=frozenset(collection_type)) + collections = butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type)) for collection in sorted(collections): addCollection(collection) return table @@ -166,7 +166,7 @@ def _getFlatten( ) -> Table: butler = Butler.from_config(repo) collectionNames = list( - butler.collections.query( + butler.collections.x_query( glob or "*", collection_types=frozenset(collection_type), flatten_chains=True ) ) diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index dd75eb4824..a70aba1887 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -170,7 +170,7 @@ def queryDataIds( # Need to constrain results based on dataset type and collection. 
query_collections = collections or "*" - expanded_collections = butler.collections.query(query_collections) + expanded_collections = butler.collections.x_query(query_collections) sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) for dt in dataset_types: diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index 325d2bb8e4..c95e56fad9 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -220,7 +220,7 @@ def getDatasets(self) -> Iterator[DatasetRef]: # query each time. dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes) with self.butler._query() as query: - query_collections = self.butler.collections.query(query_collections) + query_collections = self.butler.collections.x_query(query_collections) # Accumulate over dataset types. for dt in dataset_types: results = query.datasets(dt, collections=query_collections, find_first=self._find_first) diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 074efac915..11470e8f17 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -80,7 +80,7 @@ def queryDimensionRecords( if datasets: query_collections = collections or "*" - expanded_collections = butler.collections.query(query_collections) + expanded_collections = butler.collections.x_query(query_collections) dataset_types = list(butler.registry.queryDatasetTypes(datasets)) sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index df3f41b106..d6962791aa 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -85,7 +85,7 @@ def 
_getCollectionInfo( """ butler = Butler.from_config(repo, without_datastore=True) try: - names = sorted(butler.collections.query(collection, include_chains=True)) + names = sorted(butler.collections.x_query(collection, include_chains=True)) except MissingCollectionError: # Hide the error and act like no collections should be removed. names = [] diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index 50ad5b9b2b..9daea8b55e 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -87,7 +87,7 @@ def _getCollectionInfo( """ butler = Butler.from_config(repo) try: - collectionNames = butler.collections.query(collection, CollectionType.RUN, include_chains=False) + collectionNames = butler.collections.x_query(collection, CollectionType.RUN, include_chains=False) except MissingCollectionError: # Act as if no collections matched. collectionNames = [] diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py index ca25999344..f9abc50669 100644 --- a/python/lsst/daf/butler/script/retrieveArtifacts.py +++ b/python/lsst/daf/butler/script/retrieveArtifacts.py @@ -93,7 +93,7 @@ def retrieveArtifacts( dataset_types = butler.registry.queryDatasetTypes(query_types) refs: list[DatasetRef] = [] with butler._query() as query: - expanded_collections = butler.collections.query(query_collections) + expanded_collections = butler.collections.x_query(query_collections) for dt in dataset_types: results = query.datasets(dt, collections=expanded_collections, find_first=find_first) if where: diff --git a/python/lsst/daf/butler/script/transferDatasets.py b/python/lsst/daf/butler/script/transferDatasets.py index e5e390bc88..5060065bad 100644 --- a/python/lsst/daf/butler/script/transferDatasets.py +++ b/python/lsst/daf/butler/script/transferDatasets.py @@ -83,7 +83,7 @@ def transferDatasets( dataset_types = 
source_butler.registry.queryDatasetTypes(dataset_type_expr) source_refs: list[DatasetRef] = [] with source_butler._query() as query: - query_collections = source_butler.collections.query(collections_expr) + query_collections = source_butler.collections.x_query(collections_expr) # Loop over dataset types and accumulate. for dt in dataset_types: results = query.datasets(dt, collections=query_collections, find_first=find_first) diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py index 0c293c830d..af1a80d551 100644 --- a/python/lsst/daf/butler/transfers/_context.py +++ b/python/lsst/daf/butler/transfers/_context.py @@ -358,7 +358,7 @@ def _computeDatasetAssociations(self) -> dict[str, list[DatasetAssociation]]: collectionTypes = {CollectionType.TAGGED} if datasetType.isCalibration(): collectionTypes.add(CollectionType.CALIBRATION) - resolved_collections = self._butler.collections.query( + resolved_collections = self._butler.collections.x_query( self._collections.keys(), collection_types=collectionTypes, flatten_chains=False, From 5c500572165ea195c924dc605b0eeab5561b4d5c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 14:40:25 -0700 Subject: [PATCH 09/26] Add butler.collections.x_remove --- python/lsst/daf/butler/_butler_collections.py | 33 +++++++++++++++++++ .../_direct_butler_collections.py | 3 ++ .../_remote_butler_collections.py | 3 ++ .../daf/butler/script/removeCollections.py | 2 +- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 814c4635a8..969c05f55d 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -297,3 +297,36 @@ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: st able to perform its own transaction to be concurrent """ raise NotImplementedError() + + @abstractmethod + def 
x_remove(self, name: str) -> None: + """Remove the given collection from the registry. + + Parameters + ---------- + name : `str` + The name of the collection to remove. + + Raises + ------ + lsst.daf.butler.registry.MissingCollectionError + Raised if no collection with the given name exists. + sqlalchemy.exc.IntegrityError + Raised if the database rows associated with the collection are + still referenced by some other table, such as a dataset in a + datastore (for `~CollectionType.RUN` collections only) or a + `~CollectionType.CHAINED` collection of which this collection is + a child. + + Notes + ----- + If this is a `~CollectionType.RUN` collection, all datasets and quanta + in it will removed from the `Registry` database. This requires that + those datasets be removed (or at least trashed) from any datastores + that hold them first. + + A collection may not be deleted as long as it is referenced by a + `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must + be deleted or redefined first. 
+ """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 9ce20a3efa..ab4b0665bc 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -111,3 +111,6 @@ def get_info(self, name: str, include_doc: bool = False, include_parents: bool = def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._registry.registerCollection(name, type, doc) + + def x_remove(self, name: str) -> None: + self._registry.removeCollection(name) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 4e4d195e62..3d052b9aa7 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -98,3 +98,6 @@ def get_info(self, name: str, include_doc: bool = False, include_parents: bool = def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: raise NotImplementedError("Not yet available.") + + def x_remove(self, name: str) -> None: + raise NotImplementedError("Not yet available.") diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index d6962791aa..1ae062cac1 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -126,7 +126,7 @@ def _doRemove(collections: Table) -> None: """Perform the prune collection step.""" butler = Butler.from_config(repo, writeable=True, without_datastore=True) for name in collections["Collection"]: - butler.registry.removeCollection(name) + butler.collections.x_remove(name) result = RemoveCollectionResult( 
onConfirmation=partial(_doRemove, collectionInfo.nonRunCollections), From 970f5890d4174ceb9b3bb4458bca649a6e36bc4f Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 16:00:37 -0700 Subject: [PATCH 10/26] Add hybrid butler collections class for hybrid butler --- .../lsst/daf/butler/script/collectionChain.py | 10 +- python/lsst/daf/butler/script/removeRuns.py | 2 +- .../lsst/daf/butler/tests/butler_queries.py | 6 +- python/lsst/daf/butler/tests/hybrid_butler.py | 5 +- .../butler/tests/hybrid_butler_collections.py | 103 ++++++++++++++++++ tests/test_butler.py | 38 +++---- 6 files changed, 134 insertions(+), 30 deletions(-) create mode 100644 python/lsst/daf/butler/tests/hybrid_butler_collections.py diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index 463fc35aa1..1857507f57 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -104,16 +104,16 @@ def collectionChain( def _modify_collection_chain(butler: Butler, mode: str, parent: str, children: Iterable[str]) -> None: if mode == "prepend": - butler.collection_chains.prepend_chain(parent, children) + butler.collections.prepend_chain(parent, children) elif mode == "redefine": - butler.collection_chains.redefine_chain(parent, children) + butler.collections.redefine_chain(parent, children) elif mode == "remove": - butler.collection_chains.remove_from_chain(parent, children) + butler.collections.remove_from_chain(parent, children) elif mode == "pop": children_to_pop = _find_children_to_pop(butler, parent, children) - butler.collection_chains.remove_from_chain(parent, children_to_pop) + butler.collections.remove_from_chain(parent, children_to_pop) elif mode == "extend": - butler.collection_chains.extend_chain(parent, children) + butler.collections.extend_chain(parent, children) else: raise ValueError(f"Unrecognized update mode: '{mode}'") diff --git 
a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index 9daea8b55e..bf17c2f1d5 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -134,7 +134,7 @@ def _doRemove(runs: Sequence[RemoveRun]) -> None: with butler.transaction(): for run in runs: for parent in run.parents: - butler.collection_chains.remove_from_chain(parent, run.name) + butler.collections.remove_from_chain(parent, run.name) butler.removeRuns([r.name for r in runs], unstore=True) result = RemoveRunsResult( diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py index b641bafb84..f39cd89830 100644 --- a/python/lsst/daf/butler/tests/butler_queries.py +++ b/python/lsst/daf/butler/tests/butler_queries.py @@ -434,9 +434,9 @@ def test_dataset_constrained_record_query(self) -> None: """ butler = self.make_butler("base.yaml", "datasets.yaml") butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) - butler.registry.registerCollection("empty", CollectionType.RUN) - butler.registry.registerCollection("chain", CollectionType.CHAINED) - butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"]) + butler.collections.register("empty", CollectionType.RUN) + butler.collections.register("chain", CollectionType.CHAINED) + butler.collections.redefine_chain("chain", ["imported_g", "empty", "imported_r"]) with butler._query() as query: # No collections here or in defaults is an error. 
with self.assertRaises(NoDefaultCollectionError): diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index 9c66f1af1b..86c9c05c74 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -57,6 +57,7 @@ from ..registry import Registry from ..remote_butler import RemoteButler from ..transfers import RepoExportContext +from .hybrid_butler_collections import HybridButlerCollections from .hybrid_butler_registry import HybridButlerRegistry @@ -454,8 +455,8 @@ def _extract_all_dimension_records_from_data_ids( @property def collection_chains(self) -> ButlerCollections: - return self._direct_butler.collection_chains + return HybridButlerCollections(self) @property def collections(self) -> ButlerCollections: - return self._remote_butler.collection_chains + return HybridButlerCollections(self) diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py new file mode 100644 index 0000000000..b89fd30f10 --- /dev/null +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -0,0 +1,103 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. 
If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ("HybridButlerCollections",) + +from collections.abc import Iterable, Sequence, Set +from typing import TYPE_CHECKING + +from .._butler_collections import ButlerCollections, CollectionInfo +from .._collection_type import CollectionType + +if TYPE_CHECKING: + from .hybrid_butler import HybridButler + + +class HybridButlerCollections(ButlerCollections): + """Implementation of ButlerCollections for HybridButler. + + Parameters + ---------- + butler : `~lsst.daf.butler.tests.hybrid_butler.HybridButler` + Hybrid butler to use. 
+ """ + + def __init__(self, butler: HybridButler): + self._hybrid = butler + + @property + def defaults(self) -> Sequence[str]: + return self._hybrid._remote_butler.collections.defaults + + def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + return self._hybrid._direct_butler.collections.extend_chain( + parent_collection_name, child_collection_names + ) + + def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None: + return self._hybrid._direct_butler.collections.prepend_chain( + parent_collection_name, child_collection_names + ) + + def redefine_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + self._hybrid._direct_butler.collections.redefine_chain(parent_collection_name, child_collection_names) + + def remove_from_chain( + self, parent_collection_name: str, child_collection_names: str | Iterable[str] + ) -> None: + return self._hybrid._direct_butler.collections.remove_from_chain( + parent_collection_name, child_collection_names + ) + + def x_query( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + ) -> Sequence[str]: + return self._hybrid._remote_butler.collections.x_query( + expression, + collection_types=collection_types, + flatten_chains=flatten_chains, + include_chains=include_chains, + ) + + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + return self._hybrid._remote_butler.collections.get_info( + name, include_doc=include_doc, include_parents=include_parents + ) + + def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: + return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc) + + def x_remove(self, name: str) -> None: + 
self._hybrid._direct_butler.collections.x_remove(name) diff --git a/tests/test_butler.py b/tests/test_butler.py index f49173dcd3..d395d24f2a 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -1390,85 +1390,85 @@ def testGetDatasetCollectionCaching(self): def testCollectionChainRedefine(self): butler = self._setup_to_test_collection_chain() - butler.collection_chains.redefine_chain("chain", "a") + butler.collections.redefine_chain("chain", "a") self._check_chain(butler, ["a"]) # Duplicates are removed from the list of children - butler.collection_chains.redefine_chain("chain", ["c", "b", "c"]) + butler.collections.redefine_chain("chain", ["c", "b", "c"]) self._check_chain(butler, ["c", "b"]) # Empty list clears the chain - butler.collection_chains.redefine_chain("chain", []) + butler.collections.redefine_chain("chain", []) self._check_chain(butler, []) - self._test_common_chain_functionality(butler, butler.collection_chains.redefine_chain) + self._test_common_chain_functionality(butler, butler.collections.redefine_chain) def testCollectionChainPrepend(self): butler = self._setup_to_test_collection_chain() # Duplicates are removed from the list of children - butler.collection_chains.prepend_chain("chain", ["c", "b", "c"]) + butler.collections.prepend_chain("chain", ["c", "b", "c"]) self._check_chain(butler, ["c", "b"]) # Prepend goes on the front of existing chain - butler.collection_chains.prepend_chain("chain", ["a"]) + butler.collections.prepend_chain("chain", ["a"]) self._check_chain(butler, ["a", "c", "b"]) # Empty prepend does nothing - butler.collection_chains.prepend_chain("chain", []) + butler.collections.prepend_chain("chain", []) self._check_chain(butler, ["a", "c", "b"]) # Prepending children that already exist in the chain removes them from # their current position. 
- butler.collection_chains.prepend_chain("chain", ["d", "b", "c"]) + butler.collections.prepend_chain("chain", ["d", "b", "c"]) self._check_chain(butler, ["d", "b", "c", "a"]) - self._test_common_chain_functionality(butler, butler.collection_chains.prepend_chain) + self._test_common_chain_functionality(butler, butler.collections.prepend_chain) def testCollectionChainExtend(self): butler = self._setup_to_test_collection_chain() # Duplicates are removed from the list of children - butler.collection_chains.extend_chain("chain", ["c", "b", "c"]) + butler.collections.extend_chain("chain", ["c", "b", "c"]) self._check_chain(butler, ["c", "b"]) # Extend goes on the end of existing chain - butler.collection_chains.extend_chain("chain", ["a"]) + butler.collections.extend_chain("chain", ["a"]) self._check_chain(butler, ["c", "b", "a"]) # Empty extend does nothing - butler.collection_chains.extend_chain("chain", []) + butler.collections.extend_chain("chain", []) self._check_chain(butler, ["c", "b", "a"]) # Extending children that already exist in the chain removes them from # their current position. 
- butler.collection_chains.extend_chain("chain", ["d", "b", "c"]) + butler.collections.extend_chain("chain", ["d", "b", "c"]) self._check_chain(butler, ["a", "d", "b", "c"]) - self._test_common_chain_functionality(butler, butler.collection_chains.extend_chain) + self._test_common_chain_functionality(butler, butler.collections.extend_chain) def testCollectionChainRemove(self) -> None: butler = self._setup_to_test_collection_chain() butler.registry.setCollectionChain("chain", ["a", "b", "c", "d"]) - butler.collection_chains.remove_from_chain("chain", "c") + butler.collections.remove_from_chain("chain", "c") self._check_chain(butler, ["a", "b", "d"]) # Duplicates are allowed in the list of children - butler.collection_chains.remove_from_chain("chain", ["b", "b", "a"]) + butler.collections.remove_from_chain("chain", ["b", "b", "a"]) self._check_chain(butler, ["d"]) # Empty remove does nothing - butler.collection_chains.remove_from_chain("chain", []) + butler.collections.remove_from_chain("chain", []) self._check_chain(butler, ["d"]) # Removing children that aren't in the chain does nothing - butler.collection_chains.remove_from_chain("chain", ["a", "chain"]) + butler.collections.remove_from_chain("chain", ["a", "chain"]) self._check_chain(butler, ["d"]) self._test_common_chain_functionality( - butler, butler.collection_chains.remove_from_chain, skip_cycle_check=True + butler, butler.collections.remove_from_chain, skip_cycle_check=True ) def _setup_to_test_collection_chain(self) -> Butler: From ebbc097db35bac93d03d199b8bc5762f26dca352 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 15 Aug 2024 16:28:53 -0700 Subject: [PATCH 11/26] Use new collections API in test_butler.py --- tests/test_butler.py | 54 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/test_butler.py b/tests/test_butler.py index d395d24f2a..f29a3148ae 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -225,7 +225,7 @@ def 
create_butler( """ butler = self.create_empty_butler(run=run) - collections = set(butler.registry.queryCollections()) + collections = set(butler.collections.x_query("*")) self.assertEqual(collections, {run}) # Create and register a DatasetType dimensions = butler.dimensions.conform(["instrument", "visit"]) @@ -301,7 +301,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But # this by using a distinct run collection each time counter += 1 this_run = f"put_run_{counter}" - butler.registry.registerCollection(this_run, type=CollectionType.RUN) + butler.collections.register(this_run) expected_collections.update({this_run}) with self.subTest(args=args): @@ -418,7 +418,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But # Do explicit registry removal since we know they are # empty - butler.registry.removeCollection(this_run) + butler.collections.x_remove(this_run) expected_collections.remove(this_run) # Create DatasetRef for put using default run. @@ -502,7 +502,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But butler.get(ref, parameters={"unsupported": True}) # Check we have a collection - collections = set(butler.registry.queryCollections()) + collections = set(butler.collections.x_query("*")) self.assertEqual(collections, expected_collections) # Clean up to check that we can remove something that may have @@ -575,9 +575,9 @@ def testDeferredCollectionPassing(self) -> None: metric = makeExampleMetrics() # Register a new run and put dataset. run = "deferred" - self.assertTrue(butler.registry.registerRun(run)) + self.assertTrue(butler.collections.register(run)) # Second time it will be allowed but indicate no-op - self.assertFalse(butler.registry.registerRun(run)) + self.assertFalse(butler.collections.register(run)) ref = butler.put(metric, datasetType, dataId, run=run) # Putting with no run should fail with TypeError. 
with self.assertRaises(CollectionError): @@ -594,7 +594,7 @@ def testDeferredCollectionPassing(self) -> None: with self.assertRaises(CollectionError): butler.get(datasetType, dataId) # Associate the dataset with a different collection. - butler.registry.registerCollection("tagged") + butler.collections.register("tagged", type=CollectionType.TAGGED) butler.registry.associate("tagged", [ref]) # Deleting the dataset from the new collection should make it findable # in the original collection. @@ -642,7 +642,7 @@ def testConstructor(self) -> None: butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) self.assertIsInstance(butler, Butler) - collections = set(butler.registry.queryCollections()) + collections = set(butler.collections.x_query("*")) self.assertEqual(collections, {self.default_run}) # Check that some special characters can be included in run name. @@ -1450,7 +1450,7 @@ def testCollectionChainExtend(self): def testCollectionChainRemove(self) -> None: butler = self._setup_to_test_collection_chain() - butler.registry.setCollectionChain("chain", ["a", "b", "c", "d"]) + butler.collections.redefine_chain("chain", ["a", "b", "c", "d"]) butler.collections.remove_from_chain("chain", "c") self._check_chain(butler, ["a", "b", "d"]) @@ -1474,19 +1474,19 @@ def testCollectionChainRemove(self) -> None: def _setup_to_test_collection_chain(self) -> Butler: butler = self.create_empty_butler(writeable=True) - butler.registry.registerCollection("chain", CollectionType.CHAINED) + butler.collections.register("chain", CollectionType.CHAINED) runs = ["a", "b", "c", "d"] for run in runs: - butler.registry.registerCollection(run) + butler.collections.register(run) - butler.registry.registerCollection("staticchain", CollectionType.CHAINED) - butler.registry.setCollectionChain("staticchain", ["a", "b"]) + butler.collections.register("staticchain", CollectionType.CHAINED) + butler.collections.redefine_chain("staticchain", ["a", "b"]) return 
butler def _check_chain(self, butler: Butler, expected: list[str]) -> None: - children = butler.registry.getCollectionChain("chain") + children = butler.collections.get_info("chain").children self.assertEqual(expected, list(children)) def _test_common_chain_functionality( @@ -1504,14 +1504,14 @@ def _test_common_chain_functionality( # Prevent collection cycles if not skip_cycle_check: - butler.registry.registerCollection("chain2", CollectionType.CHAINED) + butler.collections.register("chain2", CollectionType.CHAINED) func("chain2", "chain") with self.assertRaises(CollectionCycleError): func("chain", "chain2") # Make sure none of the earlier operations interfered with unrelated # chains. - self.assertEqual(["a", "b"], list(butler.registry.getCollectionChain("staticchain"))) + self.assertEqual(["a", "b"], list(butler.collections.get_info("staticchain").children)) with butler._caching_context(): with self.assertRaisesRegex(RuntimeError, "Chained collection modification not permitted"): @@ -1706,9 +1706,9 @@ def testRemoveRuns(self) -> None: butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) # Add some RUN-type collection. run1 = "run1" - butler.registry.registerRun(run1) + butler.collections.register(run1) run2 = "run2" - butler.registry.registerRun(run2) + butler.collections.register(run2) # put a dataset in each metric = makeExampleMetrics() dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) @@ -1729,9 +1729,9 @@ def testRemoveRuns(self) -> None: # Should be nothing in registry for either one, and datastore should # not think either exists. 
with self.assertRaises(MissingCollectionError): - butler.registry.getCollectionType(run1) + butler.collections.get_info(run1) with self.assertRaises(MissingCollectionError): - butler.registry.getCollectionType(run2) + butler.collections.get_info(run1) self.assertFalse(butler.stored(ref1)) self.assertFalse(butler.stored(ref2)) # The ref we unstored should be gone according to the URI, but the @@ -1767,9 +1767,9 @@ def testPruneDatasets(self) -> None: butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) # Add some RUN-type collections. run1 = "run1" - butler.registry.registerRun(run1) + butler.collections.register(run1) run2 = "run2" - butler.registry.registerRun(run2) + butler.collections.register(run2) # put some datasets. ref1 and ref2 have the same data ID, and are in # different runs. ref3 has a different data ID. metric = makeExampleMetrics() @@ -2464,7 +2464,7 @@ def _absolute_transfer(self, transfer: str) -> None: storageClass = self.storageClassFactory.getStorageClass(storageClassName) datasetTypeName = "random_data" run = "run1" - self.source_butler.registry.registerCollection(run, CollectionType.RUN) + self.source_butler.collections.register(run) dimensions = self.source_butler.dimensions.conform(()) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) @@ -2513,7 +2513,7 @@ def assertButlerTransfers( # Create the run collections in the source butler. for run in runs: - self.source_butler.registry.registerCollection(run, CollectionType.RUN) + self.source_butler.collections.register(run) # Create dimensions in source butler. n_exposures = 30 @@ -2746,7 +2746,7 @@ def assertButlerTransfers( # Now prune run2 collection and create instead a CHAINED collection. # This should block the transfer. 
self.target_butler.removeRuns(["run2"], unstore=True) - self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) + self.target_butler.collections.register("run2", CollectionType.CHAINED) with self.assertRaises(CollectionTypeError): # Re-importing the run1 datasets can be problematic if they # use integer IDs so filter those out. @@ -2797,8 +2797,8 @@ def test_fallback(self) -> None: self.assertIsInstance(butler._datastore, NullDatastore) # Check that registry is working. - butler.registry.registerRun("MYRUN") - collections = butler.registry.queryCollections(...) + butler.collections.register("MYRUN") + collections = butler.collections.x_query("*") self.assertIn("MYRUN", set(collections)) # Create a ref. From 9027b0aa096cdf8f391f9086fb410ca024c532cd Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 09:12:17 -0700 Subject: [PATCH 12/26] Remove __eq__ from ButlerCollections and fix test --- python/lsst/daf/butler/_butler_collections.py | 8 +------- tests/test_butler.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 969c05f55d..418a465ed5 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -31,7 +31,7 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence, Set -from typing import Any, overload +from typing import overload from pydantic import BaseModel @@ -68,12 +68,6 @@ def __getitem__(self, index: int | slice) -> str | Sequence[str]: def __len__(self) -> int: return len(self.defaults) - def __eq__(self, other: Any) -> bool: - # Do not try to compare registry instances. 
- if not isinstance(other, type(self)): - return False - return self.defaults == other.defaults - @property @abstractmethod def defaults(self) -> Sequence[str]: diff --git a/tests/test_butler.py b/tests/test_butler.py index f29a3148ae..92604ada75 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -1092,7 +1092,7 @@ def testPickle(self) -> None: butlerOut = pickle.loads(pickle.dumps(butler)) self.assertIsInstance(butlerOut, Butler) self.assertEqual(butlerOut._config, butler._config) - self.assertEqual(butlerOut.collections, butler.collections) + self.assertEqual(list(butlerOut.collections.defaults), list(butler.collections.defaults)) self.assertEqual(butlerOut.run, butler.run) def testGetDatasetTypes(self) -> None: From 4cc657e5691f76c12db2db4f1b2594f7832c6827 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 10:20:24 -0700 Subject: [PATCH 13/26] Allow CollectionInfo to be sorted by name --- python/lsst/daf/butler/_butler_collections.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 418a465ed5..9c9ff6a8bf 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -31,7 +31,7 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence, Set -from typing import overload +from typing import Any, overload from pydantic import BaseModel @@ -52,6 +52,12 @@ class CollectionInfo(BaseModel): parents: frozenset[str] = frozenset() """Any parents of this collection.""" + def __lt__(self, other: Any) -> bool: + """Compare objects by collection name.""" + if not isinstance(other, type(self)): + return NotImplemented + return self.name < other.name + class ButlerCollections(ABC, Sequence): """Methods for working with collections stored in the Butler.""" From b234be3c8e94469d2bd7592480786337208e6db1 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: 
Fri, 16 Aug 2024 11:09:03 -0700 Subject: [PATCH 14/26] Add collections.x_query_info and simplify some usage of x_query --- python/lsst/daf/butler/_butler_collections.py | 60 ++++++++++++++++++- .../_direct_butler_collections.py | 23 +++++++ .../_remote_butler_collections.py | 14 +++-- python/lsst/daf/butler/script/exportCalibs.py | 22 +++---- .../daf/butler/script/queryCollections.py | 48 ++++++++------- .../daf/butler/script/removeCollections.py | 11 ++-- python/lsst/daf/butler/script/removeRuns.py | 13 ++-- .../butler/tests/hybrid_butler_collections.py | 8 ++- 8 files changed, 147 insertions(+), 52 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 9c9ff6a8bf..75de1d3b49 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -210,7 +210,6 @@ def remove_from_chain( """ raise NotImplementedError() - @abstractmethod def x_query( self, expression: str | Iterable[str], @@ -220,6 +219,8 @@ def x_query( ) -> Sequence[str]: """Query the butler for collections matching an expression. + **This is an experimental interface that can change at any time.** + Parameters ---------- expression : `str` or `~collections.abc.Iterable` [ `str` ] @@ -240,6 +241,61 @@ def x_query( collections : `~collections.abc.Sequence` [ `str` ] The names of collections that match ``expression``. + Notes + ----- + The order in which collections are returned is unspecified, except that + the children of a `~CollectionType.CHAINED` collection are guaranteed + to be in the order in which they are searched. When multiple parent + `~CollectionType.CHAINED` collections match the same criteria, the + order in which the two lists appear is unspecified, and the lists of + children may be incomplete if a child has multiple parents. + + The default implementation is a wrapper around `x_query_info`. 
+ """ + collections_info = self.x_query_info( + expression, + collection_types=collection_types, + flatten_chains=flatten_chains, + include_chains=include_chains, + ) + return [info.name for info in collections_info] + + @abstractmethod + def x_query_info( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + include_parents: bool = False, + ) -> Sequence[CollectionInfo]: + """Query the butler for collections matching an expression and + return detailed information about those collections. + + **This is an experimental interface that can change at any time.** + + Parameters + ---------- + expression : `str` or `~collections.abc.Iterable` [ `str` ] + One or more collection names or globs to include in the search. + collection_types : `set` [`CollectionType`], `CollectionType` or `None` + Restrict the types of collections to be searched. If `None` all + collection types are searched. + flatten_chains : `bool`, optional + If `True` (`False` is default), recursively yield the child + collections of matching `~CollectionType.CHAINED` collections. + include_chains : `bool` or `None`, optional + If `True`, yield records for matching `~CollectionType.CHAINED` + collections. Default is the opposite of ``flatten_chains``: + include either CHAINED collections or their children, but not both. + include_parents : `bool`, optional + Whether the returned information includes parents. + + Returns + ------- + collections : `~collections.abc.Sequence` [ `CollectionInfo` ] + The names of collections that match ``expression``. + Notes ----- The order in which collections are returned is unspecified, except that @@ -302,6 +358,8 @@ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: st def x_remove(self, name: str) -> None: """Remove the given collection from the registry. 
+ **This is an experimental interface that can change at any time.** + Parameters ---------- name : `str` diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index ab4b0665bc..10b438c824 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -88,6 +88,8 @@ def x_query( ) -> Sequence[str]: if collection_types is None: collection_types = CollectionType.all() + # Do not use base implementation for now to avoid the additional + # unused queries. return self._registry.queryCollections( expression, collectionTypes=collection_types, @@ -95,6 +97,27 @@ def x_query( includeChains=include_chains, ) + def x_query_info( + self, + expression: str | Iterable[str], + collection_types: Set[CollectionType] | CollectionType | None = None, + flatten_chains: bool = False, + include_chains: bool | None = None, + include_parents: bool = False, + ) -> Sequence[CollectionInfo]: + info = [] + with self._registry.caching_context(): + if collection_types is None: + collection_types = CollectionType.all() + for name in self._registry.queryCollections( + expression, + collectionTypes=collection_types, + flattenChains=flatten_chains, + includeChains=include_chains, + ): + info.append(self.get_info(name, include_parents=include_parents)) + return info + def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: record = self._registry.get_collection_record(name) doc = "" diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 3d052b9aa7..1e62d9c3e9 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -71,21 +71,27 @@ def remove_from_chain( ) -> None: raise 
NotImplementedError("Not yet available") - def x_query( + def x_query_info( self, expression: str | Iterable[str], collection_types: Set[CollectionType] | CollectionType | None = None, flatten_chains: bool = False, include_chains: bool | None = None, - ) -> Sequence[str]: + include_parents: bool = False, + ) -> Sequence[CollectionInfo]: + # This should become a single call on the server in the future. if collection_types is None: collection_types = CollectionType.all() - return self._registry.queryCollections( + + info = [] + for name in self._registry.queryCollections( expression, collectionTypes=collection_types, flattenChains=flatten_chains, includeChains=include_chains, - ) + ): + info.append(self.get_info(name, include_parents=include_parents)) + return info def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: info = self._registry._get_collection_info( diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py index cddd0f3cfd..4581082d19 100644 --- a/python/lsst/daf/butler/script/exportCalibs.py +++ b/python/lsst/daf/butler/script/exportCalibs.py @@ -35,6 +35,7 @@ from astropy.table import Table from .._butler import Butler +from .._butler_collections import CollectionInfo from .._collection_type import CollectionType if TYPE_CHECKING: @@ -44,7 +45,7 @@ def find_calibration_datasets( - butler: Butler, collection: str, datasetTypes: Iterable[DatasetType] + butler: Butler, collection: CollectionInfo, datasetTypes: Iterable[DatasetType] ) -> list[DatasetRef]: """Search a calibration collection for calibration datasets. @@ -52,7 +53,7 @@ def find_calibration_datasets( ---------- butler : `lsst.daf.butler.Butler` Butler to use. - collection : `str` + collection : `CollectionInfo` Collection to search. This should be a CALIBRATION collection. 
datasetTypes : `list` [`lsst.daf.Butler.DatasetType`] @@ -68,18 +69,18 @@ def find_calibration_datasets( RuntimeError Raised if the collection to search is not a CALIBRATION collection. """ - if butler.collections.get_info(collection).type != CollectionType.CALIBRATION: - raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.") + if collection.type != CollectionType.CALIBRATION: + raise RuntimeError(f"Collection {collection.name} is not a CALIBRATION collection.") exportDatasets = [] for calibType in datasetTypes: with butler._query() as query: - results = query.datasets(calibType, collections=collection, find_first=False) + results = query.datasets(calibType, collections=collection.name, find_first=False) try: refs = list(results.with_dimension_records()) except Exception as e: - e.add_note(f"Error from querying dataset type {calibType} and collection {collection}") + e.add_note(f"Error from querying dataset type {calibType} and collection {collection.name}") raise exportDatasets.extend(refs) @@ -133,18 +134,17 @@ def exportCalibs( collectionsToExport = [] datasetsToExport = [] - for collection in butler.collections.x_query( + for collection in butler.collections.x_query_info( collections_query, flatten_chains=True, include_chains=True, collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED}, ): - log.info("Checking collection: %s", collection) + log.info("Checking collection: %s", collection.name) # Get collection information. 
- collectionsToExport.append(collection) - info = butler.collections.get_info(collection) - if info.type == CollectionType.CALIBRATION: + collectionsToExport.append(collection.name) + if collection.type == CollectionType.CALIBRATION: exportDatasets = find_calibration_datasets(butler, collection, calibTypes) datasetsToExport.extend(exportDatasets) diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 43cee5d2ab..6f1b1ef856 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -32,6 +32,7 @@ from astropy.table import Table from .._butler import Butler +from .._butler_collections import CollectionInfo from .._collection_type import CollectionType @@ -71,34 +72,36 @@ def _getTable( dtype=(str, str, str), ) butler = Butler.from_config(repo) - names = sorted(butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type))) + collections = sorted( + butler.collections.x_query_info( + glob or "*", collection_types=frozenset(collection_type), include_parents=inverse + ) + ) if inverse: - for name in names: - info = butler.collections.get_info(name, include_parents=True) + for info in collections: if info.parents: first = True for parentName in sorted(info.parents): - table.add_row((name if first else "", info.type.name if first else "", parentName)) + table.add_row((info.name if first else "", info.type.name if first else "", parentName)) first = False else: - table.add_row((name, info.type.name, "")) + table.add_row((info.name, info.type.name, "")) # If none of the datasets has a parent dataset then remove the # description column. 
if not any(c for c in table[descriptionCol]): del table[descriptionCol] else: - for name in names: - info = butler.collections.get_info(name) + for info in collections: if info.type == CollectionType.CHAINED: if info.children: first = True for child in info.children: - table.add_row((name if first else "", info.type.name if first else "", child)) + table.add_row((info.name if first else "", info.type.name if first else "", child)) first = False else: - table.add_row((name, info.type.name, "")) + table.add_row((info.name, info.type.name, "")) else: - table.add_row((name, info.type.name, "")) + table.add_row((info.name, info.type.name, "")) # If there aren't any CHAINED datasets in the results then remove the # description column. if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]): @@ -142,18 +145,21 @@ def _getTree( ) butler = Butler.from_config(repo, without_datastore=True) - def addCollection(name: str, level: int = 0) -> None: - info = butler.collections.get_info(name, include_parents=inverse) - table.add_row((" " * level + name, info.type.name)) + def addCollection(info: CollectionInfo, level: int = 0) -> None: + table.add_row((" " * level + info.name, info.type.name)) if inverse: for pname in sorted(info.parents): - addCollection(pname, level + 1) + pinfo = butler.collections.get_info(pname, include_parents=inverse) + addCollection(pinfo, level + 1) else: if info.type == CollectionType.CHAINED: for name in info.children: - addCollection(name, level + 1) + cinfo = butler.collections.get_info(name) + addCollection(cinfo, level + 1) - collections = butler.collections.x_query(glob or "*", collection_types=frozenset(collection_type)) + collections = butler.collections.x_query_info( + glob or "*", collection_types=frozenset(collection_type), include_parents=inverse + ) for collection in sorted(collections): addCollection(collection) return table @@ -165,14 +171,14 @@ def _getFlatten( collection_type: Iterable[CollectionType], ) -> 
Table: butler = Butler.from_config(repo) - collectionNames = list( - butler.collections.x_query( + collections = list( + butler.collections.x_query_info( glob or "*", collection_types=frozenset(collection_type), flatten_chains=True ) ) - - collectionTypes = [butler.collections.get_info(c).type.name for c in collectionNames] - return Table((collectionNames, collectionTypes), names=("Name", "Type")) + names = [c.name for c in collections] + types = [c.type.name for c in collections] + return Table((names, types), names=("Name", "Type")) def queryCollections( diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index 1ae062cac1..747abaf1dc 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -85,18 +85,17 @@ def _getCollectionInfo( """ butler = Butler.from_config(repo, without_datastore=True) try: - names = sorted(butler.collections.x_query(collection, include_chains=True)) + collections_info = sorted(butler.collections.x_query_info(collection, include_chains=True)) except MissingCollectionError: # Hide the error and act like no collections should be removed. 
- names = [] + collections_info = [] collections = Table(names=("Collection", "Collection Type"), dtype=(str, str)) runCollections = Table(names=("Collection",), dtype=(str,)) - for name in names: - collection_info = butler.collections.get_info(name) + for collection_info in collections_info: if collection_info.type == CollectionType.RUN: - runCollections.add_row((name,)) + runCollections.add_row((collection_info.name,)) else: - collections.add_row((name, collection_info.type.name)) + collections.add_row((collection_info.name, collection_info.type.name)) return CollectionInfo(collections, runCollections) diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index bf17c2f1d5..dc1cbe1c94 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -87,20 +87,21 @@ def _getCollectionInfo( """ butler = Butler.from_config(repo) try: - collectionNames = butler.collections.x_query(collection, CollectionType.RUN, include_chains=False) + collections = butler.collections.x_query_info( + collection, CollectionType.RUN, include_chains=False, include_parents=True + ) except MissingCollectionError: # Act as if no collections matched. - collectionNames = [] + collections = [] dataset_types = butler.registry.queryDatasetTypes(...) 
runs = [] datasets: dict[str, int] = defaultdict(int) - for collectionName in collectionNames: - collection_info = butler.collections.get_info(collectionName, include_parents=True) + for collection_info in collections: assert collection_info.type == CollectionType.RUN - runs.append(RemoveRun(collectionName, list(collection_info.parents))) + runs.append(RemoveRun(collection_info.name, list(collection_info.parents))) with butler._query() as query: for dt in dataset_types: - results = query.datasets(dt, collections=collectionName) + results = query.datasets(dt, collections=collection_info.name) count = results.count(exact=False) if count: datasets[dt.name] += count diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py index b89fd30f10..1a0875e1c8 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -77,18 +77,20 @@ def remove_from_chain( parent_collection_name, child_collection_names ) - def x_query( + def x_query_info( self, expression: str | Iterable[str], collection_types: Set[CollectionType] | CollectionType | None = None, flatten_chains: bool = False, include_chains: bool | None = None, - ) -> Sequence[str]: - return self._hybrid._remote_butler.collections.x_query( + include_parents: bool = False, + ) -> Sequence[CollectionInfo]: + return self._hybrid._remote_butler.collections.x_query_info( expression, collection_types=collection_types, flatten_chains=flatten_chains, include_chains=include_chains, + include_parents=include_parents, ) def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: From a49c3fa872e9d0fbb0575f5d2ede08e89f117891 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 12:36:29 -0700 Subject: [PATCH 15/26] Deprecate Butler.collection_chains Now called collections --- python/lsst/daf/butler/direct_butler/_direct_butler.py | 
6 ++++++ python/lsst/daf/butler/remote_butler/_remote_butler.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index 2465a026ba..8a2f67212b 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -46,6 +46,7 @@ from collections.abc import Iterable, Iterator, MutableMapping, Sequence from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast +from deprecated.sphinx import deprecated from lsst.resources import ResourcePath, ResourcePathExpression from lsst.utils.introspection import get_class_of from lsst.utils.logging import VERBOSE, getLogger @@ -2149,6 +2150,11 @@ def validateConfiguration( raise ValidationError(";\n".join(messages)) @property + @deprecated( + "Please use 'collections' instead. collection_chains will be removed after v28.", + version="v28", + category=FutureWarning, + ) def collection_chains(self) -> DirectButlerCollections: """Object with methods for modifying collection chains.""" return DirectButlerCollections(self._registry) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index fbabfd0f02..bc9d78b8dc 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -35,6 +35,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any, TextIO, cast +from deprecated.sphinx import deprecated from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ( determine_destination_for_retrieved_artifact, ) @@ -144,6 +145,11 @@ def isWriteable(self) -> bool: return False @property + @deprecated( + "Please use 'collections' instead. 
collection_chains will be removed after v28.", + version="v28", + category=FutureWarning, + ) def collection_chains(self) -> ButlerCollections: """Object with methods for modifying collection chains.""" from ._registry import RemoteButlerRegistry From 066bea5ade1d545550121aa886d978d6be7966b2 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 12:45:49 -0700 Subject: [PATCH 16/26] Always include the collection docs in collections.get_info --- python/lsst/daf/butler/_butler_collections.py | 4 +--- .../daf/butler/direct_butler/_direct_butler_collections.py | 6 ++---- python/lsst/daf/butler/remote_butler/_remote_butler.py | 2 +- .../daf/butler/remote_butler/_remote_butler_collections.py | 6 ++---- python/lsst/daf/butler/tests/hybrid_butler_collections.py | 6 ++---- 5 files changed, 8 insertions(+), 16 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 75de1d3b49..9e5a3c843c 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -308,15 +308,13 @@ def x_query_info( raise NotImplementedError() @abstractmethod - def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: """Obtain information for a specific collection. Parameters ---------- name : `str` The name of the collection of interest. - include_doc : `bool`, optional - If `True` any documentation about this collection will be included. include_parents : `bool`, optional If `True` any parents of this collection will be included. 
diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 10b438c824..e047cdb6f7 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -118,11 +118,9 @@ def x_query_info( info.append(self.get_info(name, include_parents=include_parents)) return info - def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: + def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: record = self._registry.get_collection_record(name) - doc = "" - if include_doc: - doc = self._registry.getCollectionDocumentation(name) or "" + doc = self._registry.getCollectionDocumentation(name) or "" children: tuple[str, ...] = tuple() if record.type == CollectionType.CHAINED: assert isinstance(record, ChainedCollectionRecord) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index bc9d78b8dc..f09a1a62bc 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -158,7 +158,7 @@ def collection_chains(self) -> ButlerCollections: @property def collections(self) -> ButlerCollections: - """Object with methods for modifying collection chains.""" + """Object with methods for modifying and querying collections.""" from ._registry import RemoteButlerRegistry return RemoteButlerCollections(cast(RemoteButlerRegistry, self._registry)) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 1e62d9c3e9..7e907a9235 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -93,10 +93,8 @@ def x_query_info( 
info.append(self.get_info(name, include_parents=include_parents)) return info - def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: - info = self._registry._get_collection_info( - name, include_doc=include_doc, include_parents=include_parents - ) + def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents) doc = info.doc or "" children = info.children or () parents = info.parents or set() diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py index 1a0875e1c8..4ac8688e6d 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -93,10 +93,8 @@ def x_query_info( include_parents=include_parents, ) - def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo: - return self._hybrid._remote_butler.collections.get_info( - name, include_doc=include_doc, include_parents=include_parents - ) + def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + return self._hybrid._remote_butler.collections.get_info(name, include_parents=include_parents) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc) From ff8f7710248d0d343eaa1a0c0636b4985f5621bb Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 13:01:10 -0700 Subject: [PATCH 17/26] Default CollectionInfo.parents to None --- python/lsst/daf/butler/_butler_collections.py | 7 +++++-- .../daf/butler/direct_butler/_direct_butler_collections.py | 2 +- .../daf/butler/remote_butler/_remote_butler_collections.py | 3 +-- python/lsst/daf/butler/script/queryCollections.py | 1 + 
python/lsst/daf/butler/script/removeRuns.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 9e5a3c843c..5d40d20de0 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -49,8 +49,11 @@ class CollectionInfo(BaseModel): """Documentation string associated with this collection.""" children: tuple[str, ...] = tuple() """Children of this collection (only if CHAINED).""" - parents: frozenset[str] = frozenset() - """Any parents of this collection.""" + parents: frozenset[str] | None = None + """Any parents of this collection. + + `None` if the parents were not requested. + """ def __lt__(self, other: Any) -> bool: """Compare objects by collection name.""" diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index e047cdb6f7..30b3b401bc 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -125,7 +125,7 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: if record.type == CollectionType.CHAINED: assert isinstance(record, ChainedCollectionRecord) children = tuple(record.children) - parents: set[str] = set() + parents: set[str] | None = None if include_parents: parents = self._registry.getCollectionParentChains(name) return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 7e907a9235..35b3d67be0 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -97,8 +97,7 @@ def get_info(self, 
name: str, include_parents: bool = False) -> CollectionInfo: info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents) doc = info.doc or "" children = info.children or () - parents = info.parents or set() - return CollectionInfo(name=name, type=info.type, doc=doc, parents=parents, children=children) + return CollectionInfo(name=name, type=info.type, doc=doc, parents=info.parents, children=children) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: raise NotImplementedError("Not yet available.") diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index 6f1b1ef856..f80b8e71cf 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -148,6 +148,7 @@ def _getTree( def addCollection(info: CollectionInfo, level: int = 0) -> None: table.add_row((" " * level + info.name, info.type.name)) if inverse: + assert info.parents is not None # For mypy. 
for pname in sorted(info.parents): pinfo = butler.collections.get_info(pname, include_parents=inverse) addCollection(pinfo, level + 1) diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index dc1cbe1c94..5edb2b2351 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -97,7 +97,7 @@ def _getCollectionInfo( runs = [] datasets: dict[str, int] = defaultdict(int) for collection_info in collections: - assert collection_info.type == CollectionType.RUN + assert collection_info.type == CollectionType.RUN and collection_info.parents is not None runs.append(RemoveRun(collection_info.name, list(collection_info.parents))) with butler._query() as query: for dt in dataset_types: From 3d1722331c3b988482ddaef1708b9fe793552655 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 13:30:55 -0700 Subject: [PATCH 18/26] Add dataset_types and governors to CollectionInfo --- python/lsst/daf/butler/_butler_collections.py | 22 +++++++++++++++- .../_direct_butler_collections.py | 26 ++++++++++++++++--- .../_remote_butler_collections.py | 23 +++++++++++++--- .../butler/tests/hybrid_butler_collections.py | 10 +++++-- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 5d40d20de0..4789a62010 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -54,6 +54,17 @@ class CollectionInfo(BaseModel): `None` if the parents were not requested. """ + dataset_types: frozenset[str] | None = None + """Names of any dataset types associated with datasets in this collection. + + `None` if no dataset type information was requested + """ + governors: dict[str, frozenset[str]] | None = None + """Values of any governor dimensions associated with datasets in this + collection. + + `None` if no governor information was requested. 
+ """ def __lt__(self, other: Any) -> bool: """Compare objects by collection name.""" @@ -271,6 +282,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: """Query the butler for collections matching an expression and return detailed information about those collections. @@ -293,6 +305,9 @@ def x_query_info( include either CHAINED collections or their children, but not both. include_parents : `bool`, optional Whether the returned information includes parents. + include_summary : `bool`, optional + Whether the returned information includes dataset type and + governor information for the collections. Returns ------- @@ -311,7 +326,9 @@ def x_query_info( raise NotImplementedError() @abstractmethod - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: """Obtain information for a specific collection. Parameters @@ -320,6 +337,9 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: The name of the collection of interest. include_parents : `bool`, optional If `True` any parents of this collection will be included. + include_summary : `bool`, optional + If `True` dataset type names and governor dimensions of datasets + stored in this collection will be included in the result. 
Returns ------- diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 30b3b401bc..15a3f5d3a1 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -104,6 +104,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: info = [] with self._registry.caching_context(): @@ -115,10 +116,14 @@ def x_query_info( flattenChains=flatten_chains, includeChains=include_chains, ): - info.append(self.get_info(name, include_parents=include_parents)) + info.append( + self.get_info(name, include_parents=include_parents, include_summary=include_summary) + ) return info - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: record = self._registry.get_collection_record(name) doc = self._registry.getCollectionDocumentation(name) or "" children: tuple[str, ...] 
= tuple() @@ -128,7 +133,22 @@ def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: parents: set[str] | None = None if include_parents: parents = self._registry.getCollectionParentChains(name) - return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children) + governors: dict[str, frozenset[str]] | None = None + dataset_types: Set[str] | None = None + if include_summary: + summary = self._registry.getCollectionSummary(name) + dataset_types = frozenset([dt.name for dt in summary.dataset_types]) + governors = {k: frozenset(v) for k, v in summary.governors.items()} + + return CollectionInfo( + name=name, + type=record.type, + doc=doc, + parents=parents, + children=children, + dataset_types=dataset_types, + governors=governors, + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._registry.registerCollection(name, type, doc) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 35b3d67be0..c5cddbef17 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -78,6 +78,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: # This should become a single call on the server in the future. 
if collection_types is None: @@ -90,14 +91,30 @@ def x_query_info( flattenChains=flatten_chains, includeChains=include_chains, ): - info.append(self.get_info(name, include_parents=include_parents)) + info.append(self.get_info(name, include_parents=include_parents, include_summary=include_summary)) return info - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents) doc = info.doc or "" children = info.children or () - return CollectionInfo(name=name, type=info.type, doc=doc, parents=info.parents, children=children) + governors: dict[str, frozenset[str]] | None = None + dataset_types: Set[str] | None = None + if include_summary: + summary = self._registry.getCollectionSummary(name) + dataset_types = frozenset([dt.name for dt in summary.dataset_types]) + governors = {k: frozenset(v) for k, v in summary.governors.items()} + return CollectionInfo( + name=name, + type=info.type, + doc=doc, + parents=info.parents, + children=children, + dataset_types=dataset_types, + governors=governors, + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: raise NotImplementedError("Not yet available.") diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py index 4ac8688e6d..6c9e438e79 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -84,6 +84,7 @@ def x_query_info( flatten_chains: bool = False, include_chains: bool | None = None, include_parents: bool = False, + include_summary: bool = False, ) -> Sequence[CollectionInfo]: return self._hybrid._remote_butler.collections.x_query_info( expression, @@ -91,10 +92,15 @@ def 
x_query_info( flatten_chains=flatten_chains, include_chains=include_chains, include_parents=include_parents, + include_summary=include_summary, ) - def get_info(self, name: str, include_parents: bool = False) -> CollectionInfo: - return self._hybrid._remote_butler.collections.get_info(name, include_parents=include_parents) + def get_info( + self, name: str, include_parents: bool = False, include_summary: bool = False + ) -> CollectionInfo: + return self._hybrid._remote_butler.collections.get_info( + name, include_parents=include_parents, include_summary=include_summary + ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: return self._hybrid._direct_butler.collections.register(name, type=type, doc=doc) From e6251b864a9df6d141e392a39485ea223a60a326 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 14:35:25 -0700 Subject: [PATCH 19/26] Use collection summaries to speed up butler query-datasets Also sort the dataset types. --- .../lsst/daf/butler/script/queryDatasets.py | 22 ++++++++++++++++--- tests/test_cliCmdQueryDatasets.py | 8 +++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index c95e56fad9..dab6aba5ea 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -218,11 +218,27 @@ def getDatasets(self) -> Iterator[DatasetRef]: # Currently need to use old interface to get all the matching # dataset types and loop over the dataset types executing a new # query each time. 
- dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes) + dataset_types: set[str] = {d.name for d in self.butler.registry.queryDatasetTypes(datasetTypes)} with self.butler._query() as query: - query_collections = self.butler.collections.x_query(query_collections) + # If there is more than one dataset type being requested include + # the summary information so we can pre-filter. With only one + # let query.datasets work it out. + include_summary = True if len(dataset_types) > 1 else False + query_collections_info = self.butler.collections.x_query_info( + query_collections, include_summary=include_summary + ) + query_collections = [c.name for c in query_collections_info] + + # Only iterate over dataset types that are relevant for the query. + if include_summary: + collection_dataset_types: set[str] = set() + for info in query_collections_info: + assert info.dataset_types is not None # For mypy. + collection_dataset_types.update(info.dataset_types) + dataset_types = dataset_types.intersection(collection_dataset_types) + # Accumulate over dataset types. 
- for dt in dataset_types: + for dt in sorted(dataset_types): results = query.datasets(dt, collections=query_collections, find_first=self._find_first) if self._where: results = results.where(self._where) diff --git a/tests/test_cliCmdQueryDatasets.py b/tests/test_cliCmdQueryDatasets.py index e253f66f94..3670f5cd96 100644 --- a/tests/test_cliCmdQueryDatasets.py +++ b/tests/test_cliCmdQueryDatasets.py @@ -250,6 +250,10 @@ def testGlobDatasetType(self): tables = self._queryDatasets(repo=self.repoDir) expectedTables = ( + AstropyTable( + array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")), + names=("type", "run", "instrument", "visit", "band", "physical_filter"), + ), AstropyTable( array( ( @@ -259,10 +263,6 @@ def testGlobDatasetType(self): ), names=("type", "run", "instrument", "visit", "band", "physical_filter"), ), - AstropyTable( - array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")), - names=("type", "run", "instrument", "visit", "band", "physical_filter"), - ), ) self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True) From eaafc03e64da0ac2c5f5d56d08f304036749f1e4 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 16 Aug 2024 15:01:00 -0700 Subject: [PATCH 20/26] Improve the error message with table comparisons. --- python/lsst/daf/butler/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/tests/utils.py b/python/lsst/daf/butler/tests/utils.py index 81a898f5a7..3e74b82db5 100644 --- a/python/lsst/daf/butler/tests/utils.py +++ b/python/lsst/daf/butler/tests/utils.py @@ -183,7 +183,7 @@ def assertAstropyTablesEqual( original_max = self.maxDiff self.maxDiff = None # This is required to get the full diff. 
try: - self.assertEqual(table1, expected1) + self.assertEqual(table1, expected1, f"Table:\n{table}\n\nvs Expected:\n{expected}") finally: self.maxDiff = original_max From af98c98d58c2349682f689c5e7804a9d549ffc90 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 08:24:31 -0700 Subject: [PATCH 21/26] Remove governors from CollectionInfo until we need it --- python/lsst/daf/butler/_butler_collections.py | 6 ------ .../daf/butler/direct_butler/_direct_butler_collections.py | 3 --- .../daf/butler/remote_butler/_remote_butler_collections.py | 3 --- 3 files changed, 12 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 4789a62010..d42d6f5b30 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -59,12 +59,6 @@ class CollectionInfo(BaseModel): `None` if no dataset type information was requested """ - governors: dict[str, frozenset[str]] | None = None - """Values of any governor dimensions associated with datasets in this - collection. - - `None` if no governor information was requested. 
- """ def __lt__(self, other: Any) -> bool: """Compare objects by collection name.""" diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 15a3f5d3a1..baa1c92176 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -133,12 +133,10 @@ def get_info( parents: set[str] | None = None if include_parents: parents = self._registry.getCollectionParentChains(name) - governors: dict[str, frozenset[str]] | None = None dataset_types: Set[str] | None = None if include_summary: summary = self._registry.getCollectionSummary(name) dataset_types = frozenset([dt.name for dt in summary.dataset_types]) - governors = {k: frozenset(v) for k, v in summary.governors.items()} return CollectionInfo( name=name, @@ -147,7 +145,6 @@ def get_info( parents=parents, children=children, dataset_types=dataset_types, - governors=governors, ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index c5cddbef17..f021781a05 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -100,12 +100,10 @@ def get_info( info = self._registry._get_collection_info(name, include_doc=True, include_parents=include_parents) doc = info.doc or "" children = info.children or () - governors: dict[str, frozenset[str]] | None = None dataset_types: Set[str] | None = None if include_summary: summary = self._registry.getCollectionSummary(name) dataset_types = frozenset([dt.name for dt in summary.dataset_types]) - governors = {k: frozenset(v) for k, v in summary.governors.items()} return CollectionInfo( name=name, type=info.type, 
@@ -113,7 +111,6 @@ def get_info( parents=info.parents, children=children, dataset_types=dataset_types, - governors=governors, ) def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: str | None = None) -> bool: From a859c71347a594f509af8bb1198827ddb0200eb5 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 08:31:06 -0700 Subject: [PATCH 22/26] Fix some spelling errors --- python/lsst/daf/butler/registry/tests/_registry.py | 8 ++++---- python/lsst/daf/butler/script/removeCollections.py | 4 ++-- python/lsst/daf/butler/transfers/_yaml.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 0732e03556..0a66d7797b 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -1060,7 +1060,7 @@ def testNestedTransaction(self): # be committed). registry.insertDimensionData(dimension, dataId2) checkpointReached = True - # This should conflict and raise, triggerring a rollback + # This should conflict and raise, triggering a rollback # of the previous insertion within the same transaction # context, but not the original insertion in the outer # block. @@ -2968,7 +2968,7 @@ def testQueryResultSummaries(self): # Second query should yield no results, which we should see when # we attempt to expand the data ID. query2 = registry.queryDataIds(["physical_filter"], band="h") - # There's no execute=False, exact=Fals test here because the behavior + # There's no execute=False, exact=False test here because the behavior # not something we want to guarantee in this case (and exact=False # says either answer is legal). 
self.assertFalse(query2.any(execute=True, exact=False)) @@ -3684,7 +3684,7 @@ def test_skypix_constraint_queries(self) -> None: else: raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") # New query system does not support non-common skypix constraints - # and we are deprecating it to replace with region-based constrints. + # and we are deprecating it to replace with region-based constraints. # TODO: Drop this tests once we remove support for non-common skypix. with contextlib.suppress(NotImplementedError): self.assertEqual( @@ -3895,7 +3895,7 @@ def test_query_find_datasets_drop_postprocessing(self) -> None: # against only one of the two collections. This should work even # though the relation returned by queryDataIds ends with # iteration-engine region-filtering, because we can recognize before - # running the query that there is only one collecton to search and + # running the query that there is only one collection to search and # hence the (default) findFirst=True is irrelevant, and joining in the # dataset query commutes past the iteration-engine postprocessing. query1 = registry.queryDataIds( diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index 747abaf1dc..71606a5461 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -41,7 +41,7 @@ class RemoveCollectionResult: """Container to return to the cli command; holds tables describing the collections that will be removed, as well as any found RUN collections - which can not be removed by this command. Also holds the callback funciton + which can not be removed by this command. Also holds the callback function to execute the remove upon user confirmation. """ @@ -73,7 +73,7 @@ def _getCollectionInfo( Parameters ---------- repo : `str` - The URI to the repostiory. + The URI to the repository. 
collection : `str` The collection string to search for. Same as the `expression` argument to `registry.queryCollections`. diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py index dc9741bc6c..4911201bd5 100644 --- a/python/lsst/daf/butler/transfers/_yaml.py +++ b/python/lsst/daf/butler/transfers/_yaml.py @@ -301,7 +301,7 @@ class YamlRepoImportBackend(RepoImportBackend): stream : `io.IO` A readable file-like object. registry : `SqlRegistry` - The registry datasets will be imported into. Only used to retreive + The registry datasets will be imported into. Only used to retrieve dataset types during construction; all write happen in `register` and `load`. """ @@ -440,7 +440,7 @@ def __init__(self, stream: IO, registry: SqlRegistry): # Must create the visit_system_membership records. # But first create empty list for visits since other # logic in this file depends on self.dimensions being - # populated in an order consisteny with primary keys. + # populated in an order consistent with primary keys. 
self.dimensions[self.registry.dimensions["visit"]] = [] element = self.registry.dimensions["visit_system_membership"] RecordClass = element.RecordClass @@ -509,7 +509,7 @@ def __init__(self, stream: IO, registry: SqlRegistry): if not isinstance(child, str): warnings.warn( f"CHAINED collection {data['name']} includes restrictions on child " - "collection searches, which are no longer suppored and will be ignored.", + "collection searches, which are no longer supported and will be ignored.", stacklevel=find_outside_stacklevel("lsst.daf.butler"), ) # Old form with dataset type restrictions only, From c6d1cb06c1f2cfa2ca5b7bfd2115490acfe20dd8 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 12:53:35 -0700 Subject: [PATCH 23/26] Rewrite command-lines to filter dataset types by collection summary --- python/lsst/daf/butler/_butler_collections.py | 12 +++++++ python/lsst/daf/butler/script/queryDataIds.py | 21 ++++++++++--- .../lsst/daf/butler/script/queryDatasets.py | 31 ++++++++++++------- .../butler/script/queryDimensionRecords.py | 9 ++++-- python/lsst/daf/butler/script/removeRuns.py | 8 +++-- .../daf/butler/script/retrieveArtifacts.py | 6 ++-- .../daf/butler/script/transferDatasets.py | 10 ++++-- 7 files changed, 72 insertions(+), 25 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index d42d6f5b30..71045f2937 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -403,3 +403,15 @@ def x_remove(self, name: str) -> None: be deleted or redefined first. 
""" raise NotImplementedError() + + def _filter_dataset_types( + self, dataset_types: Iterable[str], collections: Iterable[CollectionInfo] + ) -> Iterable[str]: + dataset_types_set = set(dataset_types) + collection_dataset_types: set[str] = set() + for info in collections: + if info.dataset_types is None: + raise RuntimeError("Can only filter by collections if include_summary was True") + collection_dataset_types.update(info.dataset_types) + dataset_types_set = dataset_types_set.intersection(collection_dataset_types) + return dataset_types_set diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index a70aba1887..41bbfd4d6d 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -169,11 +169,22 @@ def queryDataIds( if datasets: # Need to constrain results based on dataset type and collection. query_collections = collections or "*" - - expanded_collections = butler.collections.x_query(query_collections) - - sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) - for dt in dataset_types: + collections_info = butler.collections.x_query_info(query_collections, include_summary=True) + expanded_collections = [info.name for info in collections_info] + filtered_dataset_types = list( + butler.collections._filter_dataset_types([dt.name for dt in dataset_types], collections_info) + ) + if not filtered_dataset_types: + return ( + None, + f"No datasets of type {datasets!r} existed in the specified " + f"collections {','.join(expanded_collections)}.", + ) + + sub_query = query.join_dataset_search( + filtered_dataset_types.pop(0), collections=expanded_collections + ) + for dt in filtered_dataset_types: sub_query = sub_query.join_dataset_search(dt, collections=expanded_collections) results = sub_query.data_ids(dimensions) diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py 
index dab6aba5ea..5403a44f5b 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -27,6 +27,7 @@ from __future__ import annotations import dataclasses +import logging from collections import defaultdict from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING @@ -42,6 +43,9 @@ from lsst.resources import ResourcePath +_LOG = logging.getLogger(__name__) + + @dataclasses.dataclass(frozen=True) class _RefInfo: datasetRef: DatasetRef @@ -219,23 +223,27 @@ def getDatasets(self) -> Iterator[DatasetRef]: # dataset types and loop over the dataset types executing a new # query each time. dataset_types: set[str] = {d.name for d in self.butler.registry.queryDatasetTypes(datasetTypes)} + n_dataset_types = len(dataset_types) with self.butler._query() as query: - # If there is more than one dataset type being requested include - # the summary information so we can pre-filter. With only one - # let query.datasets work it out. - include_summary = True if len(dataset_types) > 1 else False + # Expand the collections query and include summary information. query_collections_info = self.butler.collections.x_query_info( - query_collections, include_summary=include_summary + query_collections, include_summary=True ) query_collections = [c.name for c in query_collections_info] # Only iterate over dataset types that are relevant for the query. - if include_summary: - collection_dataset_types: set[str] = set() - for info in query_collections_info: - assert info.dataset_types is not None # For mypy. 
- collection_dataset_types.update(info.dataset_types) - dataset_types = dataset_types.intersection(collection_dataset_types) + dataset_types = set( + self.butler.collections._filter_dataset_types(dataset_types, query_collections_info) + ) + + if (n_filtered := len(dataset_types)) != n_dataset_types: + _LOG.info("Filtered %d dataset types down to %d", n_dataset_types, n_filtered) + elif n_dataset_types == 0: + _LOG.info("The given dataset type, %s, is not known to this butler.", datasetTypes) + else: + _LOG.info( + "Processing %d dataset type%s", n_dataset_types, "" if n_dataset_types == 1 else "s" + ) # Accumulate over dataset types. for dt in sorted(dataset_types): diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 11470e8f17..60ee77c506 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -80,8 +80,13 @@ def queryDimensionRecords( if datasets: query_collections = collections or "*" - expanded_collections = butler.collections.x_query(query_collections) - dataset_types = list(butler.registry.queryDatasetTypes(datasets)) + collections_info = butler.collections.x_query_info(query_collections, include_summary=True) + expanded_collections = [info.name for info in collections_info] + dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(datasets)] + dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections_info)) + + if not dataset_types: + return None sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections) for dt in dataset_types: diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index 5edb2b2351..94e85ce26d 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -88,12 +88,14 @@ def _getCollectionInfo( butler = 
Butler.from_config(repo) try: collections = butler.collections.x_query_info( - collection, CollectionType.RUN, include_chains=False, include_parents=True + collection, CollectionType.RUN, include_chains=False, include_parents=True, include_summary=True ) except MissingCollectionError: # Act as if no collections matched. collections = [] - dataset_types = butler.registry.queryDatasetTypes(...) + dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(...)] + dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections)) + runs = [] datasets: dict[str, int] = defaultdict(int) for collection_info in collections: @@ -104,7 +106,7 @@ def _getCollectionInfo( results = query.datasets(dt, collections=collection_info.name) count = results.count(exact=False) if count: - datasets[dt.name] += count + datasets[dt] += count return runs, {k: datasets[k] for k in sorted(datasets.keys())} diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py index f9abc50669..066c8b6d56 100644 --- a/python/lsst/daf/butler/script/retrieveArtifacts.py +++ b/python/lsst/daf/butler/script/retrieveArtifacts.py @@ -90,10 +90,12 @@ def retrieveArtifacts( # Need to store in list so we can count the number to give some feedback # to caller. 
- dataset_types = butler.registry.queryDatasetTypes(query_types) + dataset_types = [dt.name for dt in butler.registry.queryDatasetTypes(query_types)] refs: list[DatasetRef] = [] with butler._query() as query: - expanded_collections = butler.collections.x_query(query_collections) + collections_info = butler.collections.x_query_info(query_collections, include_summary=True) + expanded_collections = [info.name for info in collections_info] + dataset_types = list(butler.collections._filter_dataset_types(dataset_types, collections_info)) for dt in dataset_types: results = query.datasets(dt, collections=expanded_collections, find_first=find_first) if where: diff --git a/python/lsst/daf/butler/script/transferDatasets.py b/python/lsst/daf/butler/script/transferDatasets.py index 5060065bad..9839212d2f 100644 --- a/python/lsst/daf/butler/script/transferDatasets.py +++ b/python/lsst/daf/butler/script/transferDatasets.py @@ -80,10 +80,16 @@ def transferDatasets( dataset_type_expr = dataset_type or ... collections_expr: tuple[str, ...] = collections or ("*",) - dataset_types = source_butler.registry.queryDatasetTypes(dataset_type_expr) + dataset_types = [dt.name for dt in source_butler.registry.queryDatasetTypes(dataset_type_expr)] source_refs: list[DatasetRef] = [] with source_butler._query() as query: - query_collections = source_butler.collections.x_query(collections_expr) + query_collections_info = source_butler.collections.x_query_info( + collections_expr, include_summary=True + ) + query_collections = [info.name for info in query_collections_info] + dataset_types = list( + source_butler.collections._filter_dataset_types(dataset_types, query_collections_info) + ) # Loop over dataset types and accumulate. 
for dt in dataset_types: results = query.datasets(dt, collections=query_collections, find_first=find_first) From 467cc550e945101a41ee0c60304acd0095a2ade2 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 13:42:08 -0700 Subject: [PATCH 24/26] Add --collections to query-dataset-types command-line --- python/lsst/daf/butler/cli/cmd/commands.py | 5 +++++ .../lsst/daf/butler/script/queryDatasetTypes.py | 17 +++++++++++++++-- tests/test_cliCmdQueryDatasetTypes.py | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py index 98810481c6..03b2325956 100644 --- a/python/lsst/daf/butler/cli/cmd/commands.py +++ b/python/lsst/daf/butler/cli/cmd/commands.py @@ -439,6 +439,11 @@ def query_collections(*args: Any, **kwargs: Any) -> None: "dataset types to return." ) @verbose_option(help="Include dataset type name, dimensions, and storage class in output.") +@collections_option( + help="Constrain the resulting dataset types by these collections. " + "This constraint does not say that a dataset of this type is definitely present, " + "solely that one may have been present at some point." +) @options_file_option() def query_dataset_types(*args: Any, **kwargs: Any) -> None: """Get the dataset types in a repository.""" diff --git a/python/lsst/daf/butler/script/queryDatasetTypes.py b/python/lsst/daf/butler/script/queryDatasetTypes.py index 58cf68b693..8d657e69bf 100644 --- a/python/lsst/daf/butler/script/queryDatasetTypes.py +++ b/python/lsst/daf/butler/script/queryDatasetTypes.py @@ -34,7 +34,9 @@ from .._butler import Butler -def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str]) -> Table: +def queryDatasetTypes( + repo: str, verbose: bool, glob: Iterable[str], collections: Iterable[str] | None = None +) -> Table: """Get the dataset types in a repository. 
Parameters @@ -48,16 +50,27 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str]) -> Table: glob : iterable [`str`] A list of glob-style search string that fully or partially identify the dataset type names to search for. + collections : iterable [`str`] or `None`, optional + Constrains resulting dataset types such that only dataset types + found (at some point) in these collections will be returned. Returns ------- - collections : `astropy.table.Table` + dataset_types_table : `astropy.table.Table` A dict whose key is "datasetTypes" and whose value is a list of collection names. """ butler = Butler.from_config(repo, without_datastore=True) expression = glob or ... datasetTypes = butler.registry.queryDatasetTypes(expression=expression) + + if collections: + collections_info = butler.collections.x_query_info(collections, include_summary=True) + filtered_dataset_types = set( + butler.collections._filter_dataset_types([d.name for d in datasetTypes], collections_info) + ) + datasetTypes = [d for d in datasetTypes if d.name in filtered_dataset_types] + if verbose: table = Table( array( diff --git a/tests/test_cliCmdQueryDatasetTypes.py b/tests/test_cliCmdQueryDatasetTypes.py index c656f4cbe3..8ce01e923c 100644 --- a/tests/test_cliCmdQueryDatasetTypes.py +++ b/tests/test_cliCmdQueryDatasetTypes.py @@ -46,7 +46,7 @@ class QueryDatasetTypesCmdTest(CliCmdTestBase, unittest.TestCase): @staticmethod def defaultExpected(): - return dict(repo=None, verbose=False, glob=()) + return dict(repo=None, verbose=False, glob=(), collections=()) @staticmethod def command(): From 13ac82650bd8b7089d54e5f0eb25ab7e1c7c30b5 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 13:47:13 -0700 Subject: [PATCH 25/26] Add news fragment --- doc/changes/DM-45738.feature.rst | 4 ++++ doc/changes/DM-45738.removal.rst | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 doc/changes/DM-45738.feature.rst create mode 100644 doc/changes/DM-45738.removal.rst diff --git 
a/doc/changes/DM-45738.feature.rst b/doc/changes/DM-45738.feature.rst new file mode 100644 index 0000000000..df6ddf1f53 --- /dev/null +++ b/doc/changes/DM-45738.feature.rst @@ -0,0 +1,4 @@ +* Added ``--collections`` option to ``butler query-dataset-types`` to allow the resultant dataset types to be constrained by those that are used by specific collections. +* Changed the ``Butler.collections`` property to be a ``ButlerCollections`` instance. + This object can still act as a sequence equivalent to ``ButlerCollections.defaults`` but adds new APIs for querying and manipulating collections. + Any methods with names starting with ``x_`` are deemed to be an experimental API that may change in the future. diff --git a/doc/changes/DM-45738.removal.rst b/doc/changes/DM-45738.removal.rst new file mode 100644 index 0000000000..f165b57d13 --- /dev/null +++ b/doc/changes/DM-45738.removal.rst @@ -0,0 +1,2 @@ +The ``Butler.collection_chains`` property is now deprecated. +Please use ``Butler.collections`` instead. From dc4b3cbb314b6294676436999b6d9e1634926298 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 19 Aug 2024 14:17:17 -0700 Subject: [PATCH 26/26] Catch and reraise the IntegrityError in collections.x_remove --- python/lsst/daf/butler/_butler_collections.py | 2 +- .../daf/butler/direct_butler/_direct_butler_collections.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 71045f2937..8a3d2e2d63 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -384,7 +384,7 @@ def x_remove(self, name: str) -> None: ------ lsst.daf.butler.registry.MissingCollectionError Raised if no collection with the given name exists. 
- sqlalchemy.exc.IntegrityError + lsst.daf.butler.registry.OrphanedRecordError Raised if the database rows associated with the collection are still referenced by some other table, such as a dataset in a datastore (for `~CollectionType.RUN` collections only) or a diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index baa1c92176..8a4b7c70e8 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -31,10 +31,12 @@ from collections.abc import Iterable, Sequence, Set +import sqlalchemy from lsst.utils.iteration import ensure_iterable from .._butler_collections import ButlerCollections, CollectionInfo from .._collection_type import CollectionType +from ..registry._exceptions import OrphanedRecordError from ..registry.interfaces import ChainedCollectionRecord from ..registry.sql_registry import SqlRegistry @@ -151,4 +153,7 @@ def register(self, name: str, type: CollectionType = CollectionType.RUN, doc: st return self._registry.registerCollection(name, type, doc) def x_remove(self, name: str) -> None: - self._registry.removeCollection(name) + try: + self._registry.removeCollection(name) + except sqlalchemy.exc.IntegrityError as e: + raise OrphanedRecordError(f"Datasets in run {name} are still referenced elsewhere.") from e