From c3cfcef4788a9a8d0314af5058a00a8844b5b7f2 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Fri, 6 Sep 2024 17:20:27 -0700
Subject: [PATCH] Do not query all the collections again unless there are
 wildcards

It can be slow to run the collection registry query on a list of hundreds
of collections just on the off chance that they contain wildcards.
---
 python/lsst/daf/butler/_butler.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 2a9d8d0ac4..f40cf03cb8 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -50,6 +50,7 @@
 from .dimensions import DataCoordinate, DimensionConfig
 from .registry import RegistryConfig, _RegistryFactory
 from .repo_relocation import BUTLER_ROOT_TAG
+from .utils import has_globs
 
 if TYPE_CHECKING:
     from ._dataset_existence import DatasetExistence
@@ -1691,14 +1692,15 @@ def query_datasets(
         if collections:
             # Wild cards need to be expanded but can only be allowed if
             # find_first=False because expanding wildcards does not return
-            # a guaranteed ordering.
-            expanded_collections = self.collections.query(collections)
-            if find_first and set(expanded_collections) != set(ensure_iterable(collections)):
-                raise RuntimeError(
-                    "Can not use wildcards in collections when find_first=True "
-                    f" (given {collections} which expanded to {expanded_collections})"
-                )
-            collections = expanded_collections
+            # a guaranteed ordering. Querying the collection registry to
+            # expand collections is expensive, so only do it when wildcards
+            # are actually present.
+            if has_globs(collections):
+                if find_first:
+                    raise RuntimeError(
+                        "Can not use wildcards in collections when find_first=True " f" (given {collections})"
+                    )
+                collections = self.collections.query(collections)
         query_limit = limit
         warn_limit = False
         if limit is not None and limit < 0: