Do not query all the collections again unless there are wildcards

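Querying the collection registry for a list of hundreds of collections can be slow, so it is wasteful to run that query just on the off chance that the collection names contain wildcards. Only expand the collections when wildcards are actually present.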
lsst · Sep 7, 2024 · c3cfcef · c3cfcef
1 parent 709d25a
commit c3cfcef
Showing 1 changed file with 10 additions and 8 deletions.
diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
@@ -50,6 +50,7 @@
 from .dimensions import DataCoordinate, DimensionConfig
 from .registry import RegistryConfig, _RegistryFactory
 from .repo_relocation import BUTLER_ROOT_TAG
+from .utils import has_globs
 
 if TYPE_CHECKING:
     from ._dataset_existence import DatasetExistence
@@ -1691,14 +1692,15 @@ def query_datasets(
         if collections:
             # Wild cards need to be expanded but can only be allowed if
             # find_first=False because expanding wildcards does not return
-            # a guaranteed ordering.
-            expanded_collections = self.collections.query(collections)
-            if find_first and set(expanded_collections) != set(ensure_iterable(collections)):
-                raise RuntimeError(
-                    "Can not use wildcards in collections when find_first=True "
-                    f" (given {collections} which expanded to {expanded_collections})"
-                )
-            collections = expanded_collections
+            # a guaranteed ordering. Querying collection registry to expand
+            # collections when we do not have wildcards is expensive so only
+            # do it if we need it.
+            if has_globs(collections):
+                if find_first:
+                    raise RuntimeError(
+                        "Can not use wildcards in collections when find_first=True " f" (given {collections})"
+                    )
+                collections = self.collections.query(collections)
         query_limit = limit
         warn_limit = False
         if limit is not None and limit < 0: