From c3cfcef4788a9a8d0314af5058a00a8844b5b7f2 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 6 Sep 2024 17:20:27 -0700 Subject: [PATCH] Do not query all the collections again unless there are wildcards It can be slow to run the query on a list of hundreds of collections just on the off chance they have wildcards. --- python/lsst/daf/butler/_butler.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 2a9d8d0ac4..f40cf03cb8 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -50,6 +50,7 @@ from .dimensions import DataCoordinate, DimensionConfig from .registry import RegistryConfig, _RegistryFactory from .repo_relocation import BUTLER_ROOT_TAG +from .utils import has_globs if TYPE_CHECKING: from ._dataset_existence import DatasetExistence @@ -1691,14 +1692,15 @@ def query_datasets( if collections: # Wild cards need to be expanded but can only be allowed if # find_first=False because expanding wildcards does not return - # a guaranteed ordering. - expanded_collections = self.collections.query(collections) - if find_first and set(expanded_collections) != set(ensure_iterable(collections)): - raise RuntimeError( - "Can not use wildcards in collections when find_first=True " - f" (given {collections} which expanded to {expanded_collections})" - ) - collections = expanded_collections + # a guaranteed ordering. Querying collection registry to expand + # collections when we do not have wildcards is expensive so only + # do it if we need it. + if has_globs(collections): + if find_first: + raise RuntimeError( + "Can not use wildcards in collections when find_first=True " f" (given {collections})" + ) + collections = self.collections.query(collections) query_limit = limit warn_limit = False if limit is not None and limit < 0: