From dadf6029a68d37a24c8f05577da71477b418b3af Mon Sep 17 00:00:00 2001
From: Tim Jenness
Date: Wed, 14 Aug 2024 12:37:50 -0700
Subject: [PATCH] Enable datastore cache for client/server butler

---
 .../butler/datastores/fileDatastoreClient.py  | 12 ++++++--
 .../butler/remote_butler/_remote_butler.py    | 29 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/python/lsst/daf/butler/datastores/fileDatastoreClient.py b/python/lsst/daf/butler/datastores/fileDatastoreClient.py
index f821fe01d3..8f306df4b1 100644
--- a/python/lsst/daf/butler/datastores/fileDatastoreClient.py
+++ b/python/lsst/daf/butler/datastores/fileDatastoreClient.py
@@ -4,7 +4,10 @@
 import pydantic
 from lsst.daf.butler import DatasetRef, Location
-from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager
+from lsst.daf.butler.datastore.cache_manager import (
+    AbstractDatastoreCacheManager,
+    DatastoreDisabledCacheManager,
+)
 from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo
 from lsst.daf.butler.datastores.file_datastore.get import (
     DatasetLocationInformation,
@@ -47,6 +50,7 @@ def get_dataset_as_python_object(
     payload: FileDatastoreGetPayload,
     *,
     parameters: Mapping[str, Any] | None,
+    cache_manager: AbstractDatastoreCacheManager | None = None,
 ) -> Any:
     """Retrieve an artifact from storage and return it as a Python object.
@@ -60,6 +64,8 @@
     parameters : `Mapping`[`str`, `typing.Any`]
         `StorageClass` and `Formatter` parameters to be used when
         converting the artifact to a Python object.
+    cache_manager : `AbstractDatastoreCacheManager` or `None`, optional
+        Cache manager to use. If `None` the cache is disabled.

     Returns
     -------
@@ -76,6 +82,8 @@
         ref=ref,
         parameters=parameters,
     )
+    if cache_manager is None:
+        cache_manager = DatastoreDisabledCacheManager()
     return get_dataset_as_python_object_from_get_info(
-        datastore_file_info, ref=ref, parameters=parameters, cache_manager=DatastoreDisabledCacheManager()
+        datastore_file_info, ref=ref, parameters=parameters, cache_manager=cache_manager
     )
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py
index 3392fc8791..d4631c816f 100644
--- a/python/lsst/daf/butler/remote_butler/_remote_butler.py
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -54,7 +54,12 @@
 from .._exceptions import DatasetNotFoundError
 from .._storage_class import StorageClass, StorageClassFactory
 from .._utilities.locked_object import LockedObject
-from ..datastore import DatasetRefURIs
+from ..datastore import DatasetRefURIs, DatastoreConfig
+from ..datastore.cache_manager import (
+    AbstractDatastoreCacheManager,
+    DatastoreCacheManager,
+    DatastoreDisabledCacheManager,
+)
 from ..dimensions import DataIdValue, DimensionConfig, DimensionUniverse, SerializedDataId
 from ..queries import Query
 from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults
@@ -104,6 +109,7 @@ class RemoteButler(Butler):  # numpydoc ignore=PR02
     _connection: RemoteButlerHttpConnection
     _cache: RemoteButlerCache
     _registry: Registry
+    _datastore_cache_manager: AbstractDatastoreCacheManager | None

     # This is __new__ instead of __init__ because we have to support
     # instantiation via the legacy constructor Butler.__new__(), which
@@ -125,6 +131,7 @@ def __new__(
         self._connection = connection
         self._cache = cache
+        self._datastore_cache_manager = None

         # Avoid a circular import by deferring this import.
         from ._registry import RemoteButlerRegistry
@@ -171,6 +178,25 @@ def dimensions(self) -> DimensionUniverse:
                 cache.dimensions = universe
             return cache.dimensions

+    @property
+    def datastore_cache_manager(self) -> AbstractDatastoreCacheManager:
+        """Cache manager to use when reading files from the butler."""
+        # RemoteButler does not get any cache configuration from the server.
+        # Read the Datastore default config (which is a FileDatastore)
+        # and obtain the default cache manager configuration. Overrides will
+        # come from environment variables. This will not work properly if the
+        # defaults for DatastoreConfig no longer include the cache.
+        if self._datastore_cache_manager is None:
+            datastore_config = DatastoreConfig()
+            self._datastore_cache_manager: AbstractDatastoreCacheManager
+            if "cached" in datastore_config:
+                self._datastore_cache_manager = DatastoreCacheManager(
+                    datastore_config["cached"], universe=self.dimensions
+                )
+            else:
+                self._datastore_cache_manager = DatastoreDisabledCacheManager("", universe=self.dimensions)
+        return self._datastore_cache_manager
+
     def _caching_context(self) -> AbstractContextManager[None]:
         # Docstring inherited.
         # Not implemented for now, will have to think whether this needs to
@@ -253,6 +279,7 @@ def _get_dataset_as_python_object(
             ref,
             _to_file_payload(model),
             parameters=parameters,
+            cache_manager=self.datastore_cache_manager,
         )

     def _get_file_info(
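
The new RemoteButler.datastore_cache_manager property lazily builds its cache
manager entirely from client-side defaults, since the server sends no cache
configuration. Below is a minimal standalone sketch of that fallback logic,
extracted from the property for illustration: the helper name
make_default_cache_manager is hypothetical, while the import paths and
constructor calls are taken from the patch above.

    from lsst.daf.butler.datastore import DatastoreConfig
    from lsst.daf.butler.datastore.cache_manager import (
        AbstractDatastoreCacheManager,
        DatastoreCacheManager,
        DatastoreDisabledCacheManager,
    )
    from lsst.daf.butler.dimensions import DimensionUniverse


    def make_default_cache_manager(universe: DimensionUniverse) -> AbstractDatastoreCacheManager:
        # Read the default datastore configuration; environment variables may
        # override the cache settings found here.
        datastore_config = DatastoreConfig()
        if "cached" in datastore_config:
            # Defaults include a cache section: build a real cache manager.
            return DatastoreCacheManager(datastore_config["cached"], universe=universe)
        # No cache section in the defaults: caching is disabled.
        return DatastoreDisabledCacheManager("", universe=universe)

In the patch itself this logic lives behind a property that memoizes the
result in self._datastore_cache_manager, so the configuration is read at most
once per butler instance and the same manager is reused for every read.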