Skip to content

Commit

Permalink
Enable datastore cache for client/server butler
Browse files: browse the repository at this point in the history
  • Loading branch information
timj committed Aug 14, 2024
1 parent b716002 commit 73d150a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 3 deletions.
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/datastores/fileDatastoreClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

import pydantic
from lsst.daf.butler import DatasetRef, Location
from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager
from lsst.daf.butler.datastore.cache_manager import (
AbstractDatastoreCacheManager,
DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo
from lsst.daf.butler.datastores.file_datastore.get import (
DatasetLocationInformation,
Expand Down Expand Up @@ -47,6 +50,7 @@ def get_dataset_as_python_object(
payload: FileDatastoreGetPayload,
*,
parameters: Mapping[str, Any] | None,
cache_manager: AbstractDatastoreCacheManager | None = None,
) -> Any:
"""Retrieve an artifact from storage and return it as a Python object.
Expand All @@ -60,6 +64,8 @@ def get_dataset_as_python_object(
parameters : `Mapping`[`str`, `typing.Any`]
`StorageClass` and `Formatter` parameters to be used when converting
the artifact to a Python object.
cache_manager : `AbstractDatastoreCacheManager` or `None`, optional
Cache manager to use. If `None` the cache is disabled.
Returns
-------
Expand All @@ -76,6 +82,8 @@ def get_dataset_as_python_object(
ref=ref,
parameters=parameters,
)
if cache_manager is None:
cache_manager = DatastoreDisabledCacheManager()

Check warning on line 86 in python/lsst/daf/butler/datastores/fileDatastoreClient.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/datastores/fileDatastoreClient.py#L86

Added line #L86 was not covered by tests
return get_dataset_as_python_object_from_get_info(
datastore_file_info, ref=ref, parameters=parameters, cache_manager=DatastoreDisabledCacheManager()
datastore_file_info, ref=ref, parameters=parameters, cache_manager=cache_manager
)
29 changes: 28 additions & 1 deletion python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@
from .._exceptions import DatasetNotFoundError
from .._storage_class import StorageClass, StorageClassFactory
from .._utilities.locked_object import LockedObject
from ..datastore import DatasetRefURIs
from ..datastore import DatasetRefURIs, DatastoreConfig
from ..datastore.cache_manager import (
AbstractDatastoreCacheManager,
DatastoreCacheManager,
DatastoreDisabledCacheManager,
)
from ..dimensions import DataIdValue, DimensionConfig, DimensionUniverse, SerializedDataId
from ..queries import Query
from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults
Expand Down Expand Up @@ -104,6 +109,7 @@ class RemoteButler(Butler): # numpydoc ignore=PR02
_connection: RemoteButlerHttpConnection
_cache: RemoteButlerCache
_registry: Registry
_datastore_cache_manager: AbstractDatastoreCacheManager | None

# This is __new__ instead of __init__ because we have to support
# instantiation via the legacy constructor Butler.__new__(), which
Expand All @@ -125,6 +131,7 @@ def __new__(

self._connection = connection
self._cache = cache
self._datastore_cache_manager = None

# Avoid a circular import by deferring this import.
from ._registry import RemoteButlerRegistry
Expand Down Expand Up @@ -171,6 +178,25 @@ def dimensions(self) -> DimensionUniverse:
cache.dimensions = universe
return cache.dimensions

@property
def datastore_cache_manager(self) -> AbstractDatastoreCacheManager:
    """Cache manager to use when reading files from the butler.

    The manager is built on first access and stored on the instance, so
    later accesses return the same object.

    Returns
    -------
    manager : `AbstractDatastoreCacheManager`
        A `DatastoreCacheManager` built from the ``cached`` section of the
        default `DatastoreConfig` when that section exists, otherwise a
        `DatastoreDisabledCacheManager`.
    """
    manager = self._datastore_cache_manager
    if manager is not None:
        # Already constructed by an earlier access.
        return manager

    # RemoteButler does not get any cache configuration from the server,
    # so fall back on the Datastore default config (which is a
    # FileDatastore) for the default cache manager configuration.
    # Overrides will come from environment variables. This will not work
    # properly if the defaults for DatastoreConfig no longer include the
    # cache.
    default_config = DatastoreConfig()
    if "cached" in default_config:
        manager = DatastoreCacheManager(default_config["cached"], universe=self.dimensions)
    else:
        # NOTE(review): the coverage report for this change flagged this
        # fallback line as not covered by tests.
        manager = DatastoreDisabledCacheManager("", universe=self.dimensions)

    self._datastore_cache_manager = manager
    return manager

def _caching_context(self) -> AbstractContextManager[None]:
# Docstring inherited.
# Not implemented for now, will have to think whether this needs to
Expand Down Expand Up @@ -253,6 +279,7 @@ def _get_dataset_as_python_object(
ref,
_to_file_payload(model),
parameters=parameters,
cache_manager=self.datastore_cache_manager,
)

def _get_file_info(
Expand Down

0 comments on commit 73d150a

Please sign in to comment.