Skip to content

Commit

Permalink
Merge pull request #867 from lsst/tickets/DM-40120
Browse files Browse the repository at this point in the history
DM-40120: Add without_datastore flag to Butler
  • Loading branch information
timj authored Jul 21, 2023
2 parents 1fe3838 + 34be32a commit 4cd0e03
Show file tree
Hide file tree
Showing 15 changed files with 263 additions and 15 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-40120.api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added a new parameter ``without_datastore`` to the ``Butler`` and ``ButlerConfig`` constructors to allow a butler to be created that cannot access a datastore.
This can be helpful if you want to query the registry without incurring the overhead of the datastore.
16 changes: 12 additions & 4 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
DimensionRecord,
DimensionUniverse,
FileDataset,
NullDatastore,
Progress,
StorageClass,
StorageClassFactory,
Expand Down Expand Up @@ -149,6 +150,9 @@ class Butler(LimitedButler):
the default for that dimension. Nonexistent collections are ignored.
If a default value is provided explicitly for a governor dimension via
``**kwargs``, no default will be inferred for that dimension.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `str`
Default data ID key-value pairs. These may only identify "governor"
dimensions like ``instrument`` and ``skymap``.
Expand Down Expand Up @@ -203,6 +207,7 @@ def __init__(
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: str,
):
defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
Expand All @@ -217,7 +222,7 @@ def __init__(
self.storageClasses = butler.storageClasses
self._config: ButlerConfig = butler._config
else:
self._config = ButlerConfig(config, searchPaths=searchPaths)
self._config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
try:
if "root" in self._config:
butlerRoot = self._config["root"]
Expand All @@ -228,9 +233,12 @@ def __init__(
self._registry = _RegistryFactory(self._config).from_config(
butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
)
self._datastore = Datastore.fromConfig(
self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
if without_datastore:
self._datastore = NullDatastore(None, None)
else:
self._datastore = Datastore.fromConfig(
self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
self.storageClasses = StorageClassFactory()
self.storageClasses.addFromConfig(self._config)
except Exception:
Expand Down
11 changes: 10 additions & 1 deletion python/lsst/daf/butler/_butlerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ class ButlerConfig(Config):
than those read from the environment in
`ConfigSubset.defaultSearchPaths()`. They are only read if ``other``
refers to a configuration file or directory.
without_datastore : `bool`, optional
If `True` remove the datastore configuration.
"""

def __init__(
self,
other: ResourcePathExpression | Config | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
without_datastore: bool = False,
):
self.configDir: ResourcePath | None = None

Expand Down Expand Up @@ -155,6 +158,13 @@ def __init__(
# configuration classes. We ask each of them to apply defaults to
# the values we have been supplied by the user.
for configClass in CONFIG_COMPONENT_CLASSES:
assert configClass.component is not None, "Config class component cannot be None"

if without_datastore and configClass is DatastoreConfig:
if configClass.component in butlerConfig:
del butlerConfig[configClass.component]
continue

# Only send the parent config if the child
# config component is present (otherwise it assumes that the
# keys from other components are part of the child)
Expand All @@ -163,7 +173,6 @@ def __init__(
localOverrides = butlerConfig
config = configClass(localOverrides, searchPaths=searchPaths)
# Re-attach it using the global namespace
assert configClass.component is not None, "Config class component cannot be None"
self.update({configClass.component: config})
# Remove the key from the butlerConfig since we have already
# merged that information.
Expand Down
124 changes: 123 additions & 1 deletion python/lsst/daf/butler/core/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")
__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore")

import contextlib
import dataclasses
import logging
import time
from abc import ABCMeta, abstractmethod
from collections import abc, defaultdict
from collections.abc import Callable, Iterable, Iterator, Mapping
Expand All @@ -50,6 +51,8 @@
from .datastoreRecordData import DatastoreRecordData
from .storageClass import StorageClass

_LOG = logging.getLogger(__name__)


class DatastoreConfig(ConfigSubset):
"""Configuration for Datastores."""
Expand Down Expand Up @@ -1205,3 +1208,122 @@ def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType |
guess dataset location based on its stored dataset type.
"""
pass


class NullDatastore(Datastore):
    """A datastore that implements the `Datastore` API but always fails when
    it accepts any request.

    Notes
    -----
    Read-style lookups (`get`, `getURI`, `getURIs`) raise `FileNotFoundError`
    so callers that treat a missing dataset as an expected condition continue
    to work; all other operations raise `NotImplementedError`.
    """

    # Shared failure message; previously this literal was duplicated in every
    # method body.
    _NO_OP_MESSAGE = "This is a no-op datastore that can not access a real datastore"

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        # Nothing to do. This is not a real Datastore.
        pass

    def __init__(
        self,
        config: Config | ResourcePathExpression | None,
        bridgeManager: DatastoreRegistryBridgeManager | None,
        butlerRoot: ResourcePathExpression | None = None,
    ):
        # All constructor arguments are accepted for API compatibility but
        # deliberately ignored.
        # Name ourselves with the timestamp the datastore was created so
        # every instance gets a distinct name.
        self.name = f"{type(self).__name__}@{time.time()}"
        _LOG.debug("Creating datastore %s", self.name)

    def knows(self, ref: DatasetRef) -> bool:
        # Nothing is ever stored here.
        return False

    def exists(self, datasetRef: DatasetRef) -> bool:
        # Nothing is ever stored here.
        return False

    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def ingest(
        self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        transfer: str = "auto",
        artifact_existence: dict[ResourcePath, bool] | None = None,
    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def remove(self, datasetRef: DatasetRef) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def emptyTrash(self, ignore_errors: bool = True) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: ResourcePathExpression | None = None,
        transfer: str | None = "auto",
    ) -> Iterable[FileDataset]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        # No configuration so always validates.
        pass

    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        # No configuration, so any key is acceptable.
        pass

    def getLookupKeys(self) -> set[LookupKey]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        raise NotImplementedError(self._NO_OP_MESSAGE)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/certifyCalibrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def certifyCalibrations(
Search all children of the inputCollection if it is a CHAINED
collection, instead of just the most recent one.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)
registry = butler.registry
timespan = Timespan(
begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None,
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/collectionChain.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def collectionChain(
chain : `tuple` of `str`
The collections in the chain following this command.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)

# Every mode needs children except pop.
if not children and mode != "pop":
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def _getTree(
names=("Name", "Type"),
dtype=(str, str),
)
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

def addCollection(name: str, level: int = 0) -> None:
collectionType = butler.registry.getCollectionType(name)
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDataIds.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def queryDataIds(
Docstring for supported parameters is the same as
`~lsst.daf.butler.Registry.queryDataIds`.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

if datasets and collections and not dimensions:
# Determine the dimensions relevant to all given dataset types.
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDatasetTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components:
A dict whose key is "datasetTypes" and whose value is a list of
collection names.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)
expression = glob if glob else ...
datasetTypes = butler.registry.queryDatasetTypes(components=components, expression=expression)
if verbose:
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/script/queryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ def __init__(
):
if (repo and butler) or (not repo and not butler):
raise RuntimeError("One of repo and butler must be provided and the other must be None.")
self.butler = butler or Butler(repo)
# show_uri requires a datastore.
without_datastore = False if show_uri else True
self.butler = butler or Butler(repo, without_datastore=without_datastore)
self._getDatasets(glob, collections, where, find_first)
self.showUri = show_uri

Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDimensionRecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def queryDimensionRecords(
`~lsst.daf.butler.Registry.queryDimensionRecords` except for ``no_check``,
which is the inverse of ``check``.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

query_collections: Iterable[str] | EllipsisType | None = None
if datasets:
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/register_dataset_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def register_dataset_type(
be created by this command. They are always derived from the composite
dataset type.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)

composite, component = DatasetType.splitDatasetTypeName(dataset_type)
if component:
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/removeDatasetType.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,5 @@ def removeDatasetType(repo: str, dataset_type_name: tuple[str, ...]) -> None:
datasetTypeName : `str`
The name of the dataset type to be removed.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)
butler.registry.removeDatasetType(dataset_type_name)
51 changes: 51 additions & 0 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def mock_s3(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def]
FileDataset,
FileTemplate,
FileTemplateValidationError,
NullDatastore,
StorageClassFactory,
ValidationError,
script,
Expand Down Expand Up @@ -2332,6 +2333,56 @@ class ChainedDatastoreTransfers(PosixDatastoreTransfers):
configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")


class NullDatastoreTestCase(unittest.TestCase):
    """Test that we can fall back to a null datastore."""

    # A valid configuration is still required to create the repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def test_fallback(self) -> None:
        # Corrupt the datastore section of the repo configuration so that
        # constructing a real datastore is guaranteed to fail.
        broken = Config(os.path.join(self.root, "butler.yaml"))
        broken["datastore", "cls"] = "lsst.not.a.datastore.Datastore"

        with self.assertRaises(RuntimeError):
            Butler(broken)

        # With without_datastore=True the broken datastore config is ignored
        # and a NullDatastore is attached instead.
        butler = Butler(broken, writeable=True, without_datastore=True)
        self.assertIsInstance(butler._datastore, NullDatastore)

        # Registry operations should be unaffected.
        butler.registry.registerRun("MYRUN")
        self.assertIn("MYRUN", set(butler.registry.queryCollections(...)))

        # Register a dataset type and build a ref for it.
        graph = butler.dimensions.extract([])
        storage_class = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dataset_type = DatasetType("metric", graph, storage_class)
        butler.registry.registerDatasetType(dataset_type)
        ref = DatasetRef(dataset_type, {}, run="MYRUN")

        # Any attempt to reach the datastore must fail.
        with self.assertRaises(FileNotFoundError):
            butler.get(ref)
        with self.assertRaises(FileNotFoundError):
            butler.getURI(ref)


def setup_module(module: types.ModuleType) -> None:
"""Set up the module for pytest."""
clean_environment()
Expand Down
Loading

0 comments on commit 4cd0e03

Please sign in to comment.