diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index d8b04781d2..2b04691631 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -51,6 +51,7 @@
from ._dataset_existence import DatasetExistence
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._limited_butler import LimitedButler
+from ._registry_shim import RegistryShim
from .core import (
Config,
ConfigSubset,
@@ -85,6 +86,8 @@
Registry,
RegistryConfig,
RegistryDefaults,
+ _ButlerRegistry,
+ _RegistryFactory,
)
from .transfers import RepoExportContext
@@ -209,7 +212,7 @@ def __init__(
raise TypeError(
"Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
)
- self.registry = butler.registry.copy(defaults)
+ self._registry = butler._registry.copy(defaults)
self._datastore = butler._datastore
self.storageClasses = butler.storageClasses
self._config: ButlerConfig = butler._config
@@ -222,11 +225,11 @@ def __init__(
butlerRoot = self._config.configDir
if writeable is None:
writeable = run is not None
- self.registry = Registry.fromConfig(
- self._config, butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
+ self._registry = _RegistryFactory(self._config).from_config(
+ butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
)
self._datastore = Datastore.fromConfig(
- self._config, self.registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
+ self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
self.storageClasses = StorageClassFactory()
self.storageClasses.addFromConfig(self._config)
@@ -245,6 +248,8 @@ def __init__(
if "run" in self._config or "collection" in self._config:
raise ValueError("Passing a run or collection via configuration is no longer supported.")
+ self._registry_shim = RegistryShim(self)
+
GENERATION: ClassVar[int] = 3
"""This is a Generation 3 Butler.
@@ -256,7 +261,7 @@ def __init__(
def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
"""Return DatasetType defined in registry given dataset type name."""
try:
- return self.registry.getDatasetType(name)
+ return self._registry.getDatasetType(name)
except MissingDatasetTypeError:
return None
@@ -458,7 +463,9 @@ def makeRepo(
# Create Registry and populate tables
registryConfig = RegistryConfig(config.get("registry"))
dimensionConfig = DimensionConfig(dimensionConfig)
- Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri)
+ _RegistryFactory(registryConfig).create_from_config(
+ dimensionConfig=dimensionConfig, butlerRoot=root_uri
+ )
log.verbose("Wrote new Butler configuration file to %s", configURI)
@@ -517,19 +524,19 @@ def __reduce__(self) -> tuple:
self._config,
self.collections,
self.run,
- self.registry.defaults.dataId.byName(),
- self.registry.isWriteable(),
+ self._registry.defaults.dataId.byName(),
+ self._registry.isWriteable(),
),
)
def __str__(self) -> str:
return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format(
- self.collections, self.run, self._datastore, self.registry
+ self.collections, self.run, self._datastore, self._registry
)
def isWriteable(self) -> bool:
"""Return `True` if this `Butler` supports write operations."""
- return self.registry.isWriteable()
+ return self._registry.isWriteable()
@contextlib.contextmanager
def transaction(self) -> Iterator[None]:
@@ -537,7 +544,7 @@ def transaction(self) -> Iterator[None]:
Transactions can be nested.
"""
- with self.registry.transaction():
+ with self._registry.transaction():
with self._datastore.transaction():
yield
@@ -602,11 +609,11 @@ def _standardizeArgs(
if isinstance(datasetRefOrType, DatasetType):
externalDatasetType = datasetRefOrType
else:
- internalDatasetType = self.registry.getDatasetType(datasetRefOrType)
+ internalDatasetType = self._registry.getDatasetType(datasetRefOrType)
# Check that they are self-consistent
if externalDatasetType is not None:
- internalDatasetType = self.registry.getDatasetType(externalDatasetType.name)
+ internalDatasetType = self._registry.getDatasetType(externalDatasetType.name)
if externalDatasetType != internalDatasetType:
# We can allow differences if they are compatible, depending
# on whether this is a get or a put. A get requires that
@@ -882,7 +889,7 @@ def _rewrite_data_id(
)
# Get the actual record and compare with these values.
try:
- recs = list(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId))
+ recs = list(self._registry.queryDimensionRecords(dimensionName, dataId=newDataId))
except DataIdError:
raise ValueError(
f"Could not find dimension '{dimensionName}'"
@@ -911,7 +918,7 @@ def _rewrite_data_id(
# Hopefully we get a single record that matches
records = set(
- self.registry.queryDimensionRecords(
+ self._registry.queryDimensionRecords(
dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs
)
)
@@ -927,7 +934,7 @@ def _rewrite_data_id(
and "visit_system" in self.dimensions["instrument"].metadata
):
instrument_records = list(
- self.registry.queryDimensionRecords(
+ self._registry.queryDimensionRecords(
"instrument",
dataId=newDataId,
**kwargs,
@@ -943,7 +950,7 @@ def _rewrite_data_id(
# visit_system_membership records.
for rec in records:
membership = list(
- self.registry.queryDimensionRecords(
+ self._registry.queryDimensionRecords(
# Use bind to allow zero results.
# This is a fully-specified query.
"visit_system_membership",
@@ -1055,21 +1062,21 @@ def _findDatasetRef(
# dimensions that provide temporal information for a validity-range
# lookup.
dataId = DataCoordinate.standardize(
- dataId, universe=self.dimensions, defaults=self.registry.defaults.dataId, **kwargs
+ dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs
)
if dataId.graph.temporal:
- dataId = self.registry.expandDataId(dataId)
+ dataId = self._registry.expandDataId(dataId)
timespan = dataId.timespan
else:
# Standardize the data ID to just the dimensions of the dataset
# type instead of letting registry.findDataset do it, so we get the
# result even if no dataset is found.
dataId = DataCoordinate.standardize(
- dataId, graph=datasetType.dimensions, defaults=self.registry.defaults.dataId, **kwargs
+ dataId, graph=datasetType.dimensions, defaults=self._registry.defaults.dataId, **kwargs
)
# Always lookup the DatasetRef, even if one is given, to ensure it is
# present in the current collection.
- ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan)
+ ref = self._registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan)
if ref is None:
if predict:
if run is None:
@@ -1079,7 +1086,7 @@ def _findDatasetRef(
return DatasetRef(datasetType, dataId, run=run)
else:
if collections is None:
- collections = self.registry.defaults.collections
+ collections = self._registry.defaults.collections
raise LookupError(
f"Dataset {datasetType.name} with data ID {dataId} "
f"could not be found in collections {collections}."
@@ -1160,7 +1167,7 @@ def put(
# dataset type and DataId, then _importDatasets will do nothing and
# just return an original ref. We have to raise in this case, there
# is a datastore check below for that.
- self.registry._importDatasets([datasetRefOrType], expand=True)
+ self._registry._importDatasets([datasetRefOrType], expand=True)
# Before trying to write to the datastore check that it does not
# know this dataset. This is prone to races, of course.
if self._datastore.knows(datasetRefOrType):
@@ -1183,8 +1190,8 @@ def put(
dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)
# Add Registry Dataset entry.
- dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs)
- (ref,) = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId])
+ dataId = self._registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs)
+ (ref,) = self._registry.insertDatasets(datasetType, run=run, dataIds=[dataId])
self._datastore.put(obj, ref)
return ref
@@ -1622,7 +1629,7 @@ def exists(
if data_id is not None:
warnings.warn("A DataID should not be specified with DatasetRef", stacklevel=2)
ref = dataset_ref_or_type
- registry_ref = self.registry.getDataset(dataset_ref_or_type.id)
+ registry_ref = self._registry.getDataset(dataset_ref_or_type.id)
if registry_ref is not None:
existence |= DatasetExistence.RECORDED
@@ -1692,7 +1699,7 @@ def _exists_many(
# Registry does not have a bulk API to check for a ref.
for ref in refs:
- registry_ref = self.registry.getDataset(ref.id)
+ registry_ref = self._registry.getDataset(ref.id)
if registry_ref is not None:
# It is possible, albeit unlikely, that the given ref does
# not match the one in registry even though the UUID matches.
@@ -1769,7 +1776,7 @@ def datasetExists(
"""
# A resolved ref may be given that is not known to this butler.
if isinstance(datasetRefOrType, DatasetRef):
- ref = self.registry.getDataset(datasetRefOrType.id)
+ ref = self._registry.getDataset(datasetRefOrType.id)
if ref is None:
raise LookupError(
f"Resolved DatasetRef with id {datasetRefOrType.id} is not known to registry."
@@ -1804,18 +1811,18 @@ def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
names = list(names)
refs: list[DatasetRef] = []
for name in names:
- collectionType = self.registry.getCollectionType(name)
+ collectionType = self._registry.getCollectionType(name)
if collectionType is not CollectionType.RUN:
raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.")
- refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True))
+ refs.extend(self._registry.queryDatasets(..., collections=name, findFirst=True))
with self._datastore.transaction():
- with self.registry.transaction():
+ with self._registry.transaction():
if unstore:
self._datastore.trash(refs)
else:
self._datastore.forget(refs)
for name in names:
- self.registry.removeCollection(name)
+ self._registry.removeCollection(name)
if unstore:
# Point of no return for removing artifacts
self._datastore.emptyTrash()
@@ -1843,7 +1850,7 @@ def pruneDatasets(
if not tags:
raise TypeError("No tags provided but disassociate=True.")
for tag in tags:
- collectionType = self.registry.getCollectionType(tag)
+ collectionType = self._registry.getCollectionType(tag)
if collectionType is not CollectionType.TAGGED:
raise TypeError(
f"Cannot disassociate from collection '{tag}' "
@@ -1864,15 +1871,15 @@ def pruneDatasets(
# but shouldn't change them), and hence all operations here are
# Registry operations.
with self._datastore.transaction():
- with self.registry.transaction():
+ with self._registry.transaction():
if unstore:
self._datastore.trash(refs)
if purge:
- self.registry.removeDatasets(refs)
+ self._registry.removeDatasets(refs)
elif disassociate:
assert tags, "Guaranteed by earlier logic in this function."
for tag in tags:
- self.registry.disassociate(tag, refs)
+ self._registry.disassociate(tag, refs)
# We've exited the Registry transaction, and apparently committed.
# (if there was an exception, everything rolled back, and it's as if
# nothing happened - and we never get here).
@@ -2041,7 +2048,7 @@ def ingest(
# Import the refs and expand the DataCoordinates since we can't
# guarantee that they are expanded and Datastore will need
# the records.
- imported_refs = self.registry._importDatasets(refs_to_import, expand=True)
+ imported_refs = self._registry._importDatasets(refs_to_import, expand=True)
assert set(imported_refs) == set(refs_to_import)
# Replace all the refs in the FileDataset with expanded versions.
@@ -2138,7 +2145,7 @@ def export(
backend = BackendClass(stream, universe=self.dimensions)
try:
helper = RepoExportContext(
- self.registry, self._datastore, backend=backend, directory=directory, transfer=transfer
+ self._registry, self._datastore, backend=backend, directory=directory, transfer=transfer
)
yield helper
except BaseException:
@@ -2228,7 +2235,7 @@ def import_(
)
def doImport(importStream: TextIO | ResourceHandleProtocol) -> None:
- backend = BackendClass(importStream, self.registry) # type: ignore[call-arg]
+ backend = BackendClass(importStream, self._registry) # type: ignore[call-arg]
backend.register()
with self.transaction():
backend.load(
@@ -2348,11 +2355,11 @@ def transfer_from(
# on to find additional inconsistent dataset types
# might result in additional unwanted dataset types being
# registered.
- if self.registry.registerDatasetType(datasetType):
+ if self._registry.registerDatasetType(datasetType):
newly_registered_dataset_types.add(datasetType)
else:
# If the dataset type is missing, let it fail immediately.
- target_dataset_type = self.registry.getDatasetType(datasetType.name)
+ target_dataset_type = self._registry.getDatasetType(datasetType.name)
if target_dataset_type != datasetType:
raise ConflictingDefinitionError(
"Source butler dataset type differs from definition"
@@ -2407,7 +2414,7 @@ def transfer_from(
# Assume that if the record is already present that we can
# use it without having to check that the record metadata
# is consistent.
- self.registry.insertDimensionData(element, *records, skip_existing=True)
+ self._registry.insertDimensionData(element, *records, skip_existing=True)
n_imported = 0
for (datasetType, run), refs_to_import in progress.iter_item_chunks(
@@ -2419,7 +2426,7 @@ def transfer_from(
run_doc = None
if registry := getattr(source_butler, "registry", None):
run_doc = registry.getCollectionDocumentation(run)
- registered = self.registry.registerRun(run, doc=run_doc)
+ registered = self._registry.registerRun(run, doc=run_doc)
handled_collections.add(run)
if registered:
log.log(VERBOSE, "Creating output run %s", run)
@@ -2435,7 +2442,7 @@ def transfer_from(
# Assume we are using UUIDs and the source refs will match
# those imported.
- imported_refs = self.registry._importDatasets(refs_to_import, expand=False)
+ imported_refs = self._registry._importDatasets(refs_to_import, expand=False)
assert set(imported_refs) == set(refs_to_import)
n_imported += len(imported_refs)
@@ -2493,9 +2500,9 @@ def validateConfiguration(
is configured.
"""
if datasetTypeNames:
- datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames]
+ datasetTypes = [self._registry.getDatasetType(name) for name in datasetTypeNames]
else:
- datasetTypes = list(self.registry.queryDatasetTypes())
+ datasetTypes = list(self._registry.queryDatasetTypes())
# filter out anything from the ignore list
if ignore:
@@ -2513,7 +2520,7 @@ def validateConfiguration(
# Find all the registered instruments (if "instrument" is in the
# universe).
if "instrument" in self.dimensions:
- instruments = {record.name for record in self.registry.queryDimensionRecords("instrument")}
+ instruments = {record.name for record in self._registry.queryDimensionRecords("instrument")}
for datasetType in datasetTypes:
if "instrument" in datasetType.dimensions:
@@ -2563,7 +2570,7 @@ def validateConfiguration(
pass
else:
try:
- self.registry.getDatasetType(key.name)
+ self._registry.getDatasetType(key.name)
except KeyError:
if logFailures:
log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key)
@@ -2612,7 +2619,7 @@ def collections(self) -> Sequence[str]:
by assigning a new `RegistryDefaults` instance to
``self.registry.defaults``.
"""
- return self.registry.defaults.collections
+ return self._registry.defaults.collections
@property
def run(self) -> str | None:
@@ -2624,15 +2631,27 @@ def run(self) -> str | None:
assigning a new `RegistryDefaults` instance to
``self.registry.defaults``.
"""
- return self.registry.defaults.run
+ return self._registry.defaults.run
+
+ @property
+ def registry(self) -> Registry:
+ """The object that manages dataset metadata and relationships
+ (`Registry`).
+
+ Many operations that don't involve reading or writing butler datasets
+ are accessible only via `Registry` methods. Eventually these methods
+ will be replaced by equivalent `Butler` methods.
+ """
+ return self._registry_shim
@property
def dimensions(self) -> DimensionUniverse:
# Docstring inherited.
- return self.registry.dimensions
+ return self._registry.dimensions
- registry: Registry
- """The object that manages dataset metadata and relationships (`Registry`).
+ _registry: _ButlerRegistry
+ """The object that manages dataset metadata and relationships
+ (`_ButlerRegistry`).
Most operations that don't involve reading or writing butler datasets are
accessible only via `Registry` methods.
diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py
new file mode 100644
index 0000000000..3cc12f3944
--- /dev/null
+++ b/python/lsst/daf/butler/_registry_shim.py
@@ -0,0 +1,392 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from __future__ import annotations
+
+__all__ = ("Registry",)
+
+import contextlib
+from collections.abc import Iterable, Iterator, Mapping, Sequence
+from typing import TYPE_CHECKING, Any
+
+from .core import (
+ DataCoordinate,
+ DataId,
+ DatasetAssociation,
+ DatasetId,
+ DatasetIdGenEnum,
+ DatasetRef,
+ DatasetType,
+ Dimension,
+ DimensionElement,
+ DimensionGraph,
+ DimensionRecord,
+ DimensionUniverse,
+ NameLookupMapping,
+ Timespan,
+)
+from .registry import Registry
+from .registry._collection_summary import CollectionSummary
+from .registry._collectionType import CollectionType
+from .registry._defaults import RegistryDefaults
+from .registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
+
+if TYPE_CHECKING:
+ from ._butler import Butler
+ from .registry._registry import CollectionArgType
+ from .registry.interfaces import ObsCoreTableManager
+
+
+class RegistryShim(Registry):
+ """Implementation of `Registry` interface exposed to clients by `Butler`.
+
+ Parameters
+ ----------
+ butler : `Butler`
+ Data butler instance.
+
+ Notes
+ -----
+ This shim implementation of `Registry` forwards all methods to an actual
+ Registry instance which is internal to Butler or to Butler methods. Its
+ purpose is to provide a stable interface to many client-visible operations
+ while we perform re-structuring of Registry and Butler implementations.
+ """
+
+ def __init__(self, butler: Butler):
+ self._butler = butler
+ self._registry = butler._registry
+
+ def isWriteable(self) -> bool:
+ # Docstring inherited from a base class.
+ return self._registry.isWriteable()
+
+ @property
+ def dimensions(self) -> DimensionUniverse:
+ # Docstring inherited from a base class.
+ return self._registry.dimensions
+
+ @property
+ def defaults(self) -> RegistryDefaults:
+ # Docstring inherited from a base class.
+ return self._registry.defaults
+
+ @defaults.setter
+ def defaults(self, value: RegistryDefaults) -> None:
+ # Docstring inherited from a base class.
+ self._registry.defaults = value
+
+ def refresh(self) -> None:
+ # Docstring inherited from a base class.
+ self._registry.refresh()
+
+ @contextlib.contextmanager
+ def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
+ # Docstring inherited from a base class.
+ with self._registry.transaction(savepoint=savepoint):
+ yield
+
+ def resetConnectionPool(self) -> None:
+ # Docstring inherited from a base class.
+ self._registry.resetConnectionPool()
+
+ def registerCollection(
+ self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
+ ) -> bool:
+ # Docstring inherited from a base class.
+ return self._registry.registerCollection(name, type, doc)
+
+ def getCollectionType(self, name: str) -> CollectionType:
+ # Docstring inherited from a base class.
+ return self._registry.getCollectionType(name)
+
+ def registerRun(self, name: str, doc: str | None = None) -> bool:
+ # Docstring inherited from a base class.
+ return self._registry.registerRun(name, doc)
+
+ def removeCollection(self, name: str) -> None:
+ # Docstring inherited from a base class.
+ self._registry.removeCollection(name)
+
+ def getCollectionChain(self, parent: str) -> Sequence[str]:
+ # Docstring inherited from a base class.
+ return self._registry.getCollectionChain(parent)
+
+ def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
+ # Docstring inherited from a base class.
+ self._registry.setCollectionChain(parent, children, flatten=flatten)
+
+ def getCollectionParentChains(self, collection: str) -> set[str]:
+ # Docstring inherited from a base class.
+ return self._registry.getCollectionParentChains(collection)
+
+ def getCollectionDocumentation(self, collection: str) -> str | None:
+ # Docstring inherited from a base class.
+ return self._registry.getCollectionDocumentation(collection)
+
+ def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
+ # Docstring inherited from a base class.
+ self._registry.setCollectionDocumentation(collection, doc)
+
+ def getCollectionSummary(self, collection: str) -> CollectionSummary:
+ # Docstring inherited from a base class.
+ return self._registry.getCollectionSummary(collection)
+
+ def registerDatasetType(self, datasetType: DatasetType) -> bool:
+ # Docstring inherited from a base class.
+ return self._registry.registerDatasetType(datasetType)
+
+ def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
+ # Docstring inherited from a base class.
+ self._registry.removeDatasetType(name)
+
+ def getDatasetType(self, name: str) -> DatasetType:
+ # Docstring inherited from a base class.
+ return self._registry.getDatasetType(name)
+
+ def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
+ # Docstring inherited from a base class.
+ return self._registry.supportsIdGenerationMode(mode)
+
+ def findDataset(
+ self,
+ datasetType: DatasetType | str,
+ dataId: DataId | None = None,
+ *,
+ collections: CollectionArgType | None = None,
+ timespan: Timespan | None = None,
+ **kwargs: Any,
+ ) -> DatasetRef | None:
+ # Docstring inherited from a base class.
+ return self._registry.findDataset(
+ datasetType, dataId, collections=collections, timespan=timespan, **kwargs
+ )
+
+ def insertDatasets(
+ self,
+ datasetType: DatasetType | str,
+ dataIds: Iterable[DataId],
+ run: str | None = None,
+ expand: bool = True,
+ idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
+ ) -> list[DatasetRef]:
+ # Docstring inherited from a base class.
+ return self._registry.insertDatasets(datasetType, dataIds, run, expand, idGenerationMode)
+
+ def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True) -> list[DatasetRef]:
+ # Docstring inherited from a base class.
+ return self._registry._importDatasets(datasets, expand)
+
+ def getDataset(self, id: DatasetId) -> DatasetRef | None:
+ # Docstring inherited from a base class.
+ return self._registry.getDataset(id)
+
+ def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
+ # Docstring inherited from a base class.
+ self._registry.removeDatasets(refs)
+
+ def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
+ # Docstring inherited from a base class.
+ self._registry.associate(collection, refs)
+
+ def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
+ # Docstring inherited from a base class.
+ self._registry.disassociate(collection, refs)
+
+ def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
+ # Docstring inherited from a base class.
+ self._registry.certify(collection, refs, timespan)
+
+ def decertify(
+ self,
+ collection: str,
+ datasetType: str | DatasetType,
+ timespan: Timespan,
+ *,
+ dataIds: Iterable[DataId] | None = None,
+ ) -> None:
+ # Docstring inherited from a base class.
+ self._registry.decertify(collection, datasetType, timespan, dataIds=dataIds)
+
+ def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
+ # Docstring inherited from a base class.
+ return self._registry.getDatasetLocations(ref)
+
+ def expandDataId(
+ self,
+ dataId: DataId | None = None,
+ *,
+ graph: DimensionGraph | None = None,
+ records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
+ withDefaults: bool = True,
+ **kwargs: Any,
+ ) -> DataCoordinate:
+ # Docstring inherited from a base class.
+ return self._registry.expandDataId(
+ dataId, graph=graph, records=records, withDefaults=withDefaults, **kwargs
+ )
+
+ def insertDimensionData(
+ self,
+ element: DimensionElement | str,
+ *data: Mapping[str, Any] | DimensionRecord,
+ conform: bool = True,
+ replace: bool = False,
+ skip_existing: bool = False,
+ ) -> None:
+ # Docstring inherited from a base class.
+ self._registry.insertDimensionData(
+ element, *data, conform=conform, replace=replace, skip_existing=skip_existing
+ )
+
+ def syncDimensionData(
+ self,
+ element: DimensionElement | str,
+ row: Mapping[str, Any] | DimensionRecord,
+ conform: bool = True,
+ update: bool = False,
+ ) -> bool | dict[str, Any]:
+ # Docstring inherited from a base class.
+ return self._registry.syncDimensionData(element, row, conform, update)
+
+ def queryDatasetTypes(
+ self,
+ expression: Any = ...,
+ *,
+ components: bool | None = None,
+ missing: list[str] | None = None,
+ ) -> Iterable[DatasetType]:
+ # Docstring inherited from a base class.
+ return self._registry.queryDatasetTypes(expression, components=components, missing=missing)
+
+ def queryCollections(
+ self,
+ expression: Any = ...,
+ datasetType: DatasetType | None = None,
+ collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
+ flattenChains: bool = False,
+ includeChains: bool | None = None,
+ ) -> Sequence[str]:
+ # Docstring inherited from a base class.
+ return self._registry.queryCollections(
+ expression, datasetType, collectionTypes, flattenChains, includeChains
+ )
+
+ def queryDatasets(
+ self,
+ datasetType: Any,
+ *,
+ collections: CollectionArgType | None = None,
+ dimensions: Iterable[Dimension | str] | None = None,
+ dataId: DataId | None = None,
+ where: str = "",
+ findFirst: bool = False,
+ components: bool | None = None,
+ bind: Mapping[str, Any] | None = None,
+ check: bool = True,
+ **kwargs: Any,
+ ) -> DatasetQueryResults:
+ # Docstring inherited from a base class.
+ return self._registry.queryDatasets(
+ datasetType,
+ collections=collections,
+ dimensions=dimensions,
+ dataId=dataId,
+ where=where,
+ findFirst=findFirst,
+ components=components,
+ bind=bind,
+ check=check,
+ **kwargs,
+ )
+
+ def queryDataIds(
+ self,
+ dimensions: Iterable[Dimension | str] | Dimension | str,
+ *,
+ dataId: DataId | None = None,
+ datasets: Any = None,
+ collections: CollectionArgType | None = None,
+ where: str = "",
+ components: bool | None = None,
+ bind: Mapping[str, Any] | None = None,
+ check: bool = True,
+ **kwargs: Any,
+ ) -> DataCoordinateQueryResults:
+ # Docstring inherited from a base class.
+ return self._registry.queryDataIds(
+ dimensions,
+ dataId=dataId,
+ datasets=datasets,
+ collections=collections,
+ where=where,
+ components=components,
+ bind=bind,
+ check=check,
+ **kwargs,
+ )
+
+ def queryDimensionRecords(
+ self,
+ element: DimensionElement | str,
+ *,
+ dataId: DataId | None = None,
+ datasets: Any = None,
+ collections: CollectionArgType | None = None,
+ where: str = "",
+ components: bool | None = None,
+ bind: Mapping[str, Any] | None = None,
+ check: bool = True,
+ **kwargs: Any,
+ ) -> DimensionRecordQueryResults:
+ # Docstring inherited from a base class.
+ return self._registry.queryDimensionRecords(
+ element,
+ dataId=dataId,
+ datasets=datasets,
+ collections=collections,
+ where=where,
+ components=components,
+ bind=bind,
+ check=check,
+ **kwargs,
+ )
+
+ def queryDatasetAssociations(
+ self,
+ datasetType: str | DatasetType,
+ collections: CollectionArgType | None = ...,
+ *,
+ collectionTypes: Iterable[CollectionType] = CollectionType.all(),
+ flattenChains: bool = False,
+ ) -> Iterator[DatasetAssociation]:
+ # Docstring inherited from a base class.
+ return self._registry.queryDatasetAssociations(
+ datasetType,
+ collections,
+ collectionTypes=collectionTypes,
+ flattenChains=flattenChains,
+ )
+
+ @property
+ def obsCoreTableManager(self) -> ObsCoreTableManager | None:
+ # Docstring inherited from a base class.
+ return self._registry.obsCoreTableManager
diff --git a/python/lsst/daf/butler/registries/remote.py b/python/lsst/daf/butler/registries/remote.py
index 5297da0011..f9f4959b6f 100644
--- a/python/lsst/daf/butler/registries/remote.py
+++ b/python/lsst/daf/butler/registries/remote.py
@@ -41,7 +41,6 @@
DataId,
DatasetAssociation,
DatasetId,
- DatasetIdFactory,
DatasetIdGenEnum,
DatasetRef,
DatasetType,
@@ -66,7 +65,7 @@
QueryDatasetsModel,
QueryDimensionRecordsModel,
)
-from ..registry import CollectionSummary, CollectionType, Registry, RegistryConfig, RegistryDefaults
+from ..registry import CollectionSummary, CollectionType, RegistryConfig, RegistryDefaults, _ButlerRegistry
if TYPE_CHECKING:
from .._butlerConfig import ButlerConfig
@@ -74,7 +73,7 @@
from ..registry.interfaces import CollectionRecord, DatastoreRegistryBridgeManager
-class RemoteRegistry(Registry):
+class RemoteRegistry(_ButlerRegistry):
"""Registry that can talk to a remote Butler server.
Parameters
@@ -92,8 +91,8 @@ def createFromConfig(
config: RegistryConfig | str | None = None,
dimensionConfig: DimensionConfig | str | None = None,
butlerRoot: ResourcePathExpression | None = None,
- ) -> Registry:
- """Create registry database and return `Registry` instance.
+ ) -> _ButlerRegistry:
+ """Create registry database and return `_ButlerRegistry` instance.
A remote registry can not create a registry database. Calling this
method will raise an exception.
@@ -107,7 +106,7 @@ def fromConfig(
butlerRoot: ResourcePathExpression | None = None,
writeable: bool = True,
defaults: RegistryDefaults | None = None,
- ) -> Registry:
+ ) -> _ButlerRegistry:
# Docstring inherited from lsst.daf.butler.registry.Registry
config = cls.forceRegistryConfig(config)
config.replaceRoot(butlerRoot)
@@ -133,10 +132,6 @@ def __init__(
self._db = server_uri
self._defaults = defaults
- # In the future DatasetIdFactory may become configurable and this
- # instance will need to be shared with datasets manager.
- self.datasetIdFactory = DatasetIdFactory()
-
# All PUT calls should be short-circuited if not writeable.
self._writeable = writeable
@@ -167,8 +162,8 @@ def isWriteable(self) -> bool:
# Can be used to prevent any PUTs to server
return self._writeable
- def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
- # Docstring inherited from lsst.daf.butler.registry.Registry
+ def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
+ # Docstring inherited from lsst.daf.butler.registry._ButlerRegistry
if defaults is None:
# No need to copy, because `RegistryDefaults` is immutable; we
# effectively copy on write.
diff --git a/python/lsst/daf/butler/registries/sql.py b/python/lsst/daf/butler/registries/sql.py
index a216a7ff2a..dc112eeafb 100644
--- a/python/lsst/daf/butler/registries/sql.py
+++ b/python/lsst/daf/butler/registries/sql.py
@@ -42,7 +42,6 @@
DatasetAssociation,
DatasetColumnTag,
DatasetId,
- DatasetIdFactory,
DatasetIdGenEnum,
DatasetRef,
DatasetType,
@@ -73,10 +72,10 @@
InconsistentDataIdError,
NoDefaultCollectionError,
OrphanedRecordError,
- Registry,
RegistryConfig,
RegistryConsistencyError,
RegistryDefaults,
+ _ButlerRegistry,
queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
@@ -97,7 +96,7 @@
_LOG = logging.getLogger(__name__)
-class SqlRegistry(Registry):
+class SqlRegistry(_ButlerRegistry):
"""Registry implementation based on SQLAlchemy.
Parameters
@@ -122,7 +121,7 @@ def createFromConfig(
config: RegistryConfig | str | None = None,
dimensionConfig: DimensionConfig | str | None = None,
butlerRoot: ResourcePathExpression | None = None,
- ) -> Registry:
+ ) -> _ButlerRegistry:
"""Create registry database and return `SqlRegistry` instance.
This method initializes database contents, database must be empty
@@ -169,7 +168,7 @@ def fromConfig(
butlerRoot: ResourcePathExpression | None = None,
writeable: bool = True,
defaults: RegistryDefaults | None = None,
- ) -> Registry:
+ ) -> _ButlerRegistry:
"""Create `Registry` subclass instance from `config`.
Registry database must be initialized prior to calling this method.
@@ -215,9 +214,6 @@ def __init__(self, database: Database, defaults: RegistryDefaults, managers: Reg
# can only be done after most of the rest of Registry has already been
# initialized, and must be done before the property getter is used.
self.defaults = defaults
- # In the future DatasetIdFactory may become configurable and this
- # instance will need to be shared with datasets manager.
- self.datasetIdFactory = DatasetIdFactory()
def __str__(self) -> str:
return str(self._db)
@@ -229,7 +225,7 @@ def isWriteable(self) -> bool:
# Docstring inherited from lsst.daf.butler.registry.Registry
return self._db.isWriteable()
- def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
+ def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
# Docstring inherited from lsst.daf.butler.registry.Registry
if defaults is None:
# No need to copy, because `RegistryDefaults` is immutable; we
diff --git a/python/lsst/daf/butler/registry/__init__.py b/python/lsst/daf/butler/registry/__init__.py
index e894782f12..c5819d0a24 100644
--- a/python/lsst/daf/butler/registry/__init__.py
+++ b/python/lsst/daf/butler/registry/__init__.py
@@ -20,6 +20,7 @@
# along with this program. If not, see .
from . import interfaces, managers, queries, wildcards
+from ._butler_registry import _ButlerRegistry
from ._collection_summary import *
from ._collectionType import *
from ._config import *
@@ -27,6 +28,7 @@
from ._defaults import *
from ._exceptions import *
from ._registry import *
+from ._registry_factory import *
from .wildcards import CollectionSearch
# Some modules intentionally not imported, either because they are purely
diff --git a/python/lsst/daf/butler/registry/_butler_registry.py b/python/lsst/daf/butler/registry/_butler_registry.py
new file mode 100644
index 0000000000..f0150f320e
--- /dev/null
+++ b/python/lsst/daf/butler/registry/_butler_registry.py
@@ -0,0 +1,211 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from __future__ import annotations
+
+__all__ = ("_ButlerRegistry",)
+
+from abc import abstractmethod
+from typing import TYPE_CHECKING
+
+from lsst.resources import ResourcePathExpression
+
+from ..core import Config, DimensionConfig
+from ._config import RegistryConfig
+from ._defaults import RegistryDefaults
+from ._registry import Registry
+
+if TYPE_CHECKING:
+ from .._butlerConfig import ButlerConfig
+ from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager
+
+
+class _ButlerRegistry(Registry):
+ """Registry interface extended with methods used by Butler implementation.
+
+ Each registry implementation can have its own constructor parameters.
+ The assumption is that an instance of a specific subclass will be
+ constructed from configuration using `Registry.fromConfig()`.
+ The base class will look for a ``cls`` entry and call that specific
+ `fromConfig()` method.
+ """
+
+ defaultConfigFile: str | None = None
+ """Path to configuration defaults. Accessed within the ``configs`` resource
+ or relative to a search path. Can be None if no defaults specified.
+ """
+
+ @classmethod
+ def forceRegistryConfig(
+ cls, config: ButlerConfig | RegistryConfig | Config | str | None
+ ) -> RegistryConfig:
+ """Force the supplied config to a `RegistryConfig`.
+
+ Parameters
+ ----------
+ config : `RegistryConfig`, `Config` or `str` or `None`
+ Registry configuration, if missing then default configuration will
+ be loaded from registry.yaml.
+
+ Returns
+ -------
+ registry_config : `RegistryConfig`
+ A registry config.
+ """
+ if not isinstance(config, RegistryConfig):
+ if isinstance(config, (str, Config)) or config is None:
+ config = RegistryConfig(config)
+ else:
+ raise ValueError(f"Incompatible Registry configuration: {config}")
+ return config
+
+ @classmethod
+ @abstractmethod
+ def createFromConfig(
+ cls,
+ config: RegistryConfig | str | None = None,
+ dimensionConfig: DimensionConfig | str | None = None,
+ butlerRoot: ResourcePathExpression | None = None,
+ ) -> _ButlerRegistry:
+ """Create registry database and return `_ButlerRegistry` instance.
+
+ This method initializes database contents, database must be empty
+ prior to calling this method.
+
+ Parameters
+ ----------
+ config : `RegistryConfig` or `str`, optional
+ Registry configuration, if missing then default configuration will
+ be loaded from registry.yaml.
+ dimensionConfig : `DimensionConfig` or `str`, optional
+ Dimensions configuration, if missing then default configuration
+ will be loaded from dimensions.yaml.
+ butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
+ Path to the repository root this `Registry` will manage.
+
+ Returns
+ -------
+ registry : `_ButlerRegistry`
+ A new `_ButlerRegistry` instance.
+
+ Notes
+ -----
+ This class will determine the concrete `_ButlerRegistry` subclass to
+ use from configuration. Each subclass should implement this method
+ even if it can not create a registry.
+ """
+ raise NotImplementedError()
+
+ @classmethod
+ @abstractmethod
+ def fromConfig(
+ cls,
+ config: ButlerConfig | RegistryConfig | Config | str,
+ butlerRoot: ResourcePathExpression | None = None,
+ writeable: bool = True,
+ defaults: RegistryDefaults | None = None,
+ ) -> _ButlerRegistry:
+ """Create `_ButlerRegistry` subclass instance from ``config``.
+
+ Registry database must be initialized prior to calling this method.
+
+ Parameters
+ ----------
+ config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
+ Registry configuration
+ butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
+ Path to the repository root this `Registry` will manage.
+ writeable : `bool`, optional
+ If `True` (default) create a read-write connection to the database.
+ defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
+ Default collection search path and/or output `~CollectionType.RUN`
+ collection.
+
+ Returns
+ -------
+ registry : `_ButlerRegistry` (subclass)
+ A new `_ButlerRegistry` subclass instance.
+
+ Notes
+ -----
+ This class will determine the concrete `_ButlerRegistry` subclass to
+ use from configuration. Each subclass should implement this method.
+ """
+ # The base class implementation should trampoline to the correct
+ # subclass. No implementation should ever use this implementation
+ # directly. If no class is specified, default to the standard
+ # registry.
+ raise NotImplementedError()
+
+ @abstractmethod
+ def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
+ """Create a new `_ButlerRegistry` backed by the same data repository
+ and connection as this one, but independent defaults.
+
+ Parameters
+ ----------
+ defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
+ Default collections and data ID values for the new registry. If
+ not provided, ``self.defaults`` will be used (but future changes
+ to either registry's defaults will not affect the other).
+
+ Returns
+ -------
+ copy : `_ButlerRegistry`
+ A new `_ButlerRegistry` instance with its own defaults.
+
+ Notes
+ -----
+ Because the new registry shares a connection with the original, they
+ also share transaction state (despite the fact that their `transaction`
+ context manager methods do not reflect this), and must be used with
+ care.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _get_collection_record(self, name: str) -> CollectionRecord:
+ """Return the record for this collection.
+
+ Parameters
+ ----------
+ name : `str`
+ Name of the collection for which the record is to be retrieved.
+
+ Returns
+ -------
+ record : `CollectionRecord`
+ The record for this collection.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
+ """Return an object that allows a new `Datastore` instance to
+ communicate with this `Registry`.
+
+ Returns
+ -------
+ manager : `~.interfaces.DatastoreRegistryBridgeManager`
+ Object that mediates communication between this `Registry` and its
+ associated datastores.
+ """
+ raise NotImplementedError()
diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py
index 8ca9cdadcf..b97c62a1d4 100644
--- a/python/lsst/daf/butler/registry/_registry.py
+++ b/python/lsst/daf/butler/registry/_registry.py
@@ -32,15 +32,12 @@
from typing import TYPE_CHECKING, Any
from lsst.resources import ResourcePathExpression
-from lsst.utils import doImportType
from ..core import (
- Config,
DataCoordinate,
DataId,
DatasetAssociation,
DatasetId,
- DatasetIdFactory,
DatasetIdGenEnum,
DatasetRef,
DatasetType,
@@ -62,8 +59,7 @@
from .wildcards import CollectionWildcard
if TYPE_CHECKING:
- from .._butlerConfig import ButlerConfig
- from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager
+ from .interfaces import ObsCoreTableManager
_LOG = logging.getLogger(__name__)
@@ -74,78 +70,11 @@
class Registry(ABC):
"""Abstract Registry interface.
- Each registry implementation can have its own constructor parameters.
- The assumption is that an instance of a specific subclass will be
- constructed from configuration using `Registry.fromConfig()`.
- The base class will look for a ``cls`` entry and call that specific
- `fromConfig()` method.
-
All subclasses should store `~lsst.daf.butler.registry.RegistryDefaults` in
a ``_defaults`` property. No other properties are assumed shared between
implementations.
"""
- defaultConfigFile: str | None = None
- """Path to configuration defaults. Accessed within the ``configs`` resource
- or relative to a search path. Can be None if no defaults specified.
- """
-
- @classmethod
- def forceRegistryConfig(
- cls, config: ButlerConfig | RegistryConfig | Config | str | None
- ) -> RegistryConfig:
- """Force the supplied config to a `RegistryConfig`.
-
- Parameters
- ----------
- config : `RegistryConfig`, `Config` or `str` or `None`
- Registry configuration, if missing then default configuration will
- be loaded from registry.yaml.
-
- Returns
- -------
- registry_config : `RegistryConfig`
- A registry config.
- """
- if not isinstance(config, RegistryConfig):
- if isinstance(config, (str, Config)) or config is None:
- config = RegistryConfig(config)
- else:
- raise ValueError(f"Incompatible Registry configuration: {config}")
- return config
-
- @classmethod
- def determineTrampoline(
- cls, config: ButlerConfig | RegistryConfig | Config | str | None
- ) -> tuple[type[Registry], RegistryConfig]:
- """Return class to use to instantiate real registry.
-
- Parameters
- ----------
- config : `RegistryConfig` or `str`, optional
- Registry configuration, if missing then default configuration will
- be loaded from registry.yaml.
-
- Returns
- -------
- requested_cls : `type` of `Registry`
- The real registry class to use.
- registry_config : `RegistryConfig`
- The `RegistryConfig` to use.
- """
- config = cls.forceRegistryConfig(config)
-
- # Default to the standard registry
- registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
- registry_cls = doImportType(registry_cls_name)
- if registry_cls is cls:
- raise ValueError("Can not instantiate the abstract base Registry from config")
- if not issubclass(registry_cls, Registry):
- raise TypeError(
- f"Registry class obtained from config {registry_cls_name} is not a Registry class."
- )
- return registry_cls, config
-
@classmethod
def createFromConfig(
cls,
@@ -176,53 +105,14 @@ def createFromConfig(
Notes
-----
- This class will determine the concrete `Registry` subclass to
- use from configuration. Each subclass should implement this method
- even if it can not create a registry.
+ This method is for backward compatibility only, until all clients
+ migrate to use new `~lsst.daf.butler.registry._RegistryFactory` factory
+ class. Regular clients of registry class do not use this method, it is
+ only used by tests in multiple packages.
"""
- registry_cls, registry_config = cls.determineTrampoline(config)
- return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot)
-
- @classmethod
- def fromConfig(
- cls,
- config: ButlerConfig | RegistryConfig | Config | str,
- butlerRoot: ResourcePathExpression | None = None,
- writeable: bool = True,
- defaults: RegistryDefaults | None = None,
- ) -> Registry:
- """Create `Registry` subclass instance from ``config``.
-
- Registry database must be initialized prior to calling this method.
-
- Parameters
- ----------
- config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
- Registry configuration
- butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
- Path to the repository root this `Registry` will manage.
- writeable : `bool`, optional
- If `True` (default) create a read-write connection to the database.
- defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
- Default collection search path and/or output `~CollectionType.RUN`
- collection.
+ from ._registry_factory import _RegistryFactory
- Returns
- -------
- registry : `Registry` (subclass)
- A new `Registry` subclass instance.
-
- Notes
- -----
- This class will determine the concrete `Registry` subclass to
- use from configuration. Each subclass should implement this method.
- """
- # The base class implementation should trampoline to the correct
- # subclass. No implementation should ever use this implementation
- # directly. If no class is specified, default to the standard
- # registry.
- registry_cls, registry_config = cls.determineTrampoline(config)
- return registry_cls.fromConfig(config, butlerRoot, writeable, defaults)
+ return _RegistryFactory(config).create_from_config(dimensionConfig, butlerRoot)
@abstractmethod
def isWriteable(self) -> bool:
@@ -231,32 +121,6 @@ def isWriteable(self) -> bool:
"""
raise NotImplementedError()
- @abstractmethod
- def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
- """Create a new `Registry` backed by the same data repository and
- connection as this one, but independent defaults.
-
- Parameters
- ----------
- defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
- Default collections and data ID values for the new registry. If
- not provided, ``self.defaults`` will be used (but future changes
- to either registry's defaults will not affect the other).
-
- Returns
- -------
- copy : `Registry`
- A new `Registry` instance with its own defaults.
-
- Notes
- -----
- Because the new registry shares a connection with the original, they
- also share transaction state (despite the fact that their `transaction`
- context manager methods do not reflect this), and must be used with
- care.
- """
- raise NotImplementedError()
-
@property
@abstractmethod
def dimensions(self) -> DimensionUniverse:
@@ -360,22 +224,6 @@ def getCollectionType(self, name: str) -> CollectionType:
"""
raise NotImplementedError()
- @abstractmethod
- def _get_collection_record(self, name: str) -> CollectionRecord:
- """Return the record for this collection.
-
- Parameters
- ----------
- name : `str`
- Name of the collection for which the record is to be retrieved.
-
- Returns
- -------
- record : `CollectionRecord`
- The record for this collection.
- """
- raise NotImplementedError()
-
@abstractmethod
def registerRun(self, name: str, doc: str | None = None) -> bool:
"""Add a new run if one with the given name does not exist.
@@ -1017,19 +865,6 @@ def decertify(
"""
raise NotImplementedError()
- @abstractmethod
- def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
- """Return an object that allows a new `Datastore` instance to
- communicate with this `Registry`.
-
- Returns
- -------
- manager : `~.interfaces.DatastoreRegistryBridgeManager`
- Object that mediates communication between this `Registry` and its
- associated datastores.
- """
- raise NotImplementedError()
-
@abstractmethod
def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
"""Retrieve datastore locations for a given dataset.
@@ -1678,6 +1513,3 @@ def obsCoreTableManager(self) -> ObsCoreTableManager | None:
storageClasses: StorageClassFactory
"""All storage classes known to the registry (`StorageClassFactory`).
"""
-
- datasetIdFactory: DatasetIdFactory
- """Factory for dataset IDs."""
diff --git a/python/lsst/daf/butler/registry/_registry_factory.py b/python/lsst/daf/butler/registry/_registry_factory.py
new file mode 100644
index 0000000000..74bb9291df
--- /dev/null
+++ b/python/lsst/daf/butler/registry/_registry_factory.py
@@ -0,0 +1,128 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+from __future__ import annotations
+
+__all__ = ("_RegistryFactory",)
+
+from typing import TYPE_CHECKING
+
+from lsst.resources import ResourcePathExpression
+from lsst.utils import doImportType
+
+from ..core import Config, DimensionConfig
+from ._butler_registry import _ButlerRegistry
+from ._config import RegistryConfig
+from ._defaults import RegistryDefaults
+
+if TYPE_CHECKING:
+ from .._butlerConfig import ButlerConfig
+
+
+class _RegistryFactory:
+ """Interface for creating and initializing Registry instances.
+
+ Parameters
+ ----------
+ config : `RegistryConfig` or `str`, optional
+ Registry configuration, if missing then default configuration will
+ be loaded from registry.yaml.
+
+ Notes
+ -----
+ Each registry implementation can have its own constructor parameters.
+ The assumption is that an instance of a specific subclass will be
+ constructed from configuration using ``RegistryClass.fromConfig()`` or
+ ``RegistryClass.createFromConfig()``.
+
+ This class will look for a ``cls`` entry in registry configuration object
+ (defaulting to ``SqlRegistry``), import that class, and call one of the
+ above methods on the imported class.
+ """
+
+ def __init__(self, config: ButlerConfig | RegistryConfig | Config | str | None):
+ if not isinstance(config, RegistryConfig):
+ if isinstance(config, (str, Config)) or config is None:
+ config = RegistryConfig(config)
+ else:
+ raise ValueError(f"Incompatible Registry configuration: {config}")
+ self._config = config
+
+ # Default to the standard registry
+ registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry")
+ registry_cls = doImportType(registry_cls_name)
+ if not issubclass(registry_cls, _ButlerRegistry):
+ raise TypeError(
+ f"Registry class obtained from config {registry_cls_name} is not a _ButlerRegistry class."
+ )
+ self._registry_cls = registry_cls
+
+ def create_from_config(
+ self,
+ dimensionConfig: DimensionConfig | str | None = None,
+ butlerRoot: ResourcePathExpression | None = None,
+ ) -> _ButlerRegistry:
+ """Create registry database and return `_ButlerRegistry` instance.
+
+ This method initializes database contents, database must be empty
+ prior to calling this method.
+
+ Parameters
+ ----------
+ dimensionConfig : `DimensionConfig` or `str`, optional
+ Dimensions configuration, if missing then default configuration
+ will be loaded from dimensions.yaml.
+ butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
+ Path to the repository root this `Registry` will manage.
+
+ Returns
+ -------
+ registry : `_ButlerRegistry`
+ A new `_ButlerRegistry` instance.
+ """
+ return self._registry_cls.createFromConfig(self._config, dimensionConfig, butlerRoot)
+
+ def from_config(
+ self,
+ butlerRoot: ResourcePathExpression | None = None,
+ writeable: bool = True,
+ defaults: RegistryDefaults | None = None,
+ ) -> _ButlerRegistry:
+ """Create `_ButlerRegistry` subclass instance from ``config``.
+
+ Registry database must be initialized prior to calling this method.
+
+ Parameters
+ ----------
+ butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
+ Path to the repository root this `Registry` will manage.
+ writeable : `bool`, optional
+ If `True` (default) create a read-write connection to the database.
+ defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
+ Default collection search path and/or output `~CollectionType.RUN`
+ collection.
+
+ Returns
+ -------
+ registry : `_ButlerRegistry` (subclass)
+ A new `_ButlerRegistry` subclass instance.
+ """
+ return self._registry_cls.fromConfig(self._config, butlerRoot, writeable, defaults)
diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py
index 435f295690..558ae01f5f 100644
--- a/python/lsst/daf/butler/registry/tests/_registry.py
+++ b/python/lsst/daf/butler/registry/tests/_registry.py
@@ -49,6 +49,7 @@
DataCoordinate,
DataCoordinateSet,
DatasetAssociation,
+ DatasetIdFactory,
DatasetIdGenEnum,
DatasetRef,
DatasetType,
@@ -3090,7 +3091,7 @@ def testDatasetIdFactory(self):
in its API.
"""
registry = self.makeRegistry()
- factory = registry.datasetIdFactory
+ factory = DatasetIdFactory()
dataset_type = DatasetType(
"datasetType",
dimensions=["detector", "instrument"],
diff --git a/python/lsst/daf/butler/tests/_datasetsHelper.py b/python/lsst/daf/butler/tests/_datasetsHelper.py
index ff7e7f0b6a..c7be535714 100644
--- a/python/lsst/daf/butler/tests/_datasetsHelper.py
+++ b/python/lsst/daf/butler/tests/_datasetsHelper.py
@@ -37,7 +37,8 @@
from lsst.daf.butler.formatters.yaml import YamlFormatter
if TYPE_CHECKING:
- from lsst.daf.butler import Config, DatasetId, Datastore, Dimension, DimensionGraph, Registry
+ from lsst.daf.butler import Config, DatasetId, Datastore, Dimension, DimensionGraph
+ from lsst.daf.butler.registry import _ButlerRegistry
class DatasetTestHelper:
@@ -101,7 +102,7 @@ class DatastoreTestHelper:
datastoreType: type[Datastore]
configFile: str
- def setUpDatastoreTests(self, registryClass: type[Registry], configClass: type[Config]) -> None:
+ def setUpDatastoreTests(self, registryClass: type[_ButlerRegistry], configClass: type[Config]) -> None:
"""Shared setUp code for all Datastore tests."""
self.registry = registryClass()
self.config = configClass(self.configFile)
diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py
index 9b41e83270..8e71516316 100644
--- a/python/lsst/daf/butler/transfers/_context.py
+++ b/python/lsst/daf/butler/transfers/_context.py
@@ -38,7 +38,7 @@
DimensionRecord,
FileDataset,
)
-from ..registry import CollectionType, Registry
+from ..registry import CollectionType, _ButlerRegistry
from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord
from ._interfaces import RepoExportBackend
@@ -58,7 +58,7 @@ class RepoExportContext:
Parameters
----------
- registry : `Registry`
+ registry : `_ButlerRegistry`
Registry to export from.
datastore : `Datastore`
Datastore to export from.
@@ -73,7 +73,7 @@ class RepoExportContext:
def __init__(
self,
- registry: Registry,
+ registry: _ButlerRegistry,
datastore: Datastore,
backend: RepoExportBackend,
*,
diff --git a/tests/test_butler.py b/tests/test_butler.py
index 605f986227..307cad7573 100644
--- a/tests/test_butler.py
+++ b/tests/test_butler.py
@@ -1679,8 +1679,8 @@ def testPytypeCoercion(self) -> None:
# Now need to hack the registry dataset type definition.
# There is no API for this.
- assert isinstance(butler.registry, SqlRegistry)
- manager = butler.registry._managers.datasets
+ assert isinstance(butler._registry, SqlRegistry)
+ manager = butler._registry._managers.datasets
assert hasattr(manager, "_db") and hasattr(manager, "_static")
manager._db.update(
manager._static.dataset_type,
diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py
index 14767065d0..291ede05c3 100644
--- a/tests/test_dimensions.py
+++ b/tests/test_dimensions.py
@@ -42,11 +42,10 @@
DimensionUniverse,
NamedKeyDict,
NamedValueSet,
- Registry,
TimespanDatabaseRepresentation,
YamlRepoImportBackend,
)
-from lsst.daf.butler.registry import RegistryConfig
+from lsst.daf.butler.registry import RegistryConfig, _RegistryFactory
DIMENSION_DATA_FILE = os.path.normpath(
os.path.join(os.path.dirname(__file__), "data", "registry", "hsc-rc2-subset.yaml")
@@ -65,7 +64,7 @@ def loadDimensionData() -> DataCoordinateSequence:
# data and retreive it as a set of DataCoordinate objects.
config = RegistryConfig()
config["db"] = "sqlite://"
- registry = Registry.createFromConfig(config)
+ registry = _RegistryFactory(config).create_from_config()
with open(DIMENSION_DATA_FILE) as stream:
backend = YamlRepoImportBackend(stream, registry)
backend.register()
diff --git a/tests/test_obscore.py b/tests/test_obscore.py
index d50b5b620d..bf213496f3 100644
--- a/tests/test_obscore.py
+++ b/tests/test_obscore.py
@@ -33,13 +33,12 @@
CollectionType,
Config,
DataCoordinate,
- DatasetIdGenEnum,
DatasetRef,
DatasetType,
StorageClassFactory,
)
from lsst.daf.butler.registries.sql import SqlRegistry
-from lsst.daf.butler.registry import Registry, RegistryConfig
+from lsst.daf.butler.registry import Registry, RegistryConfig, _ButlerRegistry, _RegistryFactory
from lsst.daf.butler.registry.obscore import (
DatasetTypeConfig,
ObsCoreConfig,
@@ -65,10 +64,10 @@ class ObsCoreTests(TestCaseMixin):
def make_registry(
self, collections: list[str] | None = None, collection_type: str | None = None
- ) -> Registry:
+ ) -> _ButlerRegistry:
"""Create new empty Registry."""
config = self.make_registry_config(collections, collection_type)
- registry = Registry.createFromConfig(config, butlerRoot=self.root)
+ registry = _RegistryFactory(config).create_from_config(butlerRoot=self.root)
self.initialize_registry(registry)
return registry
@@ -225,10 +224,7 @@ def _insert_dataset(
coordinate = DataCoordinate.standardize(data_id, universe=registry.dimensions)
if do_import:
ds_type = self.dataset_types[dataset_type]
- dataset_id = registry.datasetIdFactory.makeDatasetId(
- run, ds_type, coordinate, DatasetIdGenEnum.UNIQUE
- )
- ref = DatasetRef(ds_type, coordinate, id=dataset_id, run=run)
+ ref = DatasetRef(ds_type, coordinate, run=run)
[ref] = registry._importDatasets([ref])
else:
[ref] = registry.insertDatasets(dataset_type, [data_id], run=run)
diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py
index 78dc6c2308..b1e5d573bc 100644
--- a/tests/test_postgresql.py
+++ b/tests/test_postgresql.py
@@ -40,7 +40,7 @@
import sqlalchemy
from lsst.daf.butler import Timespan, ddl
-from lsst.daf.butler.registry import Registry
+from lsst.daf.butler.registry import _ButlerRegistry, _RegistryFactory
from lsst.daf.butler.registry.databases.postgresql import PostgresqlDatabase, _RangeTimespanType
from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
@@ -236,7 +236,7 @@ def tearDownClass(cls):
def getDataDir(cls) -> str:
return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
- def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry:
+ def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry:
if share_repo_with is None:
namespace = f"namespace_{secrets.token_hex(8).lower()}"
else:
@@ -245,9 +245,9 @@ def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry:
config["db"] = self.server.url()
config["namespace"] = namespace
if share_repo_with is None:
- return Registry.createFromConfig(config)
+ return _RegistryFactory(config).create_from_config()
else:
- return Registry.fromConfig(config)
+ return _RegistryFactory(config).from_config()
class PostgresqlRegistryNameKeyCollMgrUUIDTestCase(PostgresqlRegistryTests, unittest.TestCase):
diff --git a/tests/test_quantumBackedButler.py b/tests/test_quantumBackedButler.py
index 18c6c898e7..486fe2113d 100644
--- a/tests/test_quantumBackedButler.py
+++ b/tests/test_quantumBackedButler.py
@@ -34,10 +34,10 @@
Quantum,
QuantumBackedButler,
QuantumProvenanceData,
- Registry,
RegistryConfig,
StorageClass,
)
+from lsst.daf.butler.registry import _RegistryFactory
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath
@@ -55,7 +55,7 @@ def setUp(self) -> None:
# Make a butler and import dimension definitions.
registryConfig = RegistryConfig(self.config.get("registry"))
- Registry.createFromConfig(registryConfig, butlerRoot=self.root)
+ _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root)
self.butler = Butler(self.config, writeable=True, run="RUN")
self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py
index ffead5de84..096a7aa7b8 100644
--- a/tests/test_query_relations.py
+++ b/tests/test_query_relations.py
@@ -25,7 +25,7 @@
import re
import unittest
-from lsst.daf.butler.registry import MissingSpatialOverlapError, Registry, RegistryConfig, queries
+from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory, queries
from lsst.daf.butler.transfers import YamlRepoImportBackend
TESTDIR = os.path.abspath(os.path.dirname(__file__))
@@ -52,7 +52,7 @@ class TestQueryRelationsTests(unittest.TestCase):
def setUpClass(cls) -> None:
config = RegistryConfig()
config["db"] = "sqlite://"
- cls.registry = Registry.createFromConfig(config)
+ cls.registry = _RegistryFactory(config).create_from_config()
# We need just enough test data to have valid dimension records for
# all of the dimensions we're concerned with, and we want to pick
# values for each dimension that correspond to a spatiotemporal
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index 2a25317f05..87d8dc08d8 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -34,17 +34,8 @@
np = None
import astropy.time
-from lsst.daf.butler import (
- Butler,
- ButlerConfig,
- CollectionType,
- DatasetId,
- DatasetRef,
- DatasetType,
- Registry,
- Timespan,
-)
-from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
+from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetId, DatasetRef, DatasetType, Timespan
+from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
from lsst.daf.butler.tests import DatastoreMock
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
@@ -80,7 +71,7 @@ def makeButler(self, **kwargs: Any) -> Butler:
# have to make a registry first
registryConfig = RegistryConfig(config.get("registry"))
- Registry.createFromConfig(registryConfig)
+ _RegistryFactory(registryConfig).create_from_config()
butler = Butler(config, **kwargs)
DatastoreMock.apply(butler)
@@ -286,7 +277,6 @@ def testButlerGet(self):
# Create a numpy integer to check that works fine
detector_np = np.int64(2) if np else 2
- print(type(detector_np))
# Try to get it using different variations of dataId + keyword
# arguments
diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py
index bf76af233e..7081354234 100644
--- a/tests/test_sqlite.py
+++ b/tests/test_sqlite.py
@@ -28,7 +28,7 @@
import sqlalchemy
from lsst.daf.butler import ddl
-from lsst.daf.butler.registry import Registry
+from lsst.daf.butler.registry import _ButlerRegistry, _RegistryFactory
from lsst.daf.butler.registry.databases.sqlite import SqliteDatabase
from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
@@ -196,7 +196,7 @@ def tearDown(self):
def getDataDir(cls) -> str:
return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
- def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry:
+ def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry:
if share_repo_with is None:
_, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
else:
@@ -204,9 +204,9 @@ def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry:
config = self.makeRegistryConfig()
config["db"] = f"sqlite:///{filename}"
if share_repo_with is None:
- return Registry.createFromConfig(config, butlerRoot=self.root)
+ return _RegistryFactory(config).create_from_config(butlerRoot=self.root)
else:
- return Registry.fromConfig(config, butlerRoot=self.root)
+ return _RegistryFactory(config).from_config(butlerRoot=self.root)
class SqliteFileRegistryNameKeyCollMgrUUIDTestCase(SqliteFileRegistryTests, unittest.TestCase):
@@ -242,12 +242,12 @@ class SqliteMemoryRegistryTests(RegistryTests):
def getDataDir(cls) -> str:
return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
- def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
+ def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry | None:
if share_repo_with is not None:
return None
config = self.makeRegistryConfig()
config["db"] = "sqlite://"
- return Registry.createFromConfig(config)
+ return _RegistryFactory(config).create_from_config()
def testMissingAttributes(self):
"""Test for instantiating a registry against outdated schema which
@@ -258,7 +258,7 @@ def testMissingAttributes(self):
config = self.makeRegistryConfig()
config["db"] = "sqlite://"
with self.assertRaises(LookupError):
- Registry.fromConfig(config)
+ _RegistryFactory(config).from_config()
class SqliteMemoryRegistryNameKeyCollMgrUUIDTestCase(unittest.TestCase, SqliteMemoryRegistryTests):