diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index d8b04781d2..2b04691631 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -51,6 +51,7 @@ from ._dataset_existence import DatasetExistence from ._deferredDatasetHandle import DeferredDatasetHandle from ._limited_butler import LimitedButler +from ._registry_shim import RegistryShim from .core import ( Config, ConfigSubset, @@ -85,6 +86,8 @@ Registry, RegistryConfig, RegistryDefaults, + _ButlerRegistry, + _RegistryFactory, ) from .transfers import RepoExportContext @@ -209,7 +212,7 @@ def __init__( raise TypeError( "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." ) - self.registry = butler.registry.copy(defaults) + self._registry = butler._registry.copy(defaults) self._datastore = butler._datastore self.storageClasses = butler.storageClasses self._config: ButlerConfig = butler._config @@ -222,11 +225,11 @@ def __init__( butlerRoot = self._config.configDir if writeable is None: writeable = run is not None - self.registry = Registry.fromConfig( - self._config, butlerRoot=butlerRoot, writeable=writeable, defaults=defaults + self._registry = _RegistryFactory(self._config).from_config( + butlerRoot=butlerRoot, writeable=writeable, defaults=defaults ) self._datastore = Datastore.fromConfig( - self._config, self.registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot + self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot ) self.storageClasses = StorageClassFactory() self.storageClasses.addFromConfig(self._config) @@ -245,6 +248,8 @@ def __init__( if "run" in self._config or "collection" in self._config: raise ValueError("Passing a run or collection via configuration is no longer supported.") + self._registry_shim = RegistryShim(self) + GENERATION: ClassVar[int] = 3 """This is a Generation 3 Butler. 
@@ -256,7 +261,7 @@ def __init__( def _retrieve_dataset_type(self, name: str) -> DatasetType | None: """Return DatasetType defined in registry given dataset type name.""" try: - return self.registry.getDatasetType(name) + return self._registry.getDatasetType(name) except MissingDatasetTypeError: return None @@ -458,7 +463,9 @@ def makeRepo( # Create Registry and populate tables registryConfig = RegistryConfig(config.get("registry")) dimensionConfig = DimensionConfig(dimensionConfig) - Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri) + _RegistryFactory(registryConfig).create_from_config( + dimensionConfig=dimensionConfig, butlerRoot=root_uri + ) log.verbose("Wrote new Butler configuration file to %s", configURI) @@ -517,19 +524,19 @@ def __reduce__(self) -> tuple: self._config, self.collections, self.run, - self.registry.defaults.dataId.byName(), - self.registry.isWriteable(), + self._registry.defaults.dataId.byName(), + self._registry.isWriteable(), ), ) def __str__(self) -> str: return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( - self.collections, self.run, self._datastore, self.registry + self.collections, self.run, self._datastore, self._registry ) def isWriteable(self) -> bool: """Return `True` if this `Butler` supports write operations.""" - return self.registry.isWriteable() + return self._registry.isWriteable() @contextlib.contextmanager def transaction(self) -> Iterator[None]: @@ -537,7 +544,7 @@ def transaction(self) -> Iterator[None]: Transactions can be nested. 
""" - with self.registry.transaction(): + with self._registry.transaction(): with self._datastore.transaction(): yield @@ -602,11 +609,11 @@ def _standardizeArgs( if isinstance(datasetRefOrType, DatasetType): externalDatasetType = datasetRefOrType else: - internalDatasetType = self.registry.getDatasetType(datasetRefOrType) + internalDatasetType = self._registry.getDatasetType(datasetRefOrType) # Check that they are self-consistent if externalDatasetType is not None: - internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) + internalDatasetType = self._registry.getDatasetType(externalDatasetType.name) if externalDatasetType != internalDatasetType: # We can allow differences if they are compatible, depending # on whether this is a get or a put. A get requires that @@ -882,7 +889,7 @@ def _rewrite_data_id( ) # Get the actual record and compare with these values. try: - recs = list(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId)) + recs = list(self._registry.queryDimensionRecords(dimensionName, dataId=newDataId)) except DataIdError: raise ValueError( f"Could not find dimension '{dimensionName}'" @@ -911,7 +918,7 @@ def _rewrite_data_id( # Hopefully we get a single record that matches records = set( - self.registry.queryDimensionRecords( + self._registry.queryDimensionRecords( dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs ) ) @@ -927,7 +934,7 @@ def _rewrite_data_id( and "visit_system" in self.dimensions["instrument"].metadata ): instrument_records = list( - self.registry.queryDimensionRecords( + self._registry.queryDimensionRecords( "instrument", dataId=newDataId, **kwargs, @@ -943,7 +950,7 @@ def _rewrite_data_id( # visit_system_membership records. for rec in records: membership = list( - self.registry.queryDimensionRecords( + self._registry.queryDimensionRecords( # Use bind to allow zero results. # This is a fully-specified query. 
"visit_system_membership", @@ -1055,21 +1062,21 @@ def _findDatasetRef( # dimensions that provide temporal information for a validity-range # lookup. dataId = DataCoordinate.standardize( - dataId, universe=self.dimensions, defaults=self.registry.defaults.dataId, **kwargs + dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs ) if dataId.graph.temporal: - dataId = self.registry.expandDataId(dataId) + dataId = self._registry.expandDataId(dataId) timespan = dataId.timespan else: # Standardize the data ID to just the dimensions of the dataset # type instead of letting registry.findDataset do it, so we get the # result even if no dataset is found. dataId = DataCoordinate.standardize( - dataId, graph=datasetType.dimensions, defaults=self.registry.defaults.dataId, **kwargs + dataId, graph=datasetType.dimensions, defaults=self._registry.defaults.dataId, **kwargs ) # Always lookup the DatasetRef, even if one is given, to ensure it is # present in the current collection. - ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) + ref = self._registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) if ref is None: if predict: if run is None: @@ -1079,7 +1086,7 @@ def _findDatasetRef( return DatasetRef(datasetType, dataId, run=run) else: if collections is None: - collections = self.registry.defaults.collections + collections = self._registry.defaults.collections raise LookupError( f"Dataset {datasetType.name} with data ID {dataId} " f"could not be found in collections {collections}." @@ -1160,7 +1167,7 @@ def put( # dataset type and DataId, then _importDatasets will do nothing and # just return an original ref. We have to raise in this case, there # is a datastore check below for that. 
- self.registry._importDatasets([datasetRefOrType], expand=True) + self._registry._importDatasets([datasetRefOrType], expand=True) # Before trying to write to the datastore check that it does not # know this dataset. This is prone to races, of course. if self._datastore.knows(datasetRefOrType): @@ -1183,8 +1190,8 @@ def put( dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) # Add Registry Dataset entry. - dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs) - (ref,) = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) + dataId = self._registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs) + (ref,) = self._registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) self._datastore.put(obj, ref) return ref @@ -1622,7 +1629,7 @@ def exists( if data_id is not None: warnings.warn("A DataID should not be specified with DatasetRef", stacklevel=2) ref = dataset_ref_or_type - registry_ref = self.registry.getDataset(dataset_ref_or_type.id) + registry_ref = self._registry.getDataset(dataset_ref_or_type.id) if registry_ref is not None: existence |= DatasetExistence.RECORDED @@ -1692,7 +1699,7 @@ def _exists_many( # Registry does not have a bulk API to check for a ref. for ref in refs: - registry_ref = self.registry.getDataset(ref.id) + registry_ref = self._registry.getDataset(ref.id) if registry_ref is not None: # It is possible, albeit unlikely, that the given ref does # not match the one in registry even though the UUID matches. @@ -1769,7 +1776,7 @@ def datasetExists( """ # A resolved ref may be given that is not known to this butler. if isinstance(datasetRefOrType, DatasetRef): - ref = self.registry.getDataset(datasetRefOrType.id) + ref = self._registry.getDataset(datasetRefOrType.id) if ref is None: raise LookupError( f"Resolved DatasetRef with id {datasetRefOrType.id} is not known to registry." 
@@ -1804,18 +1811,18 @@ def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: names = list(names) refs: list[DatasetRef] = [] for name in names: - collectionType = self.registry.getCollectionType(name) + collectionType = self._registry.getCollectionType(name) if collectionType is not CollectionType.RUN: raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") - refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) + refs.extend(self._registry.queryDatasets(..., collections=name, findFirst=True)) with self._datastore.transaction(): - with self.registry.transaction(): + with self._registry.transaction(): if unstore: self._datastore.trash(refs) else: self._datastore.forget(refs) for name in names: - self.registry.removeCollection(name) + self._registry.removeCollection(name) if unstore: # Point of no return for removing artifacts self._datastore.emptyTrash() @@ -1843,7 +1850,7 @@ def pruneDatasets( if not tags: raise TypeError("No tags provided but disassociate=True.") for tag in tags: - collectionType = self.registry.getCollectionType(tag) + collectionType = self._registry.getCollectionType(tag) if collectionType is not CollectionType.TAGGED: raise TypeError( f"Cannot disassociate from collection '{tag}' " @@ -1864,15 +1871,15 @@ def pruneDatasets( # but shouldn't change them), and hence all operations here are # Registry operations. with self._datastore.transaction(): - with self.registry.transaction(): + with self._registry.transaction(): if unstore: self._datastore.trash(refs) if purge: - self.registry.removeDatasets(refs) + self._registry.removeDatasets(refs) elif disassociate: assert tags, "Guaranteed by earlier logic in this function." for tag in tags: - self.registry.disassociate(tag, refs) + self._registry.disassociate(tag, refs) # We've exited the Registry transaction, and apparently committed. 
# (if there was an exception, everything rolled back, and it's as if # nothing happened - and we never get here). @@ -2041,7 +2048,7 @@ def ingest( # Import the refs and expand the DataCoordinates since we can't # guarantee that they are expanded and Datastore will need # the records. - imported_refs = self.registry._importDatasets(refs_to_import, expand=True) + imported_refs = self._registry._importDatasets(refs_to_import, expand=True) assert set(imported_refs) == set(refs_to_import) # Replace all the refs in the FileDataset with expanded versions. @@ -2138,7 +2145,7 @@ def export( backend = BackendClass(stream, universe=self.dimensions) try: helper = RepoExportContext( - self.registry, self._datastore, backend=backend, directory=directory, transfer=transfer + self._registry, self._datastore, backend=backend, directory=directory, transfer=transfer ) yield helper except BaseException: @@ -2228,7 +2235,7 @@ def import_( ) def doImport(importStream: TextIO | ResourceHandleProtocol) -> None: - backend = BackendClass(importStream, self.registry) # type: ignore[call-arg] + backend = BackendClass(importStream, self._registry) # type: ignore[call-arg] backend.register() with self.transaction(): backend.load( @@ -2348,11 +2355,11 @@ def transfer_from( # on to find additional inconsistent dataset types # might result in additional unwanted dataset types being # registered. - if self.registry.registerDatasetType(datasetType): + if self._registry.registerDatasetType(datasetType): newly_registered_dataset_types.add(datasetType) else: # If the dataset type is missing, let it fail immediately. 
- target_dataset_type = self.registry.getDatasetType(datasetType.name) + target_dataset_type = self._registry.getDatasetType(datasetType.name) if target_dataset_type != datasetType: raise ConflictingDefinitionError( "Source butler dataset type differs from definition" @@ -2407,7 +2414,7 @@ def transfer_from( # Assume that if the record is already present that we can # use it without having to check that the record metadata # is consistent. - self.registry.insertDimensionData(element, *records, skip_existing=True) + self._registry.insertDimensionData(element, *records, skip_existing=True) n_imported = 0 for (datasetType, run), refs_to_import in progress.iter_item_chunks( @@ -2419,7 +2426,7 @@ def transfer_from( run_doc = None if registry := getattr(source_butler, "registry", None): run_doc = registry.getCollectionDocumentation(run) - registered = self.registry.registerRun(run, doc=run_doc) + registered = self._registry.registerRun(run, doc=run_doc) handled_collections.add(run) if registered: log.log(VERBOSE, "Creating output run %s", run) @@ -2435,7 +2442,7 @@ def transfer_from( # Assume we are using UUIDs and the source refs will match # those imported. - imported_refs = self.registry._importDatasets(refs_to_import, expand=False) + imported_refs = self._registry._importDatasets(refs_to_import, expand=False) assert set(imported_refs) == set(refs_to_import) n_imported += len(imported_refs) @@ -2493,9 +2500,9 @@ def validateConfiguration( is configured. """ if datasetTypeNames: - datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] + datasetTypes = [self._registry.getDatasetType(name) for name in datasetTypeNames] else: - datasetTypes = list(self.registry.queryDatasetTypes()) + datasetTypes = list(self._registry.queryDatasetTypes()) # filter out anything from the ignore list if ignore: @@ -2513,7 +2520,7 @@ def validateConfiguration( # Find all the registered instruments (if "instrument" is in the # universe). 
if "instrument" in self.dimensions: - instruments = {record.name for record in self.registry.queryDimensionRecords("instrument")} + instruments = {record.name for record in self._registry.queryDimensionRecords("instrument")} for datasetType in datasetTypes: if "instrument" in datasetType.dimensions: @@ -2563,7 +2570,7 @@ def validateConfiguration( pass else: try: - self.registry.getDatasetType(key.name) + self._registry.getDatasetType(key.name) except KeyError: if logFailures: log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) @@ -2612,7 +2619,7 @@ def collections(self) -> Sequence[str]: by assigning a new `RegistryDefaults` instance to ``self.registry.defaults``. """ - return self.registry.defaults.collections + return self._registry.defaults.collections @property def run(self) -> str | None: @@ -2624,15 +2631,27 @@ def run(self) -> str | None: assigning a new `RegistryDefaults` instance to ``self.registry.defaults``. """ - return self.registry.defaults.run + return self._registry.defaults.run + + @property + def registry(self) -> Registry: + """The object that manages dataset metadata and relationships + (`Registry`). + + Many operations that don't involve reading or writing butler datasets + are accessible only via `Registry` methods. Eventually these methods + will be replaced by equivalent `Butler` methods. + """ + return self._registry_shim @property def dimensions(self) -> DimensionUniverse: # Docstring inherited. - return self.registry.dimensions + return self._registry.dimensions - registry: Registry - """The object that manages dataset metadata and relationships (`Registry`). + _registry: _ButlerRegistry + """The object that manages dataset metadata and relationships + (`_ButlerRegistry`). Most operations that don't involve reading or writing butler datasets are accessible only via `Registry` methods. 
diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py new file mode 100644 index 0000000000..3cc12f3944 --- /dev/null +++ b/python/lsst/daf/butler/_registry_shim.py @@ -0,0 +1,392 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +__all__ = ("RegistryShim",) + +import contextlib +from collections.abc import Iterable, Iterator, Mapping, Sequence +from typing import TYPE_CHECKING, Any + +from .core import ( + DataCoordinate, + DataId, + DatasetAssociation, + DatasetId, + DatasetIdGenEnum, + DatasetRef, + DatasetType, + Dimension, + DimensionElement, + DimensionGraph, + DimensionRecord, + DimensionUniverse, + NameLookupMapping, + Timespan, +) +from .registry import Registry +from .registry._collection_summary import CollectionSummary +from .registry._collectionType import CollectionType +from .registry._defaults import RegistryDefaults +from .registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults + +if TYPE_CHECKING: + from ._butler import Butler + from .registry._registry import CollectionArgType + from .registry.interfaces import ObsCoreTableManager + + +class RegistryShim(Registry): + """Implementation of `Registry` interface exposed to clients by `Butler`. + + Parameters + ---------- + butler : `Butler` + Data butler instance. + + Notes + ----- + This shim implementation of `Registry` forwards all methods to an actual + Registry instance which is internal to Butler or to Butler methods. Its + purpose is to provide a stable interface to many client-visible operations + while we perform re-structuring of Registry and Butler implementations. + """ + + def __init__(self, butler: Butler): + self._butler = butler + self._registry = butler._registry + + def isWriteable(self) -> bool: + # Docstring inherited from a base class. + return self._registry.isWriteable() + + @property + def dimensions(self) -> DimensionUniverse: + # Docstring inherited from a base class. + return self._registry.dimensions + + @property + def defaults(self) -> RegistryDefaults: + # Docstring inherited from a base class. 
+ return self._registry.defaults + + @defaults.setter + def defaults(self, value: RegistryDefaults) -> None: + # Docstring inherited from a base class. + self._registry.defaults = value + + def refresh(self) -> None: + # Docstring inherited from a base class. + self._registry.refresh() + + @contextlib.contextmanager + def transaction(self, *, savepoint: bool = False) -> Iterator[None]: + # Docstring inherited from a base class. + with self._registry.transaction(savepoint=savepoint): + yield + + def resetConnectionPool(self) -> None: + # Docstring inherited from a base class. + self._registry.resetConnectionPool() + + def registerCollection( + self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None + ) -> bool: + # Docstring inherited from a base class. + return self._registry.registerCollection(name, type, doc) + + def getCollectionType(self, name: str) -> CollectionType: + # Docstring inherited from a base class. + return self._registry.getCollectionType(name) + + def registerRun(self, name: str, doc: str | None = None) -> bool: + # Docstring inherited from a base class. + return self._registry.registerRun(name, doc) + + def removeCollection(self, name: str) -> None: + # Docstring inherited from a base class. + self._registry.removeCollection(name) + + def getCollectionChain(self, parent: str) -> Sequence[str]: + # Docstring inherited from a base class. + return self._registry.getCollectionChain(parent) + + def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: + # Docstring inherited from a base class. + self._registry.setCollectionChain(parent, children, flatten=flatten) + + def getCollectionParentChains(self, collection: str) -> set[str]: + # Docstring inherited from a base class. + return self._registry.getCollectionParentChains(collection) + + def getCollectionDocumentation(self, collection: str) -> str | None: + # Docstring inherited from a base class. 
+ return self._registry.getCollectionDocumentation(collection) + + def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: + # Docstring inherited from a base class. + self._registry.setCollectionDocumentation(collection, doc) + + def getCollectionSummary(self, collection: str) -> CollectionSummary: + # Docstring inherited from a base class. + return self._registry.getCollectionSummary(collection) + + def registerDatasetType(self, datasetType: DatasetType) -> bool: + # Docstring inherited from a base class. + return self._registry.registerDatasetType(datasetType) + + def removeDatasetType(self, name: str | tuple[str, ...]) -> None: + # Docstring inherited from a base class. + self._registry.removeDatasetType(name) + + def getDatasetType(self, name: str) -> DatasetType: + # Docstring inherited from a base class. + return self._registry.getDatasetType(name) + + def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: + # Docstring inherited from a base class. + return self._registry.supportsIdGenerationMode(mode) + + def findDataset( + self, + datasetType: DatasetType | str, + dataId: DataId | None = None, + *, + collections: CollectionArgType | None = None, + timespan: Timespan | None = None, + **kwargs: Any, + ) -> DatasetRef | None: + # Docstring inherited from a base class. + return self._registry.findDataset( + datasetType, dataId, collections=collections, timespan=timespan, **kwargs + ) + + def insertDatasets( + self, + datasetType: DatasetType | str, + dataIds: Iterable[DataId], + run: str | None = None, + expand: bool = True, + idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, + ) -> list[DatasetRef]: + # Docstring inherited from a base class. + return self._registry.insertDatasets(datasetType, dataIds, run, expand, idGenerationMode) + + def _importDatasets(self, datasets: Iterable[DatasetRef], expand: bool = True) -> list[DatasetRef]: + # Docstring inherited from a base class. 
+ return self._registry._importDatasets(datasets, expand) + + def getDataset(self, id: DatasetId) -> DatasetRef | None: + # Docstring inherited from a base class. + return self._registry.getDataset(id) + + def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: + # Docstring inherited from a base class. + self._registry.removeDatasets(refs) + + def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: + # Docstring inherited from a base class. + self._registry.associate(collection, refs) + + def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: + # Docstring inherited from a base class. + self._registry.disassociate(collection, refs) + + def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: + # Docstring inherited from a base class. + self._registry.certify(collection, refs, timespan) + + def decertify( + self, + collection: str, + datasetType: str | DatasetType, + timespan: Timespan, + *, + dataIds: Iterable[DataId] | None = None, + ) -> None: + # Docstring inherited from a base class. + self._registry.decertify(collection, datasetType, timespan, dataIds=dataIds) + + def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: + # Docstring inherited from a base class. + return self._registry.getDatasetLocations(ref) + + def expandDataId( + self, + dataId: DataId | None = None, + *, + graph: DimensionGraph | None = None, + records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, + withDefaults: bool = True, + **kwargs: Any, + ) -> DataCoordinate: + # Docstring inherited from a base class. 
+ return self._registry.expandDataId( + dataId, graph=graph, records=records, withDefaults=withDefaults, **kwargs + ) + + def insertDimensionData( + self, + element: DimensionElement | str, + *data: Mapping[str, Any] | DimensionRecord, + conform: bool = True, + replace: bool = False, + skip_existing: bool = False, + ) -> None: + # Docstring inherited from a base class. + self._registry.insertDimensionData( + element, *data, conform=conform, replace=replace, skip_existing=skip_existing + ) + + def syncDimensionData( + self, + element: DimensionElement | str, + row: Mapping[str, Any] | DimensionRecord, + conform: bool = True, + update: bool = False, + ) -> bool | dict[str, Any]: + # Docstring inherited from a base class. + return self._registry.syncDimensionData(element, row, conform, update) + + def queryDatasetTypes( + self, + expression: Any = ..., + *, + components: bool | None = None, + missing: list[str] | None = None, + ) -> Iterable[DatasetType]: + # Docstring inherited from a base class. + return self._registry.queryDatasetTypes(expression, components=components, missing=missing) + + def queryCollections( + self, + expression: Any = ..., + datasetType: DatasetType | None = None, + collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), + flattenChains: bool = False, + includeChains: bool | None = None, + ) -> Sequence[str]: + # Docstring inherited from a base class. + return self._registry.queryCollections( + expression, datasetType, collectionTypes, flattenChains, includeChains + ) + + def queryDatasets( + self, + datasetType: Any, + *, + collections: CollectionArgType | None = None, + dimensions: Iterable[Dimension | str] | None = None, + dataId: DataId | None = None, + where: str = "", + findFirst: bool = False, + components: bool | None = None, + bind: Mapping[str, Any] | None = None, + check: bool = True, + **kwargs: Any, + ) -> DatasetQueryResults: + # Docstring inherited from a base class. 
+ return self._registry.queryDatasets( + datasetType, + collections=collections, + dimensions=dimensions, + dataId=dataId, + where=where, + findFirst=findFirst, + components=components, + bind=bind, + check=check, + **kwargs, + ) + + def queryDataIds( + self, + dimensions: Iterable[Dimension | str] | Dimension | str, + *, + dataId: DataId | None = None, + datasets: Any = None, + collections: CollectionArgType | None = None, + where: str = "", + components: bool | None = None, + bind: Mapping[str, Any] | None = None, + check: bool = True, + **kwargs: Any, + ) -> DataCoordinateQueryResults: + # Docstring inherited from a base class. + return self._registry.queryDataIds( + dimensions, + dataId=dataId, + datasets=datasets, + collections=collections, + where=where, + components=components, + bind=bind, + check=check, + **kwargs, + ) + + def queryDimensionRecords( + self, + element: DimensionElement | str, + *, + dataId: DataId | None = None, + datasets: Any = None, + collections: CollectionArgType | None = None, + where: str = "", + components: bool | None = None, + bind: Mapping[str, Any] | None = None, + check: bool = True, + **kwargs: Any, + ) -> DimensionRecordQueryResults: + # Docstring inherited from a base class. + return self._registry.queryDimensionRecords( + element, + dataId=dataId, + datasets=datasets, + collections=collections, + where=where, + components=components, + bind=bind, + check=check, + **kwargs, + ) + + def queryDatasetAssociations( + self, + datasetType: str | DatasetType, + collections: CollectionArgType | None = ..., + *, + collectionTypes: Iterable[CollectionType] = CollectionType.all(), + flattenChains: bool = False, + ) -> Iterator[DatasetAssociation]: + # Docstring inherited from a base class. 
+ return self._registry.queryDatasetAssociations( + datasetType, + collections, + collectionTypes=collectionTypes, + flattenChains=flattenChains, + ) + + @property + def obsCoreTableManager(self) -> ObsCoreTableManager | None: + # Docstring inherited from a base class. + return self._registry.obsCoreTableManager diff --git a/python/lsst/daf/butler/registries/remote.py b/python/lsst/daf/butler/registries/remote.py index 5297da0011..f9f4959b6f 100644 --- a/python/lsst/daf/butler/registries/remote.py +++ b/python/lsst/daf/butler/registries/remote.py @@ -41,7 +41,6 @@ DataId, DatasetAssociation, DatasetId, - DatasetIdFactory, DatasetIdGenEnum, DatasetRef, DatasetType, @@ -66,7 +65,7 @@ QueryDatasetsModel, QueryDimensionRecordsModel, ) -from ..registry import CollectionSummary, CollectionType, Registry, RegistryConfig, RegistryDefaults +from ..registry import CollectionSummary, CollectionType, RegistryConfig, RegistryDefaults, _ButlerRegistry if TYPE_CHECKING: from .._butlerConfig import ButlerConfig @@ -74,7 +73,7 @@ from ..registry.interfaces import CollectionRecord, DatastoreRegistryBridgeManager -class RemoteRegistry(Registry): +class RemoteRegistry(_ButlerRegistry): """Registry that can talk to a remote Butler server. Parameters @@ -92,8 +91,8 @@ def createFromConfig( config: RegistryConfig | str | None = None, dimensionConfig: DimensionConfig | str | None = None, butlerRoot: ResourcePathExpression | None = None, - ) -> Registry: - """Create registry database and return `Registry` instance. + ) -> _ButlerRegistry: + """Create registry database and return `_ButlerRegistry` instance. A remote registry can not create a registry database. Calling this method will raise an exception. 
@@ -107,7 +106,7 @@ def fromConfig( butlerRoot: ResourcePathExpression | None = None, writeable: bool = True, defaults: RegistryDefaults | None = None, - ) -> Registry: + ) -> _ButlerRegistry: # Docstring inherited from lsst.daf.butler.registry.Registry config = cls.forceRegistryConfig(config) config.replaceRoot(butlerRoot) @@ -133,10 +132,6 @@ def __init__( self._db = server_uri self._defaults = defaults - # In the future DatasetIdFactory may become configurable and this - # instance will need to be shared with datasets manager. - self.datasetIdFactory = DatasetIdFactory() - # All PUT calls should be short-circuited if not writeable. self._writeable = writeable @@ -167,8 +162,8 @@ def isWriteable(self) -> bool: # Can be used to prevent any PUTs to server return self._writeable - def copy(self, defaults: RegistryDefaults | None = None) -> Registry: - # Docstring inherited from lsst.daf.butler.registry.Registry + def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry: + # Docstring inherited from lsst.daf.butler.registry._ButlerRegistry if defaults is None: # No need to copy, because `RegistryDefaults` is immutable; we # effectively copy on write. diff --git a/python/lsst/daf/butler/registries/sql.py b/python/lsst/daf/butler/registries/sql.py index a216a7ff2a..dc112eeafb 100644 --- a/python/lsst/daf/butler/registries/sql.py +++ b/python/lsst/daf/butler/registries/sql.py @@ -42,7 +42,6 @@ DatasetAssociation, DatasetColumnTag, DatasetId, - DatasetIdFactory, DatasetIdGenEnum, DatasetRef, DatasetType, @@ -73,10 +72,10 @@ InconsistentDataIdError, NoDefaultCollectionError, OrphanedRecordError, - Registry, RegistryConfig, RegistryConsistencyError, RegistryDefaults, + _ButlerRegistry, queries, ) from ..registry.interfaces import ChainedCollectionRecord, RunRecord @@ -97,7 +96,7 @@ _LOG = logging.getLogger(__name__) -class SqlRegistry(Registry): +class SqlRegistry(_ButlerRegistry): """Registry implementation based on SQLAlchemy. 
Parameters @@ -122,7 +121,7 @@ def createFromConfig( config: RegistryConfig | str | None = None, dimensionConfig: DimensionConfig | str | None = None, butlerRoot: ResourcePathExpression | None = None, - ) -> Registry: + ) -> _ButlerRegistry: """Create registry database and return `SqlRegistry` instance. This method initializes database contents, database must be empty @@ -169,7 +168,7 @@ def fromConfig( butlerRoot: ResourcePathExpression | None = None, writeable: bool = True, defaults: RegistryDefaults | None = None, - ) -> Registry: + ) -> _ButlerRegistry: """Create `Registry` subclass instance from `config`. Registry database must be initialized prior to calling this method. @@ -215,9 +214,6 @@ def __init__(self, database: Database, defaults: RegistryDefaults, managers: Reg # can only be done after most of the rest of Registry has already been # initialized, and must be done before the property getter is used. self.defaults = defaults - # In the future DatasetIdFactory may become configurable and this - # instance will need to be shared with datasets manager. - self.datasetIdFactory = DatasetIdFactory() def __str__(self) -> str: return str(self._db) @@ -229,7 +225,7 @@ def isWriteable(self) -> bool: # Docstring inherited from lsst.daf.butler.registry.Registry return self._db.isWriteable() - def copy(self, defaults: RegistryDefaults | None = None) -> Registry: + def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry: # Docstring inherited from lsst.daf.butler.registry.Registry if defaults is None: # No need to copy, because `RegistryDefaults` is immutable; we diff --git a/python/lsst/daf/butler/registry/__init__.py b/python/lsst/daf/butler/registry/__init__.py index e894782f12..c5819d0a24 100644 --- a/python/lsst/daf/butler/registry/__init__.py +++ b/python/lsst/daf/butler/registry/__init__.py @@ -20,6 +20,7 @@ # along with this program. If not, see . from . 
import interfaces, managers, queries, wildcards +from ._butler_registry import _ButlerRegistry from ._collection_summary import * from ._collectionType import * from ._config import * @@ -27,6 +28,7 @@ from ._defaults import * from ._exceptions import * from ._registry import * +from ._registry_factory import * from .wildcards import CollectionSearch # Some modules intentionally not imported, either because they are purely diff --git a/python/lsst/daf/butler/registry/_butler_registry.py b/python/lsst/daf/butler/registry/_butler_registry.py new file mode 100644 index 0000000000..f0150f320e --- /dev/null +++ b/python/lsst/daf/butler/registry/_butler_registry.py @@ -0,0 +1,211 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +__all__ = ("_ButlerRegistry",) + +from abc import abstractmethod +from typing import TYPE_CHECKING + +from lsst.resources import ResourcePathExpression + +from ..core import Config, DimensionConfig +from ._config import RegistryConfig +from ._defaults import RegistryDefaults +from ._registry import Registry + +if TYPE_CHECKING: + from .._butlerConfig import ButlerConfig + from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager + + +class _ButlerRegistry(Registry): + """Registry interface extended with methods used by Butler implementation. + + Each registry implementation can have its own constructor parameters. + The assumption is that an instance of a specific subclass will be + constructed from configuration using `Registry.fromConfig()`. + The base class will look for a ``cls`` entry and call that specific + `fromConfig()` method. + """ + + defaultConfigFile: str | None = None + """Path to configuration defaults. Accessed within the ``configs`` resource + or relative to a search path. Can be None if no defaults specified. + """ + + @classmethod + def forceRegistryConfig( + cls, config: ButlerConfig | RegistryConfig | Config | str | None + ) -> RegistryConfig: + """Force the supplied config to a `RegistryConfig`. + + Parameters + ---------- + config : `RegistryConfig`, `Config` or `str` or `None` + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + + Returns + ------- + registry_config : `RegistryConfig` + A registry config. 
+ """ + if not isinstance(config, RegistryConfig): + if isinstance(config, (str, Config)) or config is None: + config = RegistryConfig(config) + else: + raise ValueError(f"Incompatible Registry configuration: {config}") + return config + + @classmethod + @abstractmethod + def createFromConfig( + cls, + config: RegistryConfig | str | None = None, + dimensionConfig: DimensionConfig | str | None = None, + butlerRoot: ResourcePathExpression | None = None, + ) -> _ButlerRegistry: + """Create registry database and return `_ButlerRegistry` instance. + + This method initializes database contents, database must be empty + prior to calling this method. + + Parameters + ---------- + config : `RegistryConfig` or `str`, optional + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + dimensionConfig : `DimensionConfig` or `str`, optional + Dimensions configuration, if missing then default configuration + will be loaded from dimensions.yaml. + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + + Returns + ------- + registry : `_ButlerRegistry` + A new `_ButlerRegistry` instance. + + Notes + ----- + This class will determine the concrete `_ButlerRegistry` subclass to + use from configuration. Each subclass should implement this method + even if it can not create a registry. + """ + raise NotImplementedError() + + @classmethod + @abstractmethod + def fromConfig( + cls, + config: ButlerConfig | RegistryConfig | Config | str, + butlerRoot: ResourcePathExpression | None = None, + writeable: bool = True, + defaults: RegistryDefaults | None = None, + ) -> _ButlerRegistry: + """Create `_ButlerRegistry` subclass instance from ``config``. + + Registry database must be initialized prior to calling this method. 
+ + Parameters + ---------- + config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` + Registry configuration + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + writeable : `bool`, optional + If `True` (default) create a read-write connection to the database. + defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional + Default collection search path and/or output `~CollectionType.RUN` + collection. + + Returns + ------- + registry : `_ButlerRegistry` (subclass) + A new `_ButlerRegistry` subclass instance. + + Notes + ----- + This class will determine the concrete `_ButlerRegistry` subclass to + use from configuration. Each subclass should implement this method. + """ + # The base class implementation should trampoline to the correct + # subclass. No implementation should ever use this implementation + # directly. If no class is specified, default to the standard + # registry. + raise NotImplementedError() + + @abstractmethod + def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry: + """Create a new `_ButlerRegistry` backed by the same data repository + and connection as this one, but independent defaults. + + Parameters + ---------- + defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional + Default collections and data ID values for the new registry. If + not provided, ``self.defaults`` will be used (but future changes + to either registry's defaults will not affect the other). + + Returns + ------- + copy : `_ButlerRegistry` + A new `_ButlerRegistry` instance with its own defaults. + + Notes + ----- + Because the new registry shares a connection with the original, they + also share transaction state (despite the fact that their `transaction` + context manager methods do not reflect this), and must be used with + care. 
+ """ + raise NotImplementedError() + + @abstractmethod + def _get_collection_record(self, name: str) -> CollectionRecord: + """Return the record for this collection. + + Parameters + ---------- + name : `str` + Name of the collection for which the record is to be retrieved. + + Returns + ------- + record : `CollectionRecord` + The record for this collection. + """ + raise NotImplementedError() + + @abstractmethod + def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: + """Return an object that allows a new `Datastore` instance to + communicate with this `Registry`. + + Returns + ------- + manager : `~.interfaces.DatastoreRegistryBridgeManager` + Object that mediates communication between this `Registry` and its + associated datastores. + """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py index 8ca9cdadcf..b97c62a1d4 100644 --- a/python/lsst/daf/butler/registry/_registry.py +++ b/python/lsst/daf/butler/registry/_registry.py @@ -32,15 +32,12 @@ from typing import TYPE_CHECKING, Any from lsst.resources import ResourcePathExpression -from lsst.utils import doImportType from ..core import ( - Config, DataCoordinate, DataId, DatasetAssociation, DatasetId, - DatasetIdFactory, DatasetIdGenEnum, DatasetRef, DatasetType, @@ -62,8 +59,7 @@ from .wildcards import CollectionWildcard if TYPE_CHECKING: - from .._butlerConfig import ButlerConfig - from .interfaces import CollectionRecord, DatastoreRegistryBridgeManager, ObsCoreTableManager + from .interfaces import ObsCoreTableManager _LOG = logging.getLogger(__name__) @@ -74,78 +70,11 @@ class Registry(ABC): """Abstract Registry interface. - Each registry implementation can have its own constructor parameters. - The assumption is that an instance of a specific subclass will be - constructed from configuration using `Registry.fromConfig()`. 
- The base class will look for a ``cls`` entry and call that specific - `fromConfig()` method. - All subclasses should store `~lsst.daf.butler.registry.RegistryDefaults` in a ``_defaults`` property. No other properties are assumed shared between implementations. """ - defaultConfigFile: str | None = None - """Path to configuration defaults. Accessed within the ``configs`` resource - or relative to a search path. Can be None if no defaults specified. - """ - - @classmethod - def forceRegistryConfig( - cls, config: ButlerConfig | RegistryConfig | Config | str | None - ) -> RegistryConfig: - """Force the supplied config to a `RegistryConfig`. - - Parameters - ---------- - config : `RegistryConfig`, `Config` or `str` or `None` - Registry configuration, if missing then default configuration will - be loaded from registry.yaml. - - Returns - ------- - registry_config : `RegistryConfig` - A registry config. - """ - if not isinstance(config, RegistryConfig): - if isinstance(config, (str, Config)) or config is None: - config = RegistryConfig(config) - else: - raise ValueError(f"Incompatible Registry configuration: {config}") - return config - - @classmethod - def determineTrampoline( - cls, config: ButlerConfig | RegistryConfig | Config | str | None - ) -> tuple[type[Registry], RegistryConfig]: - """Return class to use to instantiate real registry. - - Parameters - ---------- - config : `RegistryConfig` or `str`, optional - Registry configuration, if missing then default configuration will - be loaded from registry.yaml. - - Returns - ------- - requested_cls : `type` of `Registry` - The real registry class to use. - registry_config : `RegistryConfig` - The `RegistryConfig` to use. 
- """ - config = cls.forceRegistryConfig(config) - - # Default to the standard registry - registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") - registry_cls = doImportType(registry_cls_name) - if registry_cls is cls: - raise ValueError("Can not instantiate the abstract base Registry from config") - if not issubclass(registry_cls, Registry): - raise TypeError( - f"Registry class obtained from config {registry_cls_name} is not a Registry class." - ) - return registry_cls, config - @classmethod def createFromConfig( cls, @@ -176,53 +105,14 @@ def createFromConfig( Notes ----- - This class will determine the concrete `Registry` subclass to - use from configuration. Each subclass should implement this method - even if it can not create a registry. + This method is for backward compatibility only, until all clients + migrate to use new `~lsst.daf.butler.registry._RegistryFactory` factory + class. Regular clients of registry class do not use this method, it is + only used by tests in multiple packages. """ - registry_cls, registry_config = cls.determineTrampoline(config) - return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) - - @classmethod - def fromConfig( - cls, - config: ButlerConfig | RegistryConfig | Config | str, - butlerRoot: ResourcePathExpression | None = None, - writeable: bool = True, - defaults: RegistryDefaults | None = None, - ) -> Registry: - """Create `Registry` subclass instance from ``config``. - - Registry database must be initialized prior to calling this method. - - Parameters - ---------- - config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` - Registry configuration - butlerRoot : convertible to `lsst.resources.ResourcePath`, optional - Path to the repository root this `Registry` will manage. - writeable : `bool`, optional - If `True` (default) create a read-write connection to the database. 
- defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional - Default collection search path and/or output `~CollectionType.RUN` - collection. + from ._registry_factory import _RegistryFactory - Returns - ------- - registry : `Registry` (subclass) - A new `Registry` subclass instance. - - Notes - ----- - This class will determine the concrete `Registry` subclass to - use from configuration. Each subclass should implement this method. - """ - # The base class implementation should trampoline to the correct - # subclass. No implementation should ever use this implementation - # directly. If no class is specified, default to the standard - # registry. - registry_cls, registry_config = cls.determineTrampoline(config) - return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) + return _RegistryFactory(config).create_from_config(dimensionConfig, butlerRoot) @abstractmethod def isWriteable(self) -> bool: @@ -231,32 +121,6 @@ def isWriteable(self) -> bool: """ raise NotImplementedError() - @abstractmethod - def copy(self, defaults: RegistryDefaults | None = None) -> Registry: - """Create a new `Registry` backed by the same data repository and - connection as this one, but independent defaults. - - Parameters - ---------- - defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional - Default collections and data ID values for the new registry. If - not provided, ``self.defaults`` will be used (but future changes - to either registry's defaults will not affect the other). - - Returns - ------- - copy : `Registry` - A new `Registry` instance with its own defaults. - - Notes - ----- - Because the new registry shares a connection with the original, they - also share transaction state (despite the fact that their `transaction` - context manager methods do not reflect this), and must be used with - care. 
- """ - raise NotImplementedError() - @property @abstractmethod def dimensions(self) -> DimensionUniverse: @@ -360,22 +224,6 @@ def getCollectionType(self, name: str) -> CollectionType: """ raise NotImplementedError() - @abstractmethod - def _get_collection_record(self, name: str) -> CollectionRecord: - """Return the record for this collection. - - Parameters - ---------- - name : `str` - Name of the collection for which the record is to be retrieved. - - Returns - ------- - record : `CollectionRecord` - The record for this collection. - """ - raise NotImplementedError() - @abstractmethod def registerRun(self, name: str, doc: str | None = None) -> bool: """Add a new run if one with the given name does not exist. @@ -1017,19 +865,6 @@ def decertify( """ raise NotImplementedError() - @abstractmethod - def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: - """Return an object that allows a new `Datastore` instance to - communicate with this `Registry`. - - Returns - ------- - manager : `~.interfaces.DatastoreRegistryBridgeManager` - Object that mediates communication between this `Registry` and its - associated datastores. - """ - raise NotImplementedError() - @abstractmethod def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: """Retrieve datastore locations for a given dataset. @@ -1678,6 +1513,3 @@ def obsCoreTableManager(self) -> ObsCoreTableManager | None: storageClasses: StorageClassFactory """All storage classes known to the registry (`StorageClassFactory`). """ - - datasetIdFactory: DatasetIdFactory - """Factory for dataset IDs.""" diff --git a/python/lsst/daf/butler/registry/_registry_factory.py b/python/lsst/daf/butler/registry/_registry_factory.py new file mode 100644 index 0000000000..74bb9291df --- /dev/null +++ b/python/lsst/daf/butler/registry/_registry_factory.py @@ -0,0 +1,128 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. 
+# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ("_RegistryFactory",) + +from typing import TYPE_CHECKING + +from lsst.resources import ResourcePathExpression +from lsst.utils import doImportType + +from ..core import Config, DimensionConfig +from ._butler_registry import _ButlerRegistry +from ._config import RegistryConfig +from ._defaults import RegistryDefaults + +if TYPE_CHECKING: + from .._butlerConfig import ButlerConfig + + +class _RegistryFactory: + """Interface for creating and initializing Registry instances. + + Parameters + ---------- + config : `RegistryConfig` or `str`, optional + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + + Notes + ----- + Each registry implementation can have its own constructor parameters. + The assumption is that an instance of a specific subclass will be + constructed from configuration using ``RegistryClass.fromConfig()`` or + ``RegistryClass.createFromConfig()``. + + This class will look for a ``cls`` entry in registry configuration object + (defaulting to ``SqlRegistry``), import that class, and call one of the + above methods on the imported class. 
+ """ + + def __init__(self, config: ButlerConfig | RegistryConfig | Config | str | None): + if not isinstance(config, RegistryConfig): + if isinstance(config, (str, Config)) or config is None: + config = RegistryConfig(config) + else: + raise ValueError(f"Incompatible Registry configuration: {config}") + self._config = config + + # Default to the standard registry + registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") + registry_cls = doImportType(registry_cls_name) + if not issubclass(registry_cls, _ButlerRegistry): + raise TypeError( + f"Registry class obtained from config {registry_cls_name} is not a _ButlerRegistry class." + ) + self._registry_cls = registry_cls + + def create_from_config( + self, + dimensionConfig: DimensionConfig | str | None = None, + butlerRoot: ResourcePathExpression | None = None, + ) -> _ButlerRegistry: + """Create registry database and return `_ButlerRegistry` instance. + + This method initializes database contents, database must be empty + prior to calling this method. + + Parameters + ---------- + dimensionConfig : `DimensionConfig` or `str`, optional + Dimensions configuration, if missing then default configuration + will be loaded from dimensions.yaml. + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + + Returns + ------- + registry : `_ButlerRegistry` + A new `_ButlerRegistry` instance. + """ + return self._registry_cls.createFromConfig(self._config, dimensionConfig, butlerRoot) + + def from_config( + self, + butlerRoot: ResourcePathExpression | None = None, + writeable: bool = True, + defaults: RegistryDefaults | None = None, + ) -> _ButlerRegistry: + """Create `_ButlerRegistry` subclass instance from ``config``. + + Registry database must be initialized prior to calling this method. 
+ + Parameters + ---------- + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + writeable : `bool`, optional + If `True` (default) create a read-write connection to the database. + defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional + Default collection search path and/or output `~CollectionType.RUN` + collection. + + Returns + ------- + registry : `_ButlerRegistry` (subclass) + A new `_ButlerRegistry` subclass instance. + """ + return self._registry_cls.fromConfig(self._config, butlerRoot, writeable, defaults) diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 435f295690..558ae01f5f 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -49,6 +49,7 @@ DataCoordinate, DataCoordinateSet, DatasetAssociation, + DatasetIdFactory, DatasetIdGenEnum, DatasetRef, DatasetType, @@ -3090,7 +3091,7 @@ def testDatasetIdFactory(self): in its API. 
""" registry = self.makeRegistry() - factory = registry.datasetIdFactory + factory = DatasetIdFactory() dataset_type = DatasetType( "datasetType", dimensions=["detector", "instrument"], diff --git a/python/lsst/daf/butler/tests/_datasetsHelper.py b/python/lsst/daf/butler/tests/_datasetsHelper.py index ff7e7f0b6a..c7be535714 100644 --- a/python/lsst/daf/butler/tests/_datasetsHelper.py +++ b/python/lsst/daf/butler/tests/_datasetsHelper.py @@ -37,7 +37,8 @@ from lsst.daf.butler.formatters.yaml import YamlFormatter if TYPE_CHECKING: - from lsst.daf.butler import Config, DatasetId, Datastore, Dimension, DimensionGraph, Registry + from lsst.daf.butler import Config, DatasetId, Datastore, Dimension, DimensionGraph + from lsst.daf.butler.registry import _ButlerRegistry class DatasetTestHelper: @@ -101,7 +102,7 @@ class DatastoreTestHelper: datastoreType: type[Datastore] configFile: str - def setUpDatastoreTests(self, registryClass: type[Registry], configClass: type[Config]) -> None: + def setUpDatastoreTests(self, registryClass: type[_ButlerRegistry], configClass: type[Config]) -> None: """Shared setUp code for all Datastore tests.""" self.registry = registryClass() self.config = configClass(self.configFile) diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py index 9b41e83270..8e71516316 100644 --- a/python/lsst/daf/butler/transfers/_context.py +++ b/python/lsst/daf/butler/transfers/_context.py @@ -38,7 +38,7 @@ DimensionRecord, FileDataset, ) -from ..registry import CollectionType, Registry +from ..registry import CollectionType, _ButlerRegistry from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord from ._interfaces import RepoExportBackend @@ -58,7 +58,7 @@ class RepoExportContext: Parameters ---------- - registry : `Registry` + registry : `_ButlerRegistry` Registry to export from. datastore : `Datastore` Datastore to export from. 
@@ -73,7 +73,7 @@ class RepoExportContext: def __init__( self, - registry: Registry, + registry: _ButlerRegistry, datastore: Datastore, backend: RepoExportBackend, *, diff --git a/tests/test_butler.py b/tests/test_butler.py index 605f986227..307cad7573 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -1679,8 +1679,8 @@ def testPytypeCoercion(self) -> None: # Now need to hack the registry dataset type definition. # There is no API for this. - assert isinstance(butler.registry, SqlRegistry) - manager = butler.registry._managers.datasets + assert isinstance(butler._registry, SqlRegistry) + manager = butler._registry._managers.datasets assert hasattr(manager, "_db") and hasattr(manager, "_static") manager._db.update( manager._static.dataset_type, diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 14767065d0..291ede05c3 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -42,11 +42,10 @@ DimensionUniverse, NamedKeyDict, NamedValueSet, - Registry, TimespanDatabaseRepresentation, YamlRepoImportBackend, ) -from lsst.daf.butler.registry import RegistryConfig +from lsst.daf.butler.registry import RegistryConfig, _RegistryFactory DIMENSION_DATA_FILE = os.path.normpath( os.path.join(os.path.dirname(__file__), "data", "registry", "hsc-rc2-subset.yaml") @@ -65,7 +64,7 @@ def loadDimensionData() -> DataCoordinateSequence: # data and retreive it as a set of DataCoordinate objects. 
config = RegistryConfig() config["db"] = "sqlite://" - registry = Registry.createFromConfig(config) + registry = _RegistryFactory(config).create_from_config() with open(DIMENSION_DATA_FILE) as stream: backend = YamlRepoImportBackend(stream, registry) backend.register() diff --git a/tests/test_obscore.py b/tests/test_obscore.py index d50b5b620d..bf213496f3 100644 --- a/tests/test_obscore.py +++ b/tests/test_obscore.py @@ -33,13 +33,12 @@ CollectionType, Config, DataCoordinate, - DatasetIdGenEnum, DatasetRef, DatasetType, StorageClassFactory, ) from lsst.daf.butler.registries.sql import SqlRegistry -from lsst.daf.butler.registry import Registry, RegistryConfig +from lsst.daf.butler.registry import Registry, RegistryConfig, _ButlerRegistry, _RegistryFactory from lsst.daf.butler.registry.obscore import ( DatasetTypeConfig, ObsCoreConfig, @@ -65,10 +64,10 @@ class ObsCoreTests(TestCaseMixin): def make_registry( self, collections: list[str] | None = None, collection_type: str | None = None - ) -> Registry: + ) -> _ButlerRegistry: """Create new empty Registry.""" config = self.make_registry_config(collections, collection_type) - registry = Registry.createFromConfig(config, butlerRoot=self.root) + registry = _RegistryFactory(config).create_from_config(butlerRoot=self.root) self.initialize_registry(registry) return registry @@ -225,10 +224,7 @@ def _insert_dataset( coordinate = DataCoordinate.standardize(data_id, universe=registry.dimensions) if do_import: ds_type = self.dataset_types[dataset_type] - dataset_id = registry.datasetIdFactory.makeDatasetId( - run, ds_type, coordinate, DatasetIdGenEnum.UNIQUE - ) - ref = DatasetRef(ds_type, coordinate, id=dataset_id, run=run) + ref = DatasetRef(ds_type, coordinate, run=run) [ref] = registry._importDatasets([ref]) else: [ref] = registry.insertDatasets(dataset_type, [data_id], run=run) diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index 78dc6c2308..b1e5d573bc 100644 --- a/tests/test_postgresql.py +++ 
b/tests/test_postgresql.py @@ -40,7 +40,7 @@ import sqlalchemy from lsst.daf.butler import Timespan, ddl -from lsst.daf.butler.registry import Registry +from lsst.daf.butler.registry import _ButlerRegistry, _RegistryFactory from lsst.daf.butler.registry.databases.postgresql import PostgresqlDatabase, _RangeTimespanType from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -236,7 +236,7 @@ def tearDownClass(cls): def getDataDir(cls) -> str: return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) - def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry: + def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry: if share_repo_with is None: namespace = f"namespace_{secrets.token_hex(8).lower()}" else: @@ -245,9 +245,9 @@ def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry: config["db"] = self.server.url() config["namespace"] = namespace if share_repo_with is None: - return Registry.createFromConfig(config) + return _RegistryFactory(config).create_from_config() else: - return Registry.fromConfig(config) + return _RegistryFactory(config).from_config() class PostgresqlRegistryNameKeyCollMgrUUIDTestCase(PostgresqlRegistryTests, unittest.TestCase): diff --git a/tests/test_quantumBackedButler.py b/tests/test_quantumBackedButler.py index 18c6c898e7..486fe2113d 100644 --- a/tests/test_quantumBackedButler.py +++ b/tests/test_quantumBackedButler.py @@ -34,10 +34,10 @@ Quantum, QuantumBackedButler, QuantumProvenanceData, - Registry, RegistryConfig, StorageClass, ) +from lsst.daf.butler.registry import _RegistryFactory from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.resources import ResourcePath @@ -55,7 +55,7 @@ def setUp(self) -> None: # Make a butler and import dimension definitions. 
registryConfig = RegistryConfig(self.config.get("registry")) - Registry.createFromConfig(registryConfig, butlerRoot=self.root) + _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root) self.butler = Butler(self.config, writeable=True, run="RUN") self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py index ffead5de84..096a7aa7b8 100644 --- a/tests/test_query_relations.py +++ b/tests/test_query_relations.py @@ -25,7 +25,7 @@ import re import unittest -from lsst.daf.butler.registry import MissingSpatialOverlapError, Registry, RegistryConfig, queries +from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory, queries from lsst.daf.butler.transfers import YamlRepoImportBackend TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -52,7 +52,7 @@ class TestQueryRelationsTests(unittest.TestCase): def setUpClass(cls) -> None: config = RegistryConfig() config["db"] = "sqlite://" - cls.registry = Registry.createFromConfig(config) + cls.registry = _RegistryFactory(config).create_from_config() # We need just enough test data to have valid dimension records for # all of the dimensions we're concerned with, and we want to pick # values for each dimension that correspond to a spatiotemporal diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 2a25317f05..87d8dc08d8 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -34,17 +34,8 @@ np = None import astropy.time -from lsst.daf.butler import ( - Butler, - ButlerConfig, - CollectionType, - DatasetId, - DatasetRef, - DatasetType, - Registry, - Timespan, -) -from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults +from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetId, DatasetRef, DatasetType, Timespan +from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory from 
lsst.daf.butler.tests import DatastoreMock from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -80,7 +71,7 @@ def makeButler(self, **kwargs: Any) -> Butler: # have to make a registry first registryConfig = RegistryConfig(config.get("registry")) - Registry.createFromConfig(registryConfig) + _RegistryFactory(registryConfig).create_from_config() butler = Butler(config, **kwargs) DatastoreMock.apply(butler) @@ -286,7 +277,6 @@ def testButlerGet(self): # Create a numpy integer to check that works fine detector_np = np.int64(2) if np else 2 - print(type(detector_np)) # Try to get it using different variations of dataId + keyword # arguments diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py index bf76af233e..7081354234 100644 --- a/tests/test_sqlite.py +++ b/tests/test_sqlite.py @@ -28,7 +28,7 @@ import sqlalchemy from lsst.daf.butler import ddl -from lsst.daf.butler.registry import Registry +from lsst.daf.butler.registry import _ButlerRegistry, _RegistryFactory from lsst.daf.butler.registry.databases.sqlite import SqliteDatabase from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -196,7 +196,7 @@ def tearDown(self): def getDataDir(cls) -> str: return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) - def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry: + def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry: if share_repo_with is None: _, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3") else: @@ -204,9 +204,9 @@ def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry: config = self.makeRegistryConfig() config["db"] = f"sqlite:///{filename}" if share_repo_with is None: - return Registry.createFromConfig(config, butlerRoot=self.root) + return _RegistryFactory(config).create_from_config(butlerRoot=self.root) else: - return 
Registry.fromConfig(config, butlerRoot=self.root) + return _RegistryFactory(config).from_config(butlerRoot=self.root) class SqliteFileRegistryNameKeyCollMgrUUIDTestCase(SqliteFileRegistryTests, unittest.TestCase): @@ -242,12 +242,12 @@ class SqliteMemoryRegistryTests(RegistryTests): def getDataDir(cls) -> str: return os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) - def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None: + def makeRegistry(self, share_repo_with: _ButlerRegistry | None = None) -> _ButlerRegistry | None: if share_repo_with is not None: return None config = self.makeRegistryConfig() config["db"] = "sqlite://" - return Registry.createFromConfig(config) + return _RegistryFactory(config).create_from_config() def testMissingAttributes(self): """Test for instantiating a registry against outdated schema which @@ -258,7 +258,7 @@ def testMissingAttributes(self): config = self.makeRegistryConfig() config["db"] = "sqlite://" with self.assertRaises(LookupError): - Registry.fromConfig(config) + _RegistryFactory(config).from_config() class SqliteMemoryRegistryNameKeyCollMgrUUIDTestCase(unittest.TestCase, SqliteMemoryRegistryTests):