From c88dc014ac3131ae55c5ca7af4078d776c7017d9 Mon Sep 17 00:00:00 2001
From: Jim Bosch
Date: Wed, 16 Aug 2023 16:38:58 -0400
Subject: [PATCH] Deduplicate when merging DatastoreRecordData and document
 preconditions.

---
 python/lsst/daf/butler/core/datastore.py         |  6 +++++-
 .../lsst/daf/butler/core/datastoreRecordData.py  | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/python/lsst/daf/butler/core/datastore.py b/python/lsst/daf/butler/core/datastore.py
index 2123e672ec..abb05423b7 100644
--- a/python/lsst/daf/butler/core/datastore.py
+++ b/python/lsst/daf/butler/core/datastore.py
@@ -1166,6 +1166,9 @@ def import_records(
         Implementations are responsible for calling
         `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
         where the key is in `names`, as well as loading any opaque table data.
+
+        Implementations may assume that datasets are either fully present or
+        not at all (single-component exports are not permitted).
         """
         raise NotImplementedError()

@@ -1181,7 +1184,8 @@ def export_records(
         ----------
         refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
             Datasets to save. This may include datasets not known to this
-            datastore, which should be ignored.
+            datastore, which should be ignored. May not include component
+            datasets.

         Returns
         -------
diff --git a/python/lsst/daf/butler/core/datastoreRecordData.py b/python/lsst/daf/butler/core/datastoreRecordData.py
index 93ae3667b2..a9b6421dfc 100644
--- a/python/lsst/daf/butler/core/datastoreRecordData.py
+++ b/python/lsst/daf/butler/core/datastoreRecordData.py
@@ -114,16 +114,25 @@ def update(self, other: DatastoreRecordData) -> None:
         Parameters
         ----------
         other : `DatastoreRecordData`
-            Records tho merge into this instance.
+            Records to merge into this instance.

         Notes
         -----
-        Merged instances can not have identical records.
+        If a ``(dataset_id, table_name)`` combination has any records in
+        ``self``, it is assumed that all records for that combination are
+        already present. This allows duplicates of the same dataset to be
+        handled gracefully.
         """
         for dataset_id, table_records in other.records.items():
             this_table_records = self.records.setdefault(dataset_id, {})
             for table_name, records in table_records.items():
-                this_table_records.setdefault(table_name, []).extend(records)
+                # If this (dataset_id, table_name) combination already has
+                # records in `self`, we assume that means all of the records
+                # for that combination; we require other code to ensure entire
+                # (parent) datasets are exported to these data structures
+                # (never components).
+                if not (this_records := this_table_records.setdefault(table_name, [])):
+                    this_records.extend(records)

     def subset(self, dataset_ids: set[DatasetId]) -> DatastoreRecordData | None:
         """Extract a subset of the records that match given dataset IDs.
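
For reviewers, a minimal standalone sketch of the merge semantics this patch
introduces. SimpleRecordData and the plain-dict records below are hypothetical
stand-ins for DatastoreRecordData and its opaque-table rows, not the real API;
the point is only to show that a second copy of records for the same
(dataset_id, table_name) pair is skipped rather than appended.

    # Hypothetical stand-in for DatastoreRecordData, reduced to the merge logic.
    from __future__ import annotations

    import uuid


    class SimpleRecordData:
        def __init__(self) -> None:
            # Maps dataset_id -> table_name -> list of records (plain dicts here).
            self.records: dict[uuid.UUID, dict[str, list[dict]]] = {}

        def update(self, other: SimpleRecordData) -> None:
            for dataset_id, table_records in other.records.items():
                this_table_records = self.records.setdefault(dataset_id, {})
                for table_name, records in table_records.items():
                    # Extend only when this (dataset_id, table_name) pair has
                    # no records yet: any existing records are assumed to be
                    # the complete set, so duplicates are skipped rather than
                    # appended a second time.
                    if not (this_records := this_table_records.setdefault(table_name, [])):
                        this_records.extend(records)


    dataset_id = uuid.uuid4()
    a = SimpleRecordData()
    a.records[dataset_id] = {"file_datastore_records": [{"path": "x.fits"}]}

    b = SimpleRecordData()
    b.records[dataset_id] = {"file_datastore_records": [{"path": "x.fits"}]}

    a.update(b)
    # Still one record: the second copy was recognized as a duplicate.
    assert len(a.records[dataset_id]["file_datastore_records"]) == 1

Note the granularity: deduplication happens per (dataset_id, table_name) pair,
which is why the patch also documents that exports must always cover whole
(parent) datasets, never individual components.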