diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py
index 25ef39e04a..e1309ef72f 100644
--- a/python/lsst/daf/butler/transfers/_yaml.py
+++ b/python/lsst/daf/butler/transfers/_yaml.py
@@ -28,7 +28,7 @@
 from collections import defaultdict
 from collections.abc import Iterable, Mapping
 from datetime import datetime
-from typing import IO, TYPE_CHECKING, Any
+from typing import IO, TYPE_CHECKING, Any, cast
 
 import astropy.time
 import yaml
@@ -64,6 +64,8 @@
 this version of the code.
 """
 
+_refIntId2UUID = defaultdict[int, uuid.UUID](uuid.uuid4)
+
 
 def _uuid_representer(dumper: yaml.Dumper, data: uuid.UUID) -> yaml.Node:
     """Generate YAML representation for UUID.
@@ -338,16 +340,27 @@ def __init__(self, stream: IO, registry: Registry):
             elif data["type"] == "associations":
                 collectionType = CollectionType.from_name(data["collection_type"])
                 if collectionType is CollectionType.TAGGED:
-                    self.tagAssociations[data["collection"]].extend(data["dataset_ids"])
+                    self.tagAssociations[data["collection"]].extend(
+                        [
+                            x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
+                            for x in data["dataset_ids"]
+                        ]
+                    )
                 elif collectionType is CollectionType.CALIBRATION:
                     assocsByTimespan = self.calibAssociations[data["collection"]]
                     for d in data["validity_ranges"]:
                         if "timespan" in d:
-                            assocsByTimespan[d["timespan"]] = d["dataset_ids"]
+                            assocsByTimespan[d["timespan"]] = [
+                                x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
+                                for x in d["dataset_ids"]
+                            ]
                         else:
                             # TODO: this is for backward compatibility, should
                             # be removed at some point.
-                            assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = d["dataset_ids"]
+                            assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = [
+                                x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
+                                for x in d["dataset_ids"]
+                            ]
                 else:
                     raise ValueError(f"Unexpected calibration type for association: {collectionType.name}.")
             else:
@@ -362,7 +375,14 @@ def __init__(self, stream: IO, registry: Registry):
                     FileDataset(
                        d.get("path"),
                         [
-                            DatasetRef(datasetType, dataId, run=data["run"], id=refid)
+                            DatasetRef(
+                                datasetType,
+                                dataId,
+                                run=data["run"],
+                                id=refid
+                                if not isinstance(refid, int)
+                                else cast(DatasetId, _refIntId2UUID[refid]),
+                            )
                             for dataId, refid in zip(
                                 ensure_iterable(d["data_id"]), ensure_iterable(d["dataset_id"])
                             )
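
For context, the new module-level _refIntId2UUID mapping leans on defaultdict's factory semantics: looking up a missing key calls uuid.uuid4, so each distinct legacy integer dataset ID is lazily assigned one fresh UUID, and every later lookup of that same integer returns the same UUID. A minimal standalone sketch of that behavior (the legacy_ids values and the legacy_ids/mapped names are invented for illustration, not part of the patch):

    import uuid
    from collections import defaultdict

    # Missing keys invoke the uuid.uuid4 factory: each distinct legacy
    # integer ID gets one freshly generated UUID on first lookup, and
    # every later lookup of the same integer returns that same UUID.
    _refIntId2UUID = defaultdict[int, uuid.UUID](uuid.uuid4)

    legacy_ids = [42, 7, 42]  # hypothetical legacy integer dataset IDs
    mapped = [_refIntId2UUID[i] for i in legacy_ids]

    assert mapped[0] == mapped[2]  # same integer maps to the same UUID
    assert mapped[0] != mapped[1]  # distinct integers map to distinct UUIDs

This is why the import code can convert integer IDs wherever they appear (tagged associations, calibration associations, and DatasetRef construction) and still keep all references to one legacy ID consistent within a single import.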