From 93e3286a36c6cdc942daa3d6707c0abbdeaa783c Mon Sep 17 00:00:00 2001 From: Nate Lust Date: Tue, 27 Jun 2023 14:55:04 -0400 Subject: [PATCH] Convert integer ids to UUID early Downstream code now depends on refs holding UUIDs. Have the yaml loader convert old style integer ids to UUIDs early rather than waiting for downstream cleanups. --- python/lsst/daf/butler/transfers/_yaml.py | 30 +++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py index 25ef39e04a..e1309ef72f 100644 --- a/python/lsst/daf/butler/transfers/_yaml.py +++ b/python/lsst/daf/butler/transfers/_yaml.py @@ -28,7 +28,7 @@ from collections import defaultdict from collections.abc import Iterable, Mapping from datetime import datetime -from typing import IO, TYPE_CHECKING, Any +from typing import IO, TYPE_CHECKING, Any, cast import astropy.time import yaml @@ -64,6 +64,8 @@ this version of the code. """ +_refIntId2UUID = defaultdict[int, uuid.UUID](uuid.uuid4) + def _uuid_representer(dumper: yaml.Dumper, data: uuid.UUID) -> yaml.Node: """Generate YAML representation for UUID. @@ -338,16 +340,27 @@ def __init__(self, stream: IO, registry: Registry): elif data["type"] == "associations": collectionType = CollectionType.from_name(data["collection_type"]) if collectionType is CollectionType.TAGGED: - self.tagAssociations[data["collection"]].extend(data["dataset_ids"]) + self.tagAssociations[data["collection"]].extend( + [ + x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x]) + for x in data["dataset_ids"] + ] + ) elif collectionType is CollectionType.CALIBRATION: assocsByTimespan = self.calibAssociations[data["collection"]] for d in data["validity_ranges"]: if "timespan" in d: - assocsByTimespan[d["timespan"]] = d["dataset_ids"] + assocsByTimespan[d["timespan"]] = [ + x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x]) + for x in d["dataset_ids"] + ] else: # TODO: this is for backward compatibility, should # be removed at some point. - assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = d["dataset_ids"] + assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = [ + x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x]) + for x in d["dataset_ids"] + ] else: raise ValueError(f"Unexpected calibration type for association: {collectionType.name}.") else: @@ -362,7 +375,14 @@ def __init__(self, stream: IO, registry: Registry): FileDataset( d.get("path"), [ - DatasetRef(datasetType, dataId, run=data["run"], id=refid) + DatasetRef( + datasetType, + dataId, + run=data["run"], + id=refid + if not isinstance(refid, int) + else cast(DatasetId, _refIntId2UUID[refid]), + ) for dataId, refid in zip( ensure_iterable(d["data_id"]), ensure_iterable(d["dataset_id"]) )