Skip to content

Commit

Permalink
Convert integer ids to UUID early
Browse files Browse the repository at this point in the history
Downstream code now depends on refs holding UUIDs. Have the YAML
loader convert old-style integer ids to UUIDs early rather than
waiting for downstream cleanups.
  • Loading branch information
natelust committed Jun 27, 2023
1 parent 97137a2 commit 93e3286
Showing 1 changed file with 25 additions and 5 deletions.
30 changes: 25 additions & 5 deletions python/lsst/daf/butler/transfers/_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from collections import defaultdict
from collections.abc import Iterable, Mapping
from datetime import datetime
from typing import IO, TYPE_CHECKING, Any
from typing import IO, TYPE_CHECKING, Any, cast

import astropy.time
import yaml
Expand Down Expand Up @@ -64,6 +64,8 @@
this version of the code.
"""

# Module-level lookup from old-style integer dataset ids to UUIDs. The first
# lookup of a given integer creates a random UUID via uuid.uuid4; every later
# lookup of that same integer returns the UUID created the first time.
_refIntId2UUID: defaultdict[int, uuid.UUID] = defaultdict(uuid.uuid4)


def _uuid_representer(dumper: yaml.Dumper, data: uuid.UUID) -> yaml.Node:
"""Generate YAML representation for UUID.
Expand Down Expand Up @@ -338,16 +340,27 @@ def __init__(self, stream: IO, registry: Registry):
elif data["type"] == "associations":
collectionType = CollectionType.from_name(data["collection_type"])
if collectionType is CollectionType.TAGGED:
self.tagAssociations[data["collection"]].extend(data["dataset_ids"])
self.tagAssociations[data["collection"]].extend(
[
x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
for x in data["dataset_ids"]
]
)
elif collectionType is CollectionType.CALIBRATION:
assocsByTimespan = self.calibAssociations[data["collection"]]
for d in data["validity_ranges"]:
if "timespan" in d:
assocsByTimespan[d["timespan"]] = d["dataset_ids"]
assocsByTimespan[d["timespan"]] = [
x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
for x in d["dataset_ids"]
]
else:
# TODO: this is for backward compatibility, should
# be removed at some point.
assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = d["dataset_ids"]
assocsByTimespan[Timespan(begin=d["begin"], end=d["end"])] = [
x if not isinstance(x, int) else cast(DatasetId, _refIntId2UUID[x])
for x in d["dataset_ids"]
]
else:
raise ValueError(f"Unexpected calibration type for association: {collectionType.name}.")
else:
Expand All @@ -362,7 +375,14 @@ def __init__(self, stream: IO, registry: Registry):
FileDataset(
d.get("path"),
[
DatasetRef(datasetType, dataId, run=data["run"], id=refid)
DatasetRef(
datasetType,
dataId,
run=data["run"],
id=refid
if not isinstance(refid, int)
else cast(DatasetId, _refIntId2UUID[refid]),
)
for dataId, refid in zip(
ensure_iterable(d["data_id"]), ensure_iterable(d["dataset_id"])
)
Expand Down

0 comments on commit 93e3286

Please sign in to comment.