From 56872e47beccbdcc6ab7c7a4cbb16891d3e35d5c Mon Sep 17 00:00:00 2001 From: Nate Lust Date: Mon, 24 Jul 2023 11:07:25 -0400 Subject: [PATCH] Optimizations in QuantumGraph loading --- doc/changes/DM-40121.misc.md | 1 + .../daf/butler/core/dimensions/_records.py | 26 +++++++++++-------- python/lsst/daf/butler/core/quantum.py | 19 +++++++------- 3 files changed, 25 insertions(+), 21 deletions(-) create mode 100644 doc/changes/DM-40121.misc.md diff --git a/doc/changes/DM-40121.misc.md b/doc/changes/DM-40121.misc.md new file mode 100644 index 0000000000..3b1ba56f19 --- /dev/null +++ b/doc/changes/DM-40121.misc.md @@ -0,0 +1 @@ +Added various optimizations to QuatnumGraph loading. diff --git a/python/lsst/daf/butler/core/dimensions/_records.py b/python/lsst/daf/butler/core/dimensions/_records.py index 882ef5938d..314777733e 100644 --- a/python/lsst/daf/butler/core/dimensions/_records.py +++ b/python/lsst/daf/butler/core/dimensions/_records.py @@ -23,6 +23,7 @@ __all__ = ("DimensionRecord", "SerializedDimensionRecord") +from collections.abc import Hashable from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple import lsst.sphgeom @@ -170,23 +171,22 @@ def direct( This method should only be called when the inputs are trusted. """ - _recItems = record.items() + # This method requires tuples as values of the mapping, but JSON + # readers will read things in as lists. Be kind and transparently + # transform to tuples + _recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore + # Type ignore because the ternary statement seems to confuse mypy # based on conflicting inferred types of v. key = ( definition, - frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore + frozenset(_recItems.items()), ) cache = PersistenceContextVars.serializedDimensionRecordMapping.get() if cache is not None and (result := cache.get(key)) is not None: return result - # This method requires tuples as values of the mapping, but JSON - # readers will read things in as lists. Be kind and transparently - # transform to tuples - serialized_record = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore - - node = cls.model_construct(definition=definition, record=serialized_record) # type: ignore + node = cls.model_construct(definition=definition, record=_recItems) # type: ignore if cache is not None: cache[key] = node @@ -351,6 +351,7 @@ def from_simple( simple: SerializedDimensionRecord, universe: DimensionUniverse | None = None, registry: Registry | None = None, + cacheKey: Hashable | None = None, ) -> DimensionRecord: """Construct a new object from the simplified form. @@ -367,6 +368,10 @@ def from_simple( registry : `lsst.daf.butler.Registry`, optional Registry from which a universe can be extracted. Can be `None` if universe is provided explicitly. + cacheKey : `Hashable` or `None` + If this is not None, it will be used as a key for any cached + reconstruction instead of calculating a value from the serialized + format. Returns ------- @@ -380,12 +385,11 @@ def from_simple( if universe is None: # this is for mypy raise ValueError("Unable to determine a usable universe") - _recItems = simple.record.items() # Type ignore because the ternary statement seems to confuse mypy # based on conflicting inferred types of v. - key = ( + key = cacheKey or ( simple.definition, - frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore + frozenset(simple.record.items()), # type: ignore ) cache = PersistenceContextVars.dimensionRecords.get() if cache is not None and (result := cache.get(key)) is not None: diff --git a/python/lsst/daf/butler/core/quantum.py b/python/lsst/daf/butler/core/quantum.py index 21344f7ddd..f784cb5fd9 100644 --- a/python/lsst/daf/butler/core/quantum.py +++ b/python/lsst/daf/butler/core/quantum.py @@ -53,19 +53,18 @@ def _reconstructDatasetRef( ) -> DatasetRef: """Reconstruct a DatasetRef stored in a Serialized Quantum.""" # Reconstruct the dimension records + # if the dimension record has been loaded previously use that, + # otherwise load it from the dict of Serialized DimensionRecords + if dimensionRecords is None and ids: + raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ") records = {} for dId in ids: - # if the dimension record has been loaded previously use that, - # otherwise load it from the dict of Serialized DimensionRecords - if dimensionRecords is None: - raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ") - tmpSerialized = dimensionRecords[dId] - reconstructedDim = DimensionRecord.from_simple(tmpSerialized, universe=universe) - records[sys.intern(reconstructedDim.definition.name)] = reconstructedDim - # turn the serialized form into an object and attach the dimension records + # Ignore typing because it is missing that the above if statement + # ensures that if there is a loop that dimensionRecords is not None. + tmpSerialized = dimensionRecords[dId] # type: ignore + records[tmpSerialized.definition] = tmpSerialized + simple.dataId.records = records or None rebuiltDatasetRef = DatasetRef.from_simple(simple, universe, datasetType=type_) - if records: - object.__setattr__(rebuiltDatasetRef, "dataId", rebuiltDatasetRef.dataId.expanded(records)) return rebuiltDatasetRef