Optimizations in QuantumGraph loading

lsst · Jul 25, 2023 · 56872e4 · 56872e4
1 parent 1fe3838
commit 56872e4
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 21 deletions.
diff --git a/doc/changes/DM-40121.misc.md b/doc/changes/DM-40121.misc.md
@@ -0,0 +1 @@
+Added various optimizations to QuatnumGraph loading.
diff --git a/python/lsst/daf/butler/core/dimensions/_records.py b/python/lsst/daf/butler/core/dimensions/_records.py
@@ -23,6 +23,7 @@
 
 __all__ = ("DimensionRecord", "SerializedDimensionRecord")
 
+from collections.abc import Hashable
 from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple
 
 import lsst.sphgeom
@@ -170,23 +171,22 @@ def direct(
 
         This method should only be called when the inputs are trusted.
         """
-        _recItems = record.items()
+        # This method requires tuples as values of the mapping, but JSON
+        # readers will read things in as lists. Be kind and transparently
+        # transform to tuples
+        _recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore
+
         # Type ignore because the ternary statement seems to confuse mypy
         # based on conflicting inferred types of v.
         key = (
             definition,
-            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
+            frozenset(_recItems.items()),
         )
         cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
         if cache is not None and (result := cache.get(key)) is not None:
             return result
 
-        # This method requires tuples as values of the mapping, but JSON
-        # readers will read things in as lists. Be kind and transparently
-        # transform to tuples
-        serialized_record = {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore
-
-        node = cls.model_construct(definition=definition, record=serialized_record)  # type: ignore
+        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore
 
         if cache is not None:
             cache[key] = node
@@ -351,6 +351,7 @@ def from_simple(
         simple: SerializedDimensionRecord,
         universe: DimensionUniverse | None = None,
         registry: Registry | None = None,
+        cacheKey: Hashable | None = None,
     ) -> DimensionRecord:
         """Construct a new object from the simplified form.
 
@@ -367,6 +368,10 @@ def from_simple(
         registry : `lsst.daf.butler.Registry`, optional
             Registry from which a universe can be extracted. Can be `None`
             if universe is provided explicitly.
+        cacheKey : `Hashable` or `None`
+            If this is not None, it will be used as a key for any cached
+            reconstruction instead of calculating a value from the serialized
+            format.
 
         Returns
         -------
@@ -380,12 +385,11 @@ def from_simple(
         if universe is None:
             # this is for mypy
             raise ValueError("Unable to determine a usable universe")
-        _recItems = simple.record.items()
         # Type ignore because the ternary statement seems to confuse mypy
         # based on conflicting inferred types of v.
-        key = (
+        key = cacheKey or (
             simple.definition,
-            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
+            frozenset(simple.record.items()),  # type: ignore
         )
         cache = PersistenceContextVars.dimensionRecords.get()
         if cache is not None and (result := cache.get(key)) is not None:

diff --git a/python/lsst/daf/butler/core/quantum.py b/python/lsst/daf/butler/core/quantum.py
@@ -53,19 +53,18 @@ def _reconstructDatasetRef(
 ) -> DatasetRef:
     """Reconstruct a DatasetRef stored in a Serialized Quantum."""
     # Reconstruct the dimension records
+    # if the dimension record has been loaded previously use that,
+    # otherwise load it from the dict of Serialized DimensionRecords
+    if dimensionRecords is None and ids:
+        raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")
     records = {}
     for dId in ids:
-        # if the dimension record has been loaded previously use that,
-        # otherwise load it from the dict of Serialized DimensionRecords
-        if dimensionRecords is None:
-            raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")
-        tmpSerialized = dimensionRecords[dId]
-        reconstructedDim = DimensionRecord.from_simple(tmpSerialized, universe=universe)
-        records[sys.intern(reconstructedDim.definition.name)] = reconstructedDim
-    # turn the serialized form into an object and attach the dimension records
+        # Ignore typing because it is missing that the above if statement
+        # ensures that if there is a loop that dimensionRecords is not None.
+        tmpSerialized = dimensionRecords[dId]  # type: ignore
+        records[tmpSerialized.definition] = tmpSerialized
+    simple.dataId.records = records or None
     rebuiltDatasetRef = DatasetRef.from_simple(simple, universe, datasetType=type_)
-    if records:
-        object.__setattr__(rebuiltDatasetRef, "dataId", rebuiltDatasetRef.dataId.expanded(records))
     return rebuiltDatasetRef