Skip to content

Commit

Permalink
Optimizations in QuantumGraph loading
Browse files Browse the repository at this point in the history
  • Loading branch information
natelust committed Jul 25, 2023
1 parent 1fe3838 commit 56872e4
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-40121.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added various optimizations to QuatnumGraph loading.
26 changes: 15 additions & 11 deletions python/lsst/daf/butler/core/dimensions/_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

__all__ = ("DimensionRecord", "SerializedDimensionRecord")

from collections.abc import Hashable
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple

import lsst.sphgeom
Expand Down Expand Up @@ -170,23 +171,22 @@ def direct(
This method should only be called when the inputs are trusted.
"""
_recItems = record.items()
# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform to tuples
_recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore

# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
definition,
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
frozenset(_recItems.items()),
)
cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
if cache is not None and (result := cache.get(key)) is not None:
return result

# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform to tuples
serialized_record = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore

node = cls.model_construct(definition=definition, record=serialized_record) # type: ignore
node = cls.model_construct(definition=definition, record=_recItems) # type: ignore

if cache is not None:
cache[key] = node
Expand Down Expand Up @@ -351,6 +351,7 @@ def from_simple(
simple: SerializedDimensionRecord,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
cacheKey: Hashable | None = None,
) -> DimensionRecord:
"""Construct a new object from the simplified form.
Expand All @@ -367,6 +368,10 @@ def from_simple(
registry : `lsst.daf.butler.Registry`, optional
Registry from which a universe can be extracted. Can be `None`
if universe is provided explicitly.
cacheKey : `Hashable` or `None`
If this is not None, it will be used as a key for any cached
reconstruction instead of calculating a value from the serialized
format.
Returns
-------
Expand All @@ -380,12 +385,11 @@ def from_simple(
if universe is None:
# this is for mypy
raise ValueError("Unable to determine a usable universe")
_recItems = simple.record.items()
# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
key = cacheKey or (
simple.definition,
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
frozenset(simple.record.items()), # type: ignore
)
cache = PersistenceContextVars.dimensionRecords.get()
if cache is not None and (result := cache.get(key)) is not None:
Expand Down
19 changes: 9 additions & 10 deletions python/lsst/daf/butler/core/quantum.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,18 @@ def _reconstructDatasetRef(
) -> DatasetRef:
"""Reconstruct a DatasetRef stored in a Serialized Quantum."""
# Reconstruct the dimension records
# if the dimension record has been loaded previously use that,
# otherwise load it from the dict of Serialized DimensionRecords
if dimensionRecords is None and ids:
raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")

Check warning on line 59 in python/lsst/daf/butler/core/quantum.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/core/quantum.py#L59

Added line #L59 was not covered by tests
records = {}
for dId in ids:
# if the dimension record has been loaded previously use that,
# otherwise load it from the dict of Serialized DimensionRecords
if dimensionRecords is None:
raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")
tmpSerialized = dimensionRecords[dId]
reconstructedDim = DimensionRecord.from_simple(tmpSerialized, universe=universe)
records[sys.intern(reconstructedDim.definition.name)] = reconstructedDim
# turn the serialized form into an object and attach the dimension records
# Ignore typing because it is missing that the above if statement
# ensures that if there is a loop that dimensionRecords is not None.
tmpSerialized = dimensionRecords[dId] # type: ignore
records[tmpSerialized.definition] = tmpSerialized
simple.dataId.records = records or None
rebuiltDatasetRef = DatasetRef.from_simple(simple, universe, datasetType=type_)
if records:
object.__setattr__(rebuiltDatasetRef, "dataId", rebuiltDatasetRef.dataId.expanded(records))
return rebuiltDatasetRef


Expand Down

0 comments on commit 56872e4

Please sign in to comment.