-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-39582: Add caching for some butler primitives during deserialization #858
Changes from 6 commits
1310356
97137a2
93e3286
498258d
a2805b3
b849bea
253a01d
ce5b439
fbf00cb
5432818
ef41fb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Deprecate reconstituteDimensions argument from Quantum.from_simple | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Added ability for some butler primitives to be cached and re-used on deserialization through a special | ||
interface. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ | |
] | ||
|
||
import enum | ||
import sys | ||
import uuid | ||
from collections.abc import Iterable | ||
from typing import TYPE_CHECKING, Any, ClassVar | ||
|
@@ -41,6 +42,7 @@ | |
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate | ||
from ..json import from_json_pydantic, to_json_pydantic | ||
from ..named import NamedKeyDict | ||
from ..persistenceContext import PersistenceContextVars | ||
from .type import DatasetType, SerializedDatasetType | ||
|
||
if TYPE_CHECKING: | ||
|
@@ -142,6 +144,10 @@ def makeDatasetId( | |
return uuid.uuid5(self.NS_UUID, data) | ||
|
||
|
||
# This is constant, so don't recreate a set for each instance | ||
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"} | ||
|
||
|
||
class SerializedDatasetRef(BaseModel): | ||
"""Simplified model of a `DatasetRef` suitable for serialization.""" | ||
|
||
|
@@ -202,9 +208,9 @@ def direct( | |
datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType), | ||
) | ||
setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId)) | ||
setter(node, "run", run) | ||
setter(node, "run", sys.intern(run)) | ||
setter(node, "component", component) | ||
setter(node, "__fields_set__", {"id", "datasetType", "dataId", "run", "component"}) | ||
setter(node, "__fields_set__", _serializedDatasetRefFieldsSet) | ||
return node | ||
|
||
|
||
|
@@ -254,7 +260,7 @@ class DatasetRef: | |
|
||
_serializedType = SerializedDatasetRef | ||
__slots__ = ( | ||
"id", | ||
"_id", | ||
"datasetType", | ||
"dataId", | ||
"run", | ||
|
@@ -277,12 +283,18 @@ def __init__( | |
self.dataId = dataId | ||
self.run = run | ||
if id is not None: | ||
self.id = id | ||
self._id = id.int | ||
else: | ||
self.id = DatasetIdFactory().makeDatasetId( | ||
self.run, self.datasetType, self.dataId, id_generation_mode | ||
self._id = ( | ||
DatasetIdFactory() | ||
.makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode) | ||
.int | ||
) | ||
|
||
@property | ||
def id(self) -> DatasetId: | ||
return uuid.UUID(int=self._id) | ||
|
||
def __eq__(self, other: Any) -> bool: | ||
try: | ||
return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id) | ||
|
@@ -396,9 +408,18 @@ def from_simple( | |
ref : `DatasetRef` | ||
Newly-constructed object. | ||
""" | ||
cache = PersistenceContextVars.datasetRefs.get() | ||
localName = sys.intern( | ||
datasetType.name | ||
if datasetType is not None | ||
else (x.name if (x := simple.datasetType) is not None else "") | ||
) | ||
key = (simple.id.int, localName) | ||
natelust marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if cache is not None and (cachedRef := cache.get(key, None)) is not None: | ||
return cachedRef | ||
# Minimalist component will just specify component and id and | ||
# require registry to reconstruct | ||
if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}): | ||
if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you rewrite this as There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is not logically the same thing; we only want to run this when they are all `None`. |
||
if registry is None: | ||
raise ValueError("Registry is required to construct component DatasetRef from integer id") | ||
if simple.id is None: | ||
|
@@ -408,6 +429,8 @@ def from_simple( | |
raise RuntimeError(f"No matching dataset found in registry for id {simple.id}") | ||
if simple.component: | ||
ref = ref.makeComponentRef(simple.component) | ||
if cache is not None: | ||
cache[key] = ref | ||
return ref | ||
|
||
if universe is None and registry is None: | ||
|
@@ -443,7 +466,10 @@ def from_simple( | |
f"Encountered with {simple!r}{dstr}." | ||
) | ||
|
||
return cls(datasetType, dataId, id=simple.id, run=simple.run) | ||
newRef = cls(datasetType, dataId, id=simple.id, run=simple.run) | ||
if cache is not None: | ||
cache[key] = newRef | ||
return newRef | ||
|
||
to_json = to_json_pydantic | ||
from_json: ClassVar = classmethod(from_json_pydantic) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ | |
|
||
from .datasets import DatasetId | ||
from .dimensions import DimensionUniverse | ||
from .persistenceContext import PersistenceContextVars | ||
from .storedFileInfo import StoredDatastoreItemInfo | ||
|
||
if TYPE_CHECKING: | ||
|
@@ -70,6 +71,11 @@ def direct( | |
|
||
This method should only be called when the inputs are trusted. | ||
""" | ||
key = frozenset(dataset_ids) | ||
cache = PersistenceContextVars.serializedDatastoreRecordMapping.get() | ||
if cache is not None and (value := cache.get(key)) is not None: | ||
return value | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would not expect many (or maybe any) cache hits for this. |
||
|
||
data = SerializedDatastoreRecordData.__new__(cls) | ||
setter = object.__setattr__ | ||
# JSON makes strings out of UUIDs, need to convert them back | ||
|
@@ -83,6 +89,8 @@ def direct( | |
if (id := record.get("dataset_id")) is not None: | ||
record["dataset_id"] = uuid.UUID(id) if isinstance(id, str) else id | ||
setter(data, "records", records) | ||
if cache is not None: | ||
cache[key] = data | ||
return data | ||
|
||
|
||
|
@@ -204,6 +212,10 @@ def from_simple( | |
item_info : `StoredDatastoreItemInfo` | ||
De-serialized instance of `StoredDatastoreItemInfo`. | ||
""" | ||
cache = PersistenceContextVars.dataStoreRecords.get() | ||
key = frozenset(simple.dataset_ids) | ||
if cache is not None and (record := cache.get(key)) is not None: | ||
return record | ||
records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {} | ||
# make sure that all dataset IDs appear in the dict even if they don't | ||
# have records. | ||
|
@@ -216,4 +228,7 @@ def from_simple( | |
info = klass.from_record(record) | ||
dataset_type_records = records.setdefault(info.dataset_id, {}) | ||
dataset_type_records.setdefault(table_name, []).append(info) | ||
return cls(records=records) | ||
record = cls(records=records) | ||
if cache is not None: | ||
cache[key] = record | ||
return record |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the file name should be `removal`, not `api`, according to the README. Add backticks around `reconstituteDimensions` and `Quantum.from_simple`, and a period at the end.