diff --git a/doc/changes/DM-33155.feature.rst b/doc/changes/DM-33155.feature.rst new file mode 100644 index 000000000..4ba3247fe --- /dev/null +++ b/doc/changes/DM-33155.feature.rst @@ -0,0 +1,4 @@ +* Add ``TaskMetadata.to_dict()`` method (this is now used by the ``PropertySet.from_mapping()`` method and triggered by the Butler if type conversion is needed). +* Use the existing metadata storage class definition if one already exists in a repository. +* Switch `~lsst.pipe.base.Task` to use `~lsst.pipe.base.TaskMetadata` for storing task metadata, rather than ``lsst.daf.base.PropertySet``. + This removes a C++ dependency from the middleware. diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index 67abe89c0..f800ce1b5 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -104,7 +104,7 @@ def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata": Parameters ---------- - ps : `lsst.daf.base.PropertySet` or `TaskMetadata` + ps : `PropertySetLike` or `TaskMetadata` A ``PropertySet``-like object to be transformed to a `TaskMetadata`. A `TaskMetadata` can be copied using this class method. @@ -133,6 +133,29 @@ class method. metadata[key] = value return metadata + def to_dict(self) -> Dict[str, Any]: + """Convert the class to a simple dictionary. + + Returns + ------- + d : `dict` + Simple dictionary that can contain scalar values, array values + or other dictionary values. + + Notes + ----- + Unlike `dict()`, this method hides the model layout and combines + scalars, arrays, and other metadata in the same dictionary. Can be + used when a simple dictionary is needed. Use + `TaskMetadata.from_dict()` to convert it back. + """ + d = {} + d.update(self.scalars) + d.update(self.arrays) + for k, v in self.metadata.items(): + d[k] = v.to_dict() + return d + def add(self, name, value): """Store a new value, adding to a list if one already exists. diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index 348e87850..370317de6 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -902,13 +902,20 @@ def makeDatasetTypesSet(connectionType: str, freeze: bool = True) -> NamedValueS # optionally add output dataset for metadata outputs = makeDatasetTypesSet("outputs", freeze=False) if taskDef.metadataDatasetName is not None: - # Metadata is supposed to be of the PropertySet type, its - # dimensions correspond to a task quantum + # Metadata is supposed to be of the TaskMetadata type, its + # dimensions correspond to a task quantum. dimensions = registry.dimensions.extract(taskDef.connections.dimensions) - if _TASK_METADATA_TYPE is TaskMetadata: - storageClass = "TaskMetadata" + + # Allow the storage class definition to be read from the existing + # dataset type definition if present. + try: + current = registry.getDatasetType(taskDef.metadataDatasetName) + except KeyError: + # No previous definition so use the default. + storageClass = "TaskMetadata" if _TASK_METADATA_TYPE is TaskMetadata else "PropertySet" else: - storageClass = "PropertySet" + storageClass = current.storageClass.name + outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, storageClass)} if taskDef.logOutputDatasetName is not None: # Log output dimensions correspond to a task quantum. diff --git a/python/lsst/pipe/base/task.py b/python/lsst/pipe/base/task.py index 953188f07..4a547c8eb 100644 --- a/python/lsst/pipe/base/task.py +++ b/python/lsst/pipe/base/task.py @@ -38,20 +38,11 @@ from ._task_metadata import TaskMetadata -# The Task metadata can be represented as different Python types. -# Initially Task metadata was stored as a PropertyList but we want -# to migrate to TaskMetadata to have explicit control over how it works -# and how it is serialized. -METADATA_COMPATIBILITY = True - -if METADATA_COMPATIBILITY: - import lsst.daf.base as dafBase - - _TASK_METADATA_TYPE = dafBase.PropertyList - _TASK_FULL_METADATA_TYPE = dafBase.PropertySet -else: - _TASK_METADATA_TYPE = TaskMetadata - _TASK_FULL_METADATA_TYPE = TaskMetadata +# This defines the Python type to use for task metadata. It is a private +# class variable that can be accessed by other closely-related middleware +# code and test code. +_TASK_METADATA_TYPE = TaskMetadata +_TASK_FULL_METADATA_TYPE = TaskMetadata class TaskError(Exception): @@ -119,7 +110,7 @@ class Task: - ``log``: an `logging.Logger` or subclass. - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below). - - ``metadata``: an `lsst.daf.base.PropertyList` or `TaskMetadata` for + - ``metadata``: a `TaskMetadata` for collecting task-specific metadata, e.g. data quality and performance metrics. This is data that is only meant to be persisted, never to be used by the task. @@ -279,7 +270,7 @@ def getFullMetadata(self): Returns ------- - metadata : `lsst.daf.base.PropertySet` or `TaskMetadata` + metadata : `TaskMetadata` The keys are the full task name. Values are metadata for the top-level task and all subtasks, sub-subtasks, etc. diff --git a/python/lsst/pipe/base/timer.py b/python/lsst/pipe/base/timer.py index e169f2af1..9bb64df9c 100644 --- a/python/lsst/pipe/base/timer.py +++ b/python/lsst/pipe/base/timer.py @@ -43,7 +43,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None): obj : `lsst.pipe.base.Task`-type or `None` A `~lsst.pipe.base.Task` or any other object with these two attributes: - - ``metadata`` an instance of `lsst.daf.base.PropertyList`` (or other + - ``metadata`` an instance of `~lsst.pipe.base.TaskMetadata` (or other object with ``add(name, value)`` method). - ``log`` an instance of `logging.Logger` or subclass. @@ -55,7 +55,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None): ``prefix = End`` when the method ends. logLevel : `int`, optional Log level (an `logging` level constant, such as `logging.DEBUG`). - metadata : `lsst.daf.base.PropertyList`, optional + metadata : `lsst.pipe.base.TaskMetadata`, optional Metadata object to write entries to, overriding ``obj.metadata``. logger : `logging.Logger` Log object to write entries to, overriding ``obj.log``. diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py index 461569bba..60e73ece8 100644 --- a/tests/test_taskmetadata.py +++ b/tests/test_taskmetadata.py @@ -180,6 +180,9 @@ def testDict(self): self.assertEqual(meta.getArray("d"), [1, 2]) self.assertEqual(meta["e.h.i"], 4) + d2 = meta.to_dict() + self.assertEqual(d2, d) + j = meta.json() meta2 = TaskMetadata.parse_obj(json.loads(j)) self.assertEqual(meta2, meta)