From ab02f9e275fcf64ebb6d37e39e448d8cff5e6e3d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 7 Jan 2022 11:14:44 -0700 Subject: [PATCH 1/5] Add method to convert TaskMetadata to a dict The dict() method converts it to the internal model form of the class with distinct sections for scalars, arrays, and metadata. The to_dict() method looks like something the user would expect the content to look like. --- python/lsst/pipe/base/_task_metadata.py | 23 +++++++++++++++++++++++ tests/test_taskmetadata.py | 3 +++ 2 files changed, 26 insertions(+) diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index 67abe89c0..97112377e 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -133,6 +133,29 @@ class method. metadata[key] = value return metadata + def to_dict(self) -> Dict[str, Any]: + """Convert the class to a simple dictionary. + + Returns + ------- + d : `dict` + Simple dictionary that can contain scalar values, array values + or other dictionary values. + + Notes + ----- + Unlike `dict()`, this method hides the model layout and combines + scalars, arrays, and other metadata in the same dictionary. Can be + used when a simple dictionary is needed. Use + `TaskMetadata.from_dict()` to convert it back. + """ + d = {} + d.update(self.scalars) + d.update(self.arrays) + for k, v in self.metadata.items(): + d[k] = v.to_dict() + return d + def add(self, name, value): """Store a new value, adding to a list if one already exists. diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py index 461569bba..60e73ece8 100644 --- a/tests/test_taskmetadata.py +++ b/tests/test_taskmetadata.py @@ -180,6 +180,9 @@ def testDict(self): self.assertEqual(meta.getArray("d"), [1, 2]) self.assertEqual(meta["e.h.i"], 4) + d2 = meta.to_dict() + self.assertEqual(d2, d) + j = meta.json() meta2 = TaskMetadata.parse_obj(json.loads(j)) self.assertEqual(meta2, meta) From 5823646991fbafea67d77940605939dc3523c9ac Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 7 Jan 2022 11:24:10 -0700 Subject: [PATCH 2/5] Prefer existing metadata storage class Previously the task_metadata storage classes use a definition that matches the metadata python type attached to the Task. Butler can now do type conversion on put and get so prefer a metadata storage class that matches the current definition. New metadata dataset types will use the default. This allows old registries to use old dataset type definitions without having to migrate them all but still allow Task to use updated metadata class. --- python/lsst/pipe/base/pipeline.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index 348e87850..370317de6 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -902,13 +902,20 @@ def makeDatasetTypesSet(connectionType: str, freeze: bool = True) -> NamedValueS # optionally add output dataset for metadata outputs = makeDatasetTypesSet("outputs", freeze=False) if taskDef.metadataDatasetName is not None: - # Metadata is supposed to be of the PropertySet type, its - # dimensions correspond to a task quantum + # Metadata is supposed to be of the TaskMetadata type, its + # dimensions correspond to a task quantum. dimensions = registry.dimensions.extract(taskDef.connections.dimensions) - if _TASK_METADATA_TYPE is TaskMetadata: - storageClass = "TaskMetadata" + + # Allow the storage class definition to be read from the existing + # dataset type definition if present. + try: + current = registry.getDatasetType(taskDef.metadataDatasetName) + except KeyError: + # No previous definition so use the default. + storageClass = "TaskMetadata" if _TASK_METADATA_TYPE is TaskMetadata else "PropertySet" else: - storageClass = "PropertySet" + storageClass = current.storageClass.name + outputs |= {DatasetType(taskDef.metadataDatasetName, dimensions, storageClass)} if taskDef.logOutputDatasetName is not None: # Log output dimensions correspond to a task quantum. From d6e69fcbd2bc1f2f5ca5e21e263ad29e6ff0ee6d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 7 Jan 2022 11:26:29 -0700 Subject: [PATCH 3/5] Switch default Task metadata class to TaskMetadata No longer use PropertySet by default. --- python/lsst/pipe/base/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/pipe/base/task.py b/python/lsst/pipe/base/task.py index 953188f07..48f0d2cdc 100644 --- a/python/lsst/pipe/base/task.py +++ b/python/lsst/pipe/base/task.py @@ -42,7 +42,7 @@ # Initially Task metadata was stored as a PropertyList but we want # to migrate to TaskMetadata to have explicit control over how it works # and how it is serialized. -METADATA_COMPATIBILITY = True +METADATA_COMPATIBILITY = False if METADATA_COMPATIBILITY: import lsst.daf.base as dafBase From 734c8069fc281a02fa3355d2532f77b780b84730 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 10 Jan 2022 13:59:05 -0700 Subject: [PATCH 4/5] Add news fragment --- doc/changes/DM-33155.feature.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/changes/DM-33155.feature.rst diff --git a/doc/changes/DM-33155.feature.rst b/doc/changes/DM-33155.feature.rst new file mode 100644 index 000000000..4ba3247fe --- /dev/null +++ b/doc/changes/DM-33155.feature.rst @@ -0,0 +1,4 @@ +* Add ``TaskMetadata.to_dict()`` method (this is now used by the ``PropertySet.from_mapping()`` method and triggered by the Butler if type conversion is needed). +* Use the existing metadata storage class definition if one already exists in a repository. +* Switch `~lsst.pipe.base.Task` to use `~lsst.pipe.base.TaskMetadata` for storing task metadata, rather than ``lsst.daf.base.PropertySet``. + This removes a C++ dependency from the middleware. From 854214cb4d7556f4e0ac7eac86640196096c72ac Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 10 Jan 2022 14:06:10 -0700 Subject: [PATCH 5/5] Remove PropertySet usage from package Now that TaskMetadata can be converted to PropertySet there is no need for Task to support both. --- python/lsst/pipe/base/_task_metadata.py | 2 +- python/lsst/pipe/base/task.py | 23 +++++++---------------- python/lsst/pipe/base/timer.py | 4 ++-- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index 97112377e..f800ce1b5 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -104,7 +104,7 @@ def from_metadata(cls, ps: PropertySetLike) -> "TaskMetadata": Parameters ---------- - ps : `lsst.daf.base.PropertySet` or `TaskMetadata` + ps : `PropertySetLike` or `TaskMetadata` A ``PropertySet``-like object to be transformed to a `TaskMetadata`. A `TaskMetadata` can be copied using this class method. diff --git a/python/lsst/pipe/base/task.py b/python/lsst/pipe/base/task.py index 48f0d2cdc..4a547c8eb 100644 --- a/python/lsst/pipe/base/task.py +++ b/python/lsst/pipe/base/task.py @@ -38,20 +38,11 @@ from ._task_metadata import TaskMetadata -# The Task metadata can be represented as different Python types. -# Initially Task metadata was stored as a PropertyList but we want -# to migrate to TaskMetadata to have explicit control over how it works -# and how it is serialized. -METADATA_COMPATIBILITY = False - -if METADATA_COMPATIBILITY: - import lsst.daf.base as dafBase - - _TASK_METADATA_TYPE = dafBase.PropertyList - _TASK_FULL_METADATA_TYPE = dafBase.PropertySet -else: - _TASK_METADATA_TYPE = TaskMetadata - _TASK_FULL_METADATA_TYPE = TaskMetadata +# This defines the Python type to use for task metadata. It is a private +# class variable that can be accessed by other closely-related middleware +# code and test code. +_TASK_METADATA_TYPE = TaskMetadata +_TASK_FULL_METADATA_TYPE = TaskMetadata class TaskError(Exception): @@ -119,7 +110,7 @@ class Task: - ``log``: an `logging.Logger` or subclass. - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below). - - ``metadata``: an `lsst.daf.base.PropertyList` or `TaskMetadata` for + - ``metadata``: a `TaskMetadata` for collecting task-specific metadata, e.g. data quality and performance metrics. This is data that is only meant to be persisted, never to be used by the task. @@ -279,7 +270,7 @@ def getFullMetadata(self): Returns ------- - metadata : `lsst.daf.base.PropertySet` or `TaskMetadata` + metadata : `TaskMetadata` The keys are the full task name. Values are metadata for the top-level task and all subtasks, sub-subtasks, etc. diff --git a/python/lsst/pipe/base/timer.py b/python/lsst/pipe/base/timer.py index e169f2af1..9bb64df9c 100644 --- a/python/lsst/pipe/base/timer.py +++ b/python/lsst/pipe/base/timer.py @@ -43,7 +43,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None): obj : `lsst.pipe.base.Task`-type or `None` A `~lsst.pipe.base.Task` or any other object with these two attributes: - - ``metadata`` an instance of `lsst.daf.base.PropertyList`` (or other + - ``metadata`` an instance of `~lsst.pipe.base.TaskMetadata` (or other object with ``add(name, value)`` method). - ``log`` an instance of `logging.Logger` or subclass. @@ -55,7 +55,7 @@ def logInfo(obj, prefix, logLevel=logging.DEBUG, metadata=None, logger=None): ``prefix = End`` when the method ends. logLevel : `int`, optional Log level (an `logging` level constant, such as `logging.DEBUG`). - metadata : `lsst.daf.base.PropertyList`, optional + metadata : `lsst.pipe.base.TaskMetadata`, optional Metadata object to write entries to, overriding ``obj.metadata``. logger : `logging.Logger` Log object to write entries to, overriding ``obj.log``.