From 8fbcfa0bca270213536677aed04f4775f7cdf673 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 28 Jul 2023 15:26:35 -0700 Subject: [PATCH 1/6] Clean up the TaskMetadata.names() topLevelOnly deprecation We should have been able to remove the parameter by now but this was made difficult by us still allowing code to use it without warning. --- python/lsst/pipe/base/_task_metadata.py | 54 ++++++++++++------------- tests/test_task.py | 2 +- tests/test_taskmetadata.py | 2 +- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index 14bf75fb..ab23bad1 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -28,11 +28,9 @@ from typing import Any, Protocol from lsst.daf.butler._compat import _BaseModelCompat +from lsst.utils.introspection import find_outside_stacklevel from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr -_DEPRECATION_REASON = "Will be removed after v25." -_DEPRECATION_VERSION = "v24" - # The types allowed in a Task metadata field are restricted # to allow predictable serialization. _ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool) @@ -256,41 +254,41 @@ def getArray(self, key: str) -> list[Any]: # Report the correct key. raise KeyError(f"'{key}' not found") from None - def names(self, topLevelOnly: bool = True) -> set[str]: + def names(self, topLevelOnly: bool | None = None) -> set[str]: """Return the hierarchical keys from the metadata. Parameters ---------- - topLevelOnly : `bool` - If true, return top-level keys, otherwise full metadata item keys. + topLevelOnly : `bool` or `None`, optional + This parameter is deprecated and will be removed in the future. + If given it can only be `False`. All names in the hierarchy are + always returned. Returns ------- names : `collections.abc.Set` - A set of top-level keys or full metadata item keys, including - the top-level keys. - - Notes - ----- - Should never be called in new code with ``topLevelOnly`` set to `True` - -- this is equivalent to asking for the keys and is the default - when iterating through the task metadata. In this case a deprecation - message will be issued and the ability will raise an exception - in a future release. - - When ``topLevelOnly`` is `False` all keys, including those from the - hierarchy and the top-level hierarchy, are returned. + A set of all keys, including those from the hierarchy and the + top-level hierarchy. """ if topLevelOnly: - warnings.warn("Use keys() instead. " + _DEPRECATION_REASON, FutureWarning) - return set(self.keys()) - else: - names = set() - for k, v in self.items(): - names.add(k) # Always include the current level - if isinstance(v, TaskMetadata): - names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)}) - return names + raise RuntimeError( + "The topLevelOnly parameter is no longer supported and can not have a True value." + ) + + if topLevelOnly is False: + warnings.warn( + "The topLevelOnly parameter is deprecated and is always assumed to be False." + " It will be removed completely after v26.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.pipe.base"), + ) + + names = set() + for k, v in self.items(): + names.add(k) # Always include the current level + if isinstance(v, TaskMetadata): + names.update({k + "." + item for item in v.names()}) + return names def paramNames(self, topLevelOnly: bool) -> set[str]: """Return hierarchical names. diff --git a/tests/test_task.py b/tests/test_task.py index 995904c6..13ab9c82 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -203,7 +203,7 @@ def testGetFullMetadata(self): self.assertIsInstance(fullMetadata["addMult:mult"], _TASK_METADATA_TYPE) self.assertEqual(set(fullMetadata), {"addMult", "addMult:add", "addMult:mult"}) - all_names = fullMetadata.names(topLevelOnly=False) + all_names = fullMetadata.names() self.assertIn("addMult", all_names) self.assertIn("addMult.runStartUtc", all_names) diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py index 1bcbeca2..552e3842 100644 --- a/tests/test_taskmetadata.py +++ b/tests/test_taskmetadata.py @@ -72,7 +72,7 @@ def testTaskMetadata(self): self.assertEqual(meta.paramNames(topLevelOnly=False), {"test", "new.int", "new.str"}) self.assertEqual(meta.paramNames(topLevelOnly=True), {"test"}) - self.assertEqual(meta.names(topLevelOnly=False), {"test", "new", "new.int", "new.str"}) + self.assertEqual(meta.names(), {"test", "new", "new.int", "new.str"}) self.assertEqual(meta.keys(), ("test", "new")) self.assertEqual(len(meta), 2) self.assertEqual(len(meta["new"]), 2) From 9742ff2b7f0a705b8f807451b4d2a78a6a81816d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 28 Jul 2023 15:56:22 -0700 Subject: [PATCH 2/6] Remove pickle support Deprecated for a long time. --- python/lsst/pipe/base/graph/graph.py | 79 ++++++---------------------- 1 file changed, 17 insertions(+), 62 deletions(-) diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index 8f0bbb31..24bd4b33 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -26,11 +26,9 @@ import json import lzma import os -import pickle import struct import time import uuid -import warnings from collections import defaultdict, deque from collections.abc import Generator, Iterable, Mapping, MutableMapping from itertools import chain @@ -901,9 +899,7 @@ def loadUri( uri : convertible to `~lsst.resources.ResourcePath` URI from where to load the graph. universe : `~lsst.daf.butler.DimensionUniverse`, optional - `~lsst.daf.butler.DimensionUniverse` instance, not used by the - method itself but needed to ensure that registry data structures - are initialized. If `None` it is loaded from the `QuantumGraph` + If `None` it is loaded from the `QuantumGraph` saved structure. If supplied, the `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. @@ -929,7 +925,7 @@ def loadUri( Raises ------ TypeError - Raised if pickle contains instance of a type other than + Raised if file contains instance of a type other than `QuantumGraph`. ValueError Raised if one or more of the nodes requested is not in the @@ -940,33 +936,15 @@ def loadUri( Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not compatible with the `~lsst.daf.butler.DimensionUniverse` saved in the graph. - - Notes - ----- - Reading Quanta from pickle requires existence of singleton - `~lsst.daf.butler.DimensionUniverse` which is usually instantiated - during `~lsst.daf.butler.Registry` initialization. To make sure - that `~lsst.daf.butler.DimensionUniverse` exists this method - accepts dummy `~lsst.daf.butler.DimensionUniverse` argument. """ uri = ResourcePath(uri) - # With ResourcePath we have the choice of always using a local file - # or reading in the bytes directly. Reading in bytes can be more - # efficient for reasonably-sized pickle files when the resource - # is remote. For now use the local file variant. For a local file - # as_local() does nothing. - - if uri.getExtension() in (".pickle", ".pkl"): - with uri.as_local() as local, open(local.ospath, "rb") as fd: - warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method") - qgraph = pickle.load(fd) - elif uri.getExtension() in (".qgraph"): + if uri.getExtension() in {".qgraph"}: with LoadHelper(uri, minimumVersion) as loader: qgraph = loader.load(universe, nodes, graphID) else: - raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`") + raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}") if not isinstance(qgraph, QuantumGraph): - raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}") + raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}") return qgraph @classmethod @@ -995,17 +973,14 @@ def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str Raises ------ ValueError - Raised if `QuantumGraph` was saved as a pickle. Raised if the extension of the file specified by uri is not a `QuantumGraph` extension. """ uri = ResourcePath(uri) - if uri.getExtension() in (".pickle", ".pkl"): - raise ValueError("Reading a header from a pickle save is not supported") - elif uri.getExtension() in (".qgraph"): + if uri.getExtension() in {".qgraph"}: return LoadHelper(uri, minimumVersion).readHeader() else: - raise ValueError("Only know how to handle files saved as `qgraph`") + raise ValueError("Only know how to handle files saved as `.qgraph`") def buildAndPrintHeader(self) -> None: """Create a header that would be used in a save of this object and @@ -1020,7 +995,7 @@ def save(self, file: BinaryIO) -> None: Parameters ---------- file : `io.BufferedIOBase` - File to write pickle data open in binary mode. + File to write data open in binary mode. """ buffer = self._buildSaveObject() file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes @@ -1155,22 +1130,18 @@ def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[byte map_lengths = struct.pack(fmt_string, len(header_encode)) # write each component of the save out in a deterministic order - # buffer = io.BytesIO() - # buffer.write(map_lengths) - # buffer.write(taskDef_pickle) - # buffer.write(map_pickle) buffer = bytearray() buffer.extend(MAGIC_BYTES) buffer.extend(save_bytes) buffer.extend(map_lengths) buffer.extend(header_encode) - # Iterate over the length of pickleData, and for each element pop the + # Iterate over the length of jsonData, and for each element pop the # leftmost element off the deque and write it out. This is to save # memory, as the memory is added to the buffer object, it is removed # from from the container. # - # Only this section needs to worry about memory pressue because - # everything else written to the buffer prior to this pickle data is + # Only this section needs to worry about memory pressure because + # everything else written to the buffer prior to this data is # only on the order of kilobytes to low numbers of megabytes. while jsonData: buffer.extend(jsonData.popleft()) @@ -1193,11 +1164,9 @@ def load( Parameters ---------- file : `io.IO` of bytes - File with pickle data open in binary mode. + File with data open in binary mode. universe : `~lsst.daf.butler.DimensionUniverse`, optional - `~lsst.daf.butler.DimensionUniverse` instance, not used by the - method itself but needed to ensure that registry data structures - are initialized. If `None` it is loaded from the `QuantumGraph` + If `None` it is loaded from the `QuantumGraph` saved structure. If supplied, the `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. @@ -1223,32 +1192,18 @@ def load( Raises ------ TypeError - Raised if pickle contains instance of a type other than + Raised if data contains instance of a type other than `QuantumGraph`. ValueError Raised if one or more of the nodes requested is not in the `QuantumGraph` or if graphID parameter does not match the graph being loaded or if the supplied uri does not point at a valid `QuantumGraph` save file. - - Notes - ----- - Reading Quanta from pickle requires existence of singleton - `~lsst.daf.butler.DimensionUniverse` which is usually instantiated - during `~lsst.daf.butler.Registry` initialization. To make sure that - `~lsst.daf.butler.DimensionUniverse` exists this method accepts dummy - `~lsst.daf.butler.DimensionUniverse` argument. """ - # Try to see if the file handle contains pickle data, this will be - # removed in the future - try: - qgraph = pickle.load(file) - warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method") - except pickle.UnpicklingError: - with LoadHelper(file, minimumVersion) as loader: - qgraph = loader.load(universe, nodes, graphID) + with LoadHelper(file, minimumVersion) as loader: + qgraph = loader.load(universe, nodes, graphID) if not isinstance(qgraph, QuantumGraph): - raise TypeError(f"QuantumGraph pickle file has contains unexpected object type: {type(qgraph)}") + raise TypeError(f"QuantumGraph file contains unexpected object type: {type(qgraph)}") return qgraph def iterTaskGraph(self) -> Generator[TaskDef, None, None]: From 9d79499911a72cf2f69c80d4ffc6455e9d1ea563 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 28 Jul 2023 15:59:23 -0700 Subject: [PATCH 3/6] Add stacklevel to warnings --- python/lsst/pipe/base/_datasetQueryConstraints.py | 8 +++++++- python/lsst/pipe/base/pipelineIR.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/lsst/pipe/base/_datasetQueryConstraints.py b/python/lsst/pipe/base/_datasetQueryConstraints.py index 0390c8b0..7bf8cc2e 100644 --- a/python/lsst/pipe/base/_datasetQueryConstraints.py +++ b/python/lsst/pipe/base/_datasetQueryConstraints.py @@ -31,6 +31,8 @@ from collections.abc import Iterable, Iterator from typing import Protocol +from lsst.utils.introspection import find_outside_stacklevel + class DatasetQueryConstraintVariant(Iterable, Protocol): """Base for all the valid variants for controlling @@ -83,7 +85,11 @@ def fromExpression(cls, expression: str) -> "DatasetQueryConstraintVariant": return cls.OFF else: if " " in expression: - warnings.warn("Whitespace found in expression will be trimmed", RuntimeWarning) + warnings.warn( + "Whitespace found in expression will be trimmed", + RuntimeWarning, + stacklevel=find_outside_stacklevel("lsst.pipe.base"), + ) expression = expression.replace(" ", "") members = expression.split(",") return cls.LIST(members) diff --git a/python/lsst/pipe/base/pipelineIR.py b/python/lsst/pipe/base/pipelineIR.py index f8a57dd8..ed9b82d7 100644 --- a/python/lsst/pipe/base/pipelineIR.py +++ b/python/lsst/pipe/base/pipelineIR.py @@ -43,6 +43,7 @@ import yaml from lsst.resources import ResourcePath, ResourcePathExpression +from lsst.utils.introspection import find_outside_stacklevel class _Tags(enum.Enum): @@ -315,7 +316,8 @@ def formatted(self, parameters: ParametersIR) -> ConfigIR: warnings.warn( f"config {key} contains value {match.group(0)} which is formatted like a " "Pipeline parameter but was not found within the Pipeline, if this was not " - "intentional, check for a typo" + "intentional, check for a typo", + stacklevel=find_outside_stacklevel("lsst.pipe.base"), ) return new_config From 21a940ff00b87760697193b8ec555814156303ec Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 3 Aug 2023 07:44:20 -0700 Subject: [PATCH 4/6] Add news fragment --- doc/changes/DM-40032.removal.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changes/DM-40032.removal.rst diff --git a/doc/changes/DM-40032.removal.rst b/doc/changes/DM-40032.removal.rst new file mode 100644 index 00000000..6e11549b --- /dev/null +++ b/doc/changes/DM-40032.removal.rst @@ -0,0 +1 @@ +Removed support for reading quantum graphs in pickle format. From 95d54eb163e9db1b3301783d09c3f040afcaa96e Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 3 Aug 2023 07:47:12 -0700 Subject: [PATCH 5/6] Add tests for deprecated TaskMetadata.names --- tests/test_taskmetadata.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py index 552e3842..77421401 100644 --- a/tests/test_taskmetadata.py +++ b/tests/test_taskmetadata.py @@ -235,6 +235,14 @@ def testNumpy(self): with self.assertRaises(ValueError): meta["numpy"] = numpy.zeros(5) + def test_deprecated(self): + meta = TaskMetadata() + with self.assertRaises(RuntimeError): + meta.names(topLevelOnly=True) + + with self.assertWarns(FutureWarning): + meta.names(topLevelOnly=False) + if __name__ == "__main__": unittest.main() From 1aa3c807db5a494d5cd4ddad6635848ce7a2336c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 3 Aug 2023 07:48:37 -0700 Subject: [PATCH 6/6] Add python-requires field to pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 12024248..29cff295 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lsst-pipe-base" +requires-python = ">=3.10.0" description = "Pipeline infrastructure for the Rubin Science Pipelines." license = {text = "GPLv3+ License"} readme = "README.md"