From 9f829a8a7581ac2d2c40a520904ef32ab4e5441d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 30 Jan 2025 11:49:37 -0700 Subject: [PATCH 01/15] Fix typo --- python/lsst/pipe/base/_quantumContext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index abb0235b1..ef338c7bf 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -261,7 +261,7 @@ def get( Returns ------- return : `object` - This function returns arbitrary objects fetched from the bulter. + This function returns arbitrary objects fetched from the butler. The structure these objects are returned in depends on the type of the input argument. If the input dataset argument is a `InputQuantizedConnection`, then the return type will be a From 1b573a6a94a49b42c1c83fbe67e3f525790b6c88 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:09:33 -0700 Subject: [PATCH 02/15] Refresh pre-commit --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53c0b08fe..df8119a79 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: trailing-whitespace - id: check-toml - repo: https://github.com/psf/black - rev: 24.10.0 + rev: 25.1.0 hooks: - id: black # It is recommended to specify the latest version of Python @@ -16,7 +16,7 @@ repos: # https://pre-commit.com/#top_level-default_language_version language_version: python3.11 - repo: https://github.com/pycqa/isort - rev: 5.13.2 + rev: 6.0.0 hooks: - id: isort name: isort (python) From 8b10178fc4b703bde3406870b59be120e6efeea0 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:24:08 -0700 Subject: [PATCH 03/15] Add ruff format configuration --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 27c94c40f..e578109cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,6 +139,9 @@ target-version = "py311" exclude = [ "__init__.py", ] +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = 79 [tool.ruff.lint] ignore = [ From cce9aee61fe945a5704c3ff4da2940de3c8cb5c8 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:26:02 -0700 Subject: [PATCH 04/15] Format doc string using ruff --- python/lsst/pipe/base/connections.py | 42 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/python/lsst/pipe/base/connections.py b/python/lsst/pipe/base/connections.py index 86b0b6dac..9f2df0e25 100644 --- a/python/lsst/pipe/base/connections.py +++ b/python/lsst/pipe/base/connections.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Module defining connection classes for PipelineTask. -""" +"""Module defining connection classes for PipelineTask.""" from __future__ import annotations @@ -590,26 +589,33 @@ class attribute must match a function argument name in the ``run`` >>> from lsst.pipe.base import connectionTypes as cT >>> from lsst.pipe.base import PipelineTaskConnections >>> from lsst.pipe.base import PipelineTaskConfig - >>> class ExampleConnections(PipelineTaskConnections, - ... dimensions=("A", "B"), - ... defaultTemplates={"foo": "Example"}): - ... inputConnection = cT.Input(doc="Example input", - ... dimensions=("A", "B"), - ... storageClass=Exposure, - ... name="{foo}Dataset") - ... outputConnection = cT.Output(doc="Example output", - ... dimensions=("A", "B"), - ... storageClass=Exposure, - ... name="{foo}output") - >>> class ExampleConfig(PipelineTaskConfig, - ... pipelineConnections=ExampleConnections): - ... pass + >>> class ExampleConnections( + ... PipelineTaskConnections, + ... dimensions=("A", "B"), + ... defaultTemplates={"foo": "Example"}, + ... ): + ... inputConnection = cT.Input( + ... doc="Example input", + ... dimensions=("A", "B"), + ... storageClass=Exposure, + ... name="{foo}Dataset", + ... ) + ... outputConnection = cT.Output( + ... doc="Example output", + ... dimensions=("A", "B"), + ... storageClass=Exposure, + ... name="{foo}output", + ... ) + >>> class ExampleConfig( + ... PipelineTaskConfig, pipelineConnections=ExampleConnections + ... ): + ... pass >>> config = ExampleConfig() >>> config.connections.foo = Modified >>> config.connections.outputConnection = "TotallyDifferent" >>> connections = ExampleConnections(config=config) - >>> assert(connections.inputConnection.name == "ModifiedDataset") - >>> assert(connections.outputConnection.name == "TotallyDifferent") + >>> assert connections.inputConnection.name == "ModifiedDataset" + >>> assert connections.outputConnection.name == "TotallyDifferent" """ # We annotate these attributes as mutable sets because that's what they are From 55ccab050e389602e69462df56f8ef0c39b15653 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:30:15 -0700 Subject: [PATCH 05/15] Add missing docstring to public methods --- python/lsst/pipe/base/mermaid_tools.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/python/lsst/pipe/base/mermaid_tools.py b/python/lsst/pipe/base/mermaid_tools.py index e472a70c3..3563d6a94 100644 --- a/python/lsst/pipe/base/mermaid_tools.py +++ b/python/lsst/pipe/base/mermaid_tools.py @@ -363,11 +363,33 @@ def pipeline2mermaid( edges: list[tuple[str, str, bool]] = [] def get_task_id(idx: int) -> str: - """Generate a safe Mermaid node ID for a task.""" + """Generate a safe Mermaid node ID for a task. + + Parameters + ---------- + idx : `int` + Task index. + + Returns + ------- + id : `str` + Node ID for a task. + """ return f"TASK_{idx}" def get_dataset_id(name: str) -> str: - """Generate a safe Mermaid node ID for a dataset.""" + """Generate a safe Mermaid node ID for a dataset. + + Parameters + ---------- + name : `str` + Dataset name. + + Returns + ------- + id : `str` + Node ID for the dataset. + """ # Replace non-alphanumerics with underscores. return "DATASET_" + re.sub(r"[^0-9A-Za-z_]", "_", name) From 6b48f1fd06ea616ef85ff7f7dadde33b049d4a86 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:30:44 -0700 Subject: [PATCH 06/15] Reformat single-line docstrings for new black --- python/lsst/pipe/base/config.py | 3 +-- python/lsst/pipe/base/configOverrides.py | 3 +-- python/lsst/pipe/base/pipelineTask.py | 3 +-- python/lsst/pipe/base/taskFactory.py | 3 +-- python/lsst/pipe/base/tests/pipelineStepTester.py | 3 +-- python/lsst/pipe/base/tests/simpleQGraph.py | 3 +-- tests/test_cliCmdRegisterInstrument.py | 3 +-- tests/test_configOverrides.py | 3 +-- tests/test_config_formatter.py | 3 +-- tests/test_connections.py | 3 +-- tests/test_dot_tools.py | 3 +-- tests/test_execution_reports.py | 3 +-- tests/test_instrument.py | 3 +-- tests/test_mermaid_tools.py | 3 +-- tests/test_pipeline.py | 3 +-- tests/test_pipelineTask.py | 3 +-- tests/test_quantum_provenance_graph.py | 3 +-- 17 files changed, 17 insertions(+), 34 deletions(-) diff --git a/python/lsst/pipe/base/config.py b/python/lsst/pipe/base/config.py index fcab1d905..d0e35b30b 100644 --- a/python/lsst/pipe/base/config.py +++ b/python/lsst/pipe/base/config.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Module defining config classes for PipelineTask. -""" +"""Module defining config classes for PipelineTask.""" from __future__ import annotations diff --git a/python/lsst/pipe/base/configOverrides.py b/python/lsst/pipe/base/configOverrides.py index b25eb555b..e88af133f 100644 --- a/python/lsst/pipe/base/configOverrides.py +++ b/python/lsst/pipe/base/configOverrides.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Module which defines ConfigOverrides class and related methods. -""" +"""Module which defines ConfigOverrides class and related methods.""" from __future__ import annotations __all__ = ["ConfigOverrides"] diff --git a/python/lsst/pipe/base/pipelineTask.py b/python/lsst/pipe/base/pipelineTask.py index 969cef8c2..f065c4c73 100644 --- a/python/lsst/pipe/base/pipelineTask.py +++ b/python/lsst/pipe/base/pipelineTask.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Define `PipelineTask` class and related methods. -""" +"""Define `PipelineTask` class and related methods.""" from __future__ import annotations diff --git a/python/lsst/pipe/base/taskFactory.py b/python/lsst/pipe/base/taskFactory.py index 423a25bca..a998e539e 100644 --- a/python/lsst/pipe/base/taskFactory.py +++ b/python/lsst/pipe/base/taskFactory.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Module defining TaskFactory interface. -""" +"""Module defining TaskFactory interface.""" from __future__ import annotations diff --git a/python/lsst/pipe/base/tests/pipelineStepTester.py b/python/lsst/pipe/base/tests/pipelineStepTester.py index 0509300a7..0ef3ace95 100644 --- a/python/lsst/pipe/base/tests/pipelineStepTester.py +++ b/python/lsst/pipe/base/tests/pipelineStepTester.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Utility to facilitate testing of pipelines consisting of multiple steps. -""" +"""Utility to facilitate testing of pipelines consisting of multiple steps.""" __all__ = ["PipelineStepTester"] diff --git a/python/lsst/pipe/base/tests/simpleQGraph.py b/python/lsst/pipe/base/tests/simpleQGraph.py index 6dbac9dc0..07532238b 100644 --- a/python/lsst/pipe/base/tests/simpleQGraph.py +++ b/python/lsst/pipe/base/tests/simpleQGraph.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Bunch of common classes and methods for use in unit tests. -""" +"""Bunch of common classes and methods for use in unit tests.""" from __future__ import annotations __all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"] diff --git a/tests/test_cliCmdRegisterInstrument.py b/tests/test_cliCmdRegisterInstrument.py index 105abeee5..b7185e652 100644 --- a/tests/test_cliCmdRegisterInstrument.py +++ b/tests/test_cliCmdRegisterInstrument.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Unit tests for daf_butler CLI register-instrument command. -""" +"""Unit tests for daf_butler CLI register-instrument command.""" import unittest diff --git a/tests/test_configOverrides.py b/tests/test_configOverrides.py index b5291ab4d..02e155632 100644 --- a/tests/test_configOverrides.py +++ b/tests/test_configOverrides.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for configOverrides. -""" +"""Simple unit test for configOverrides.""" import tempfile import unittest diff --git a/tests/test_config_formatter.py b/tests/test_config_formatter.py index 6be481d8a..a5b6d8bc8 100644 --- a/tests/test_config_formatter.py +++ b/tests/test_config_formatter.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Tests for PexConfigFormatter. -""" +"""Tests for PexConfigFormatter.""" import os import unittest diff --git a/tests/test_connections.py b/tests/test_connections.py index 8ea3d161a..a9788efde 100644 --- a/tests/test_connections.py +++ b/tests/test_connections.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for PipelineTaskConnections. -""" +"""Simple unit test for PipelineTaskConnections.""" import unittest import warnings diff --git a/tests/test_dot_tools.py b/tests/test_dot_tools.py index e760d04ab..a10efaef6 100644 --- a/tests/test_dot_tools.py +++ b/tests/test_dot_tools.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for Pipeline visualization. -""" +"""Simple unit test for Pipeline visualization.""" import io import re diff --git a/tests/test_execution_reports.py b/tests/test_execution_reports.py index 71f39eaa1..c55ea9faa 100644 --- a/tests/test_execution_reports.py +++ b/tests/test_execution_reports.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for execution_reports. -""" +"""Simple unit test for execution_reports.""" import unittest diff --git a/tests/test_instrument.py b/tests/test_instrument.py index 72bac2d01..9b57d6934 100644 --- a/tests/test_instrument.py +++ b/tests/test_instrument.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Tests of the Instrument class. -""" +"""Tests of the Instrument class.""" import datetime import math diff --git a/tests/test_mermaid_tools.py b/tests/test_mermaid_tools.py index 4e8609474..1ec230bfb 100644 --- a/tests/test_mermaid_tools.py +++ b/tests/test_mermaid_tools.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for Pipeline visualization. -""" +"""Simple unit test for Pipeline visualization.""" import io import unittest diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index a59aa9cc8..68b69f8ca 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for Pipeline. -""" +"""Simple unit test for Pipeline.""" import pickle import textwrap diff --git a/tests/test_pipelineTask.py b/tests/test_pipelineTask.py index 191030bc7..ce348e041 100644 --- a/tests/test_pipelineTask.py +++ b/tests/test_pipelineTask.py @@ -25,8 +25,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""Simple unit test for PipelineTask. -""" +"""Simple unit test for PipelineTask.""" import copy import pickle diff --git a/tests/test_quantum_provenance_graph.py b/tests/test_quantum_provenance_graph.py index d50778125..9bb2ef928 100644 --- a/tests/test_quantum_provenance_graph.py +++ b/tests/test_quantum_provenance_graph.py @@ -26,8 +26,7 @@ # # You should have received a copy of the GNU General Public License # # along with this program. If not, see . -"""Simple unit test for quantum_provenance_graph. -""" +"""Simple unit test for quantum_provenance_graph.""" import unittest From 60999584a2708bdddc09261101a3e22f86dd3091 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:32:06 -0700 Subject: [PATCH 07/15] Fix some multi-line strings that could be single line --- python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py | 3 +-- python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py | 2 +- python/lsst/pipe/base/quantum_provenance_graph.py | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py b/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py index 3c61505c3..d334ec328 100644 --- a/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py +++ b/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py @@ -489,8 +489,7 @@ def from_builder( result.query_args["collections"] = builder.input_collections else: raise QuantumGraphBuilderError( - f"Unable to handle type {builder.dataset_query_constraint} " - "given as datasetQueryConstraint." + f"Unable to handle type {builder.dataset_query_constraint} given as datasetQueryConstraint." ) builder.log.verbose("Querying for data IDs with arguments:") builder.log.verbose(" dimensions=%s,", list(result.query_args["dimensions"].names)) diff --git a/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py b/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py index ba754e56e..a94d67ad4 100644 --- a/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py +++ b/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py @@ -311,7 +311,7 @@ def diff_tasks(self, other: PipelineGraph) -> list[str]: b = other.tasks[label] if a.task_class != b.task_class: messages.append( - f"Task {label!r} has class {a.task_class_name} in A, " f"but {b.task_class_name} in B." + f"Task {label!r} has class {a.task_class_name} in A, but {b.task_class_name} in B." ) messages.extend(a.diff_edges(b)) return messages diff --git a/python/lsst/pipe/base/quantum_provenance_graph.py b/python/lsst/pipe/base/quantum_provenance_graph.py index 32c1d131a..42c3e4af8 100644 --- a/python/lsst/pipe/base/quantum_provenance_graph.py +++ b/python/lsst/pipe/base/quantum_provenance_graph.py @@ -1249,8 +1249,7 @@ def assemble_quantum_provenance_graph( """ if read_caveats not in ("lazy", "exhaustive", None): raise TypeError( - f"Invalid option {read_caveats!r} for read_caveats; " - "should be 'lazy', 'exhaustive', or None." + f"Invalid option {read_caveats!r} for read_caveats; should be 'lazy', 'exhaustive', or None." ) output_runs = [] for graph in qgraphs: From 0f1dbf780360906da9388834b0582278609fa69f Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 30 Jan 2025 12:49:21 -0700 Subject: [PATCH 08/15] Allow TaskMetadata to handle empty lists --- python/lsst/pipe/base/_task_metadata.py | 9 ++++++++- tests/test_taskmetadata.py | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index 594ec1a5b..736df3c2f 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -432,7 +432,12 @@ def __getitem__(self, key: str) -> Any: if key0 in self.metadata: return self.metadata[key0] if key0 in self.arrays: - return self.arrays[key0][-1] + arr = self.arrays[key0] + if not arr: + # If there are no elements then returning a scalar + # is an error. + raise KeyError(f"'{key}' not found") + return arr[-1] raise KeyError(f"'{key}' not found") # Hierarchical lookup so the top key can only be in the metadata # property. Trap KeyError and reraise so that the correct key @@ -613,6 +618,8 @@ def _validate_value(self, value: Any) -> tuple[str, Any]: # For model consistency, need to check that every item in the # list has the same type. value = list(value) + if not value: + return "array", value type0 = type(value[0]) for i in value: diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py index a394f1eec..aaf763bbf 100644 --- a/tests/test_taskmetadata.py +++ b/tests/test_taskmetadata.py @@ -91,6 +91,10 @@ def testTaskMetadata(self): self.assertEqual(meta.getArray("new_array"), ["a", "b", "c"]) meta["new_array"] = [1, 2, 3] self.assertEqual(meta.getArray("new_array"), [1, 2, 3]) + meta["empty_array"] = [] + self.assertEqual(meta.getArray("empty_array"), []) + with self.assertRaises(KeyError): + meta["empty_array"] meta["meta"] = 5 meta["meta"] = TaskMetadata() From 54b528063d521d9cb19c466ed85e704c5a3a526d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 23 Jan 2025 17:27:06 -0700 Subject: [PATCH 09/15] Track inputs retrieved in quantum context --- python/lsst/pipe/base/_quantumContext.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index ef338c7bf..b2fbcd3ab 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -39,7 +39,15 @@ from typing import Any import astropy.units as u -from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse, LimitedButler, Quantum +from lsst.daf.butler import ( + DataCoordinate, + DatasetProvenance, + DatasetRef, + DatasetType, + DimensionUniverse, + LimitedButler, + Quantum, +) from lsst.utils.introspection import get_full_type_name from lsst.utils.logging import PeriodicLogger, getLogger @@ -215,17 +223,20 @@ def __init__( self.allOutputs.add((ref.datasetType, ref.dataId)) self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set() self.__butler = butler + self.dataset_provenance = DatasetProvenance() def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any: # Butler methods below will check for unresolved DatasetRefs and # raise appropriately, so no need for us to do that here. if isinstance(ref, DeferredDatasetRef): self._checkMembership(ref.datasetRef, self.allInputs) + self.dataset_provenance.add_input(ref.datasetRef) return self.__butler.getDeferred(ref.datasetRef) elif ref is None: return None else: self._checkMembership(ref, self.allInputs) + self.dataset_provenance.add_input(ref) return self.__butler.get(ref) def _put(self, value: Any, ref: DatasetRef) -> None: From d27f022ad1abd01c44dd09131dfba0f35c77aa7e Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 24 Jan 2025 13:02:55 -0700 Subject: [PATCH 10/15] Pass provenance to QuantumContext put --- python/lsst/pipe/base/_quantumContext.py | 2 +- tests/test_pipelineTask.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index b2fbcd3ab..f71f1d0a9 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -242,7 +242,7 @@ def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any: def _put(self, value: Any, ref: DatasetRef) -> None: """Store data in butler.""" self._checkMembership(ref, self.allOutputs) - self.__butler.put(value, ref) + self.__butler.put(value, ref, provenance=self.dataset_provenance) self.outputsPut.add((ref.datasetType, ref.dataId)) def get( diff --git a/tests/test_pipelineTask.py b/tests/test_pipelineTask.py index ce348e041..a333e4541 100644 --- a/tests/test_pipelineTask.py +++ b/tests/test_pipelineTask.py @@ -37,7 +37,14 @@ import lsst.pipe.base as pipeBase import lsst.utils.logging import lsst.utils.tests -from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse, Quantum +from lsst.daf.butler import ( + DataCoordinate, + DatasetProvenance, + DatasetRef, + DatasetType, + DimensionUniverse, + Quantum, +) class ButlerMock: @@ -53,7 +60,13 @@ def get(self, ref: DatasetRef) -> Any: return dsdata.get(ref.dataId) return None - def put(self, inMemoryDataset: Any, dsRef: DatasetRef, producer: Any = None): + def put( + self, + inMemoryDataset: Any, + dsRef: DatasetRef, + provenance: DatasetProvenance | None = None, + producer: Any = None, + ): key = dsRef.dataId name = dsRef.datasetType.name dsdata = self.datasets.setdefault(name, {}) From f255e67ee99c4c2ab35332e0487cf38571eb5679 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:03:04 -0700 Subject: [PATCH 11/15] Add quantum_id to QuantumContext constructor --- python/lsst/pipe/base/_quantumContext.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index f71f1d0a9..4b2a897b9 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -34,6 +34,7 @@ __all__ = ("ExecutionResources", "QuantumContext") import numbers +import uuid from collections.abc import Callable, Sequence from dataclasses import dataclass from typing import Any @@ -182,6 +183,8 @@ class QuantumContext: single execution of this node in the pipeline graph. resources : `ExecutionResources`, optional The resources allocated for executing quanta. + quantum_id : `uuid.UUID` or `None`, optional + The ID of the quantum being executed. Used for provenance. Notes ----- @@ -199,7 +202,12 @@ class QuantumContext: resources: ExecutionResources def __init__( - self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None + self, + butler: LimitedButler, + quantum: Quantum, + *, + resources: ExecutionResources | None = None, + quantum_id: uuid.UUID | None = None, ): self.quantum = quantum if resources is None: @@ -223,7 +231,7 @@ def __init__( self.allOutputs.add((ref.datasetType, ref.dataId)) self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set() self.__butler = butler - self.dataset_provenance = DatasetProvenance() + self.dataset_provenance = DatasetProvenance(quantum_id=quantum_id) def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any: # Butler methods below will check for unresolved DatasetRefs and From 3e28d0b452a76482bff40b38cfa083160c19c1dc Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 15:05:59 -0700 Subject: [PATCH 12/15] Add provenance to caching limited butler put --- python/lsst/pipe/base/caching_limited_butler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/lsst/pipe/base/caching_limited_butler.py b/python/lsst/pipe/base/caching_limited_butler.py index f1d60aa75..3251d17c1 100644 --- a/python/lsst/pipe/base/caching_limited_butler.py +++ b/python/lsst/pipe/base/caching_limited_butler.py @@ -35,6 +35,7 @@ from lsst.daf.butler import ( DatasetId, + DatasetProvenance, DatasetRef, DeferredDatasetHandle, DimensionUniverse, @@ -155,7 +156,7 @@ def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]: def isWriteable(self) -> bool: return self._wrapped.isWriteable() - def put(self, obj: Any, ref: DatasetRef) -> DatasetRef: + def put(self, obj: Any, ref: DatasetRef, /, *, provenance: DatasetProvenance | None = None) -> DatasetRef: if ref.datasetType.name in self._cache_on_put: self._cache[ref.datasetType.name] = ( ref.id, @@ -167,7 +168,7 @@ def put(self, obj: Any, ref: DatasetRef) -> DatasetRef: ), ) _LOG.debug("Cached dataset %s on put", ref) - return self._wrapped.put(obj, ref) + return self._wrapped.put(obj, ref, provenance=provenance) def pruneDatasets( self, From e0b8a7a0bced975013f2ce2b339eadbe481f96f9 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 29 Jan 2025 16:10:41 -0700 Subject: [PATCH 13/15] Add method to allow runQuantum to register extra provenance --- python/lsst/pipe/base/_quantumContext.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index 4b2a897b9..604cf4a09 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -453,3 +453,17 @@ def dimensions(self) -> DimensionUniverse: repository (`~lsst.daf.butler.DimensionUniverse`). """ return self.__butler.dimensions + + def add_additional_provenance(self, ref: DatasetRef, extra: dict[str, int | float | str | bool]) -> None: + """Add additional provenance information to the dataset provenance. + + Parameters + ---------- + ref : `DatasetRef` + The dataset to attach provenance to. This dataset must have been + retrieved by this quantum context. + extra : `dict` [ `str`, `int` | `float` | `str` | `bool` ] + Additional information to attach as provenance information. Keys + must be strings and values must be simple scalars. + """ + self.dataset_provenance.add_extra_provenance(ref.id, extra) From e135b994f1549821bbc2366d77322f2afbcd443c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 30 Jan 2025 11:47:26 -0700 Subject: [PATCH 14/15] Add the UUID to the outputs structure Now that the UUID is always known it becomes useful to track this information for provenance purposes. --- python/lsst/pipe/base/_quantumContext.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py index 604cf4a09..3d5944ac7 100644 --- a/python/lsst/pipe/base/_quantumContext.py +++ b/python/lsst/pipe/base/_quantumContext.py @@ -218,7 +218,7 @@ def __init__( self.allOutputs = set() for refs in quantum.inputs.values(): for ref in refs: - self.allInputs.add((ref.datasetType, ref.dataId)) + self.allInputs.add((ref.datasetType, ref.dataId, ref.id)) for dataset_type, refs in quantum.outputs.items(): if dataset_type.name.endswith(METADATA_OUTPUT_CONNECTION_NAME) or dataset_type.name.endswith( LOG_OUTPUT_CONNECTION_NAME @@ -228,8 +228,8 @@ def __init__( # write them itself; that's for the execution system to do. continue for ref in refs: - self.allOutputs.add((ref.datasetType, ref.dataId)) - self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set() + self.allOutputs.add((ref.datasetType, ref.dataId, ref.id)) + self.outputsPut: set[tuple[DatasetType, DataCoordinate, uuid.UUID]] = set() self.__butler = butler self.dataset_provenance = DatasetProvenance(quantum_id=quantum_id) @@ -251,7 +251,7 @@ def _put(self, value: Any, ref: DatasetRef) -> None: """Store data in butler.""" self._checkMembership(ref, self.allOutputs) self.__butler.put(value, ref, provenance=self.dataset_provenance) - self.outputsPut.add((ref.datasetType, ref.dataId)) + self.outputsPut.add((ref.datasetType, ref.dataId, ref.id)) def get( self, @@ -444,7 +444,7 @@ def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> No if not isinstance(ref, list | tuple): ref = [ref] for r in ref: - if (r.datasetType, r.dataId) not in inout: + if (r.datasetType, r.dataId, r.id) not in inout: raise ValueError("DatasetRef is not part of the Quantum being processed") @property From dd217c8faba33c1a813d931888388019df40a041 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 30 Jan 2025 13:13:57 -0700 Subject: [PATCH 15/15] Add news fragment --- doc/changes/DM-35396.feature.rst | 3 +++ doc/changes/DM-35396.misc.rst | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 doc/changes/DM-35396.feature.rst create mode 100644 doc/changes/DM-35396.misc.rst diff --git a/doc/changes/DM-35396.feature.rst b/doc/changes/DM-35396.feature.rst new file mode 100644 index 000000000..8bde34909 --- /dev/null +++ b/doc/changes/DM-35396.feature.rst @@ -0,0 +1,3 @@ +* Modified ``QuantumContext`` such that it now tracks all datasets that are retrieved and records them in ``dataset_provenance``. + This provenance is then passed to Butler on ``put()``. +* Added ``QuantumContext.add_additional_provenance()`` to allow a pipeline task author to attach additional provenance information to be recorded and associated with a particular input dataset. diff --git a/doc/changes/DM-35396.misc.rst b/doc/changes/DM-35396.misc.rst new file mode 100644 index 000000000..ddda73026 --- /dev/null +++ b/doc/changes/DM-35396.misc.rst @@ -0,0 +1,2 @@ +Modified ``TaskMetadata`` such that it can now be assigned an empty list. +This list can be retrieved with ``getArray`` but if an attempt is made to get a scalar `KeyError` will be raised.