From 9f829a8a7581ac2d2c40a520904ef32ab4e5441d Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 30 Jan 2025 11:49:37 -0700
Subject: [PATCH 01/15] Fix typo

---
 python/lsst/pipe/base/_quantumContext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index abb0235b1..ef338c7bf 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -261,7 +261,7 @@ def get(
         Returns
         -------
         return : `object`
-            This function returns arbitrary objects fetched from the bulter.
+            This function returns arbitrary objects fetched from the butler.
             The structure these objects are returned in depends on the type of
             the input argument. If the input dataset argument is a
             `InputQuantizedConnection`, then the return type will be a

From 1b573a6a94a49b42c1c83fbe67e3f525790b6c88 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:09:33 -0700
Subject: [PATCH 02/15] Refresh pre-commit

---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 53c0b08fe..df8119a79 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
       - id: trailing-whitespace
       - id: check-toml
   - repo: https://github.com/psf/black
-    rev: 24.10.0
+    rev: 25.1.0
     hooks:
       - id: black
         # It is recommended to specify the latest version of Python
@@ -16,7 +16,7 @@ repos:
         # https://pre-commit.com/#top_level-default_language_version
         language_version: python3.11
   - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
+    rev: 6.0.0
     hooks:
       - id: isort
         name: isort (python)

From 8b10178fc4b703bde3406870b59be120e6efeea0 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:24:08 -0700
Subject: [PATCH 03/15] Add ruff format configuration

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 27c94c40f..e578109cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -139,6 +139,9 @@ target-version = "py311"
 exclude = [
     "__init__.py",
 ]
+[tool.ruff.format]
+docstring-code-format = true
+docstring-code-line-length = 79
 
 [tool.ruff.lint]
 ignore = [

From cce9aee61fe945a5704c3ff4da2940de3c8cb5c8 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:26:02 -0700
Subject: [PATCH 04/15] Format doc string using ruff

---
 python/lsst/pipe/base/connections.py | 42 ++++++++++++++++------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/python/lsst/pipe/base/connections.py b/python/lsst/pipe/base/connections.py
index 86b0b6dac..9f2df0e25 100644
--- a/python/lsst/pipe/base/connections.py
+++ b/python/lsst/pipe/base/connections.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Module defining connection classes for PipelineTask.
-"""
+"""Module defining connection classes for PipelineTask."""
 
 from __future__ import annotations
 
@@ -590,26 +589,33 @@ class attribute must match a function argument name in the ``run``
     >>> from lsst.pipe.base import connectionTypes as cT
     >>> from lsst.pipe.base import PipelineTaskConnections
     >>> from lsst.pipe.base import PipelineTaskConfig
-    >>> class ExampleConnections(PipelineTaskConnections,
-    ...                          dimensions=("A", "B"),
-    ...                          defaultTemplates={"foo": "Example"}):
-    ...     inputConnection = cT.Input(doc="Example input",
-    ...                                dimensions=("A", "B"),
-    ...                                storageClass=Exposure,
-    ...                                name="{foo}Dataset")
-    ...     outputConnection = cT.Output(doc="Example output",
-    ...                                  dimensions=("A", "B"),
-    ...                                  storageClass=Exposure,
-    ...                                  name="{foo}output")
-    >>> class ExampleConfig(PipelineTaskConfig,
-    ...                     pipelineConnections=ExampleConnections):
-    ...    pass
+    >>> class ExampleConnections(
+    ...     PipelineTaskConnections,
+    ...     dimensions=("A", "B"),
+    ...     defaultTemplates={"foo": "Example"},
+    ... ):
+    ...     inputConnection = cT.Input(
+    ...         doc="Example input",
+    ...         dimensions=("A", "B"),
+    ...         storageClass=Exposure,
+    ...         name="{foo}Dataset",
+    ...     )
+    ...     outputConnection = cT.Output(
+    ...         doc="Example output",
+    ...         dimensions=("A", "B"),
+    ...         storageClass=Exposure,
+    ...         name="{foo}output",
+    ...     )
+    >>> class ExampleConfig(
+    ...     PipelineTaskConfig, pipelineConnections=ExampleConnections
+    ... ):
+    ...     pass
     >>> config = ExampleConfig()
     >>> config.connections.foo = Modified
     >>> config.connections.outputConnection = "TotallyDifferent"
     >>> connections = ExampleConnections(config=config)
-    >>> assert(connections.inputConnection.name == "ModifiedDataset")
-    >>> assert(connections.outputConnection.name == "TotallyDifferent")
+    >>> assert connections.inputConnection.name == "ModifiedDataset"
+    >>> assert connections.outputConnection.name == "TotallyDifferent"
     """
 
     # We annotate these attributes as mutable sets because that's what they are

From 55ccab050e389602e69462df56f8ef0c39b15653 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:30:15 -0700
Subject: [PATCH 05/15] Add missing docstring to public methods

---
 python/lsst/pipe/base/mermaid_tools.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/python/lsst/pipe/base/mermaid_tools.py b/python/lsst/pipe/base/mermaid_tools.py
index e472a70c3..3563d6a94 100644
--- a/python/lsst/pipe/base/mermaid_tools.py
+++ b/python/lsst/pipe/base/mermaid_tools.py
@@ -363,11 +363,33 @@ def pipeline2mermaid(
     edges: list[tuple[str, str, bool]] = []
 
     def get_task_id(idx: int) -> str:
-        """Generate a safe Mermaid node ID for a task."""
+        """Generate a safe Mermaid node ID for a task.
+
+        Parameters
+        ----------
+        idx : `int`
+            Task index.
+
+        Returns
+        -------
+        id : `str`
+            Node ID for a task.
+        """
         return f"TASK_{idx}"
 
     def get_dataset_id(name: str) -> str:
-        """Generate a safe Mermaid node ID for a dataset."""
+        """Generate a safe Mermaid node ID for a dataset.
+
+        Parameters
+        ----------
+        name : `str`
+            Dataset name.
+
+        Returns
+        -------
+        id : `str`
+            Node ID for the dataset.
+        """
         # Replace non-alphanumerics with underscores.
         return "DATASET_" + re.sub(r"[^0-9A-Za-z_]", "_", name)
 

From 6b48f1fd06ea616ef85ff7f7dadde33b049d4a86 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:30:44 -0700
Subject: [PATCH 06/15] Reformat single-line docstrings for new black

---
 python/lsst/pipe/base/config.py                   | 3 +--
 python/lsst/pipe/base/configOverrides.py          | 3 +--
 python/lsst/pipe/base/pipelineTask.py             | 3 +--
 python/lsst/pipe/base/taskFactory.py              | 3 +--
 python/lsst/pipe/base/tests/pipelineStepTester.py | 3 +--
 python/lsst/pipe/base/tests/simpleQGraph.py       | 3 +--
 tests/test_cliCmdRegisterInstrument.py            | 3 +--
 tests/test_configOverrides.py                     | 3 +--
 tests/test_config_formatter.py                    | 3 +--
 tests/test_connections.py                         | 3 +--
 tests/test_dot_tools.py                           | 3 +--
 tests/test_execution_reports.py                   | 3 +--
 tests/test_instrument.py                          | 3 +--
 tests/test_mermaid_tools.py                       | 3 +--
 tests/test_pipeline.py                            | 3 +--
 tests/test_pipelineTask.py                        | 3 +--
 tests/test_quantum_provenance_graph.py            | 3 +--
 17 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/python/lsst/pipe/base/config.py b/python/lsst/pipe/base/config.py
index fcab1d905..d0e35b30b 100644
--- a/python/lsst/pipe/base/config.py
+++ b/python/lsst/pipe/base/config.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Module defining config classes for PipelineTask.
-"""
+"""Module defining config classes for PipelineTask."""
 
 from __future__ import annotations
 
diff --git a/python/lsst/pipe/base/configOverrides.py b/python/lsst/pipe/base/configOverrides.py
index b25eb555b..e88af133f 100644
--- a/python/lsst/pipe/base/configOverrides.py
+++ b/python/lsst/pipe/base/configOverrides.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Module which defines ConfigOverrides class and related methods.
-"""
+"""Module which defines ConfigOverrides class and related methods."""
 from __future__ import annotations
 
 __all__ = ["ConfigOverrides"]
diff --git a/python/lsst/pipe/base/pipelineTask.py b/python/lsst/pipe/base/pipelineTask.py
index 969cef8c2..f065c4c73 100644
--- a/python/lsst/pipe/base/pipelineTask.py
+++ b/python/lsst/pipe/base/pipelineTask.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Define `PipelineTask` class and related methods.
-"""
+"""Define `PipelineTask` class and related methods."""
 
 from __future__ import annotations
 
diff --git a/python/lsst/pipe/base/taskFactory.py b/python/lsst/pipe/base/taskFactory.py
index 423a25bca..a998e539e 100644
--- a/python/lsst/pipe/base/taskFactory.py
+++ b/python/lsst/pipe/base/taskFactory.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Module defining TaskFactory interface.
-"""
+"""Module defining TaskFactory interface."""
 
 from __future__ import annotations
 
diff --git a/python/lsst/pipe/base/tests/pipelineStepTester.py b/python/lsst/pipe/base/tests/pipelineStepTester.py
index 0509300a7..0ef3ace95 100644
--- a/python/lsst/pipe/base/tests/pipelineStepTester.py
+++ b/python/lsst/pipe/base/tests/pipelineStepTester.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Utility to facilitate testing of pipelines consisting of multiple steps.
-"""
+"""Utility to facilitate testing of pipelines consisting of multiple steps."""
 
 __all__ = ["PipelineStepTester"]
 
diff --git a/python/lsst/pipe/base/tests/simpleQGraph.py b/python/lsst/pipe/base/tests/simpleQGraph.py
index 6dbac9dc0..07532238b 100644
--- a/python/lsst/pipe/base/tests/simpleQGraph.py
+++ b/python/lsst/pipe/base/tests/simpleQGraph.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Bunch of common classes and methods for use in unit tests.
-"""
+"""Bunch of common classes and methods for use in unit tests."""
 from __future__ import annotations
 
 __all__ = ["AddTaskConfig", "AddTask", "AddTaskFactoryMock"]
diff --git a/tests/test_cliCmdRegisterInstrument.py b/tests/test_cliCmdRegisterInstrument.py
index 105abeee5..b7185e652 100644
--- a/tests/test_cliCmdRegisterInstrument.py
+++ b/tests/test_cliCmdRegisterInstrument.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Unit tests for daf_butler CLI register-instrument command.
-"""
+"""Unit tests for daf_butler CLI register-instrument command."""
 
 import unittest
 
diff --git a/tests/test_configOverrides.py b/tests/test_configOverrides.py
index b5291ab4d..02e155632 100644
--- a/tests/test_configOverrides.py
+++ b/tests/test_configOverrides.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for configOverrides.
-"""
+"""Simple unit test for configOverrides."""
 
 import tempfile
 import unittest
diff --git a/tests/test_config_formatter.py b/tests/test_config_formatter.py
index 6be481d8a..a5b6d8bc8 100644
--- a/tests/test_config_formatter.py
+++ b/tests/test_config_formatter.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Tests for PexConfigFormatter.
-"""
+"""Tests for PexConfigFormatter."""
 
 import os
 import unittest
diff --git a/tests/test_connections.py b/tests/test_connections.py
index 8ea3d161a..a9788efde 100644
--- a/tests/test_connections.py
+++ b/tests/test_connections.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for PipelineTaskConnections.
-"""
+"""Simple unit test for PipelineTaskConnections."""
 
 import unittest
 import warnings
diff --git a/tests/test_dot_tools.py b/tests/test_dot_tools.py
index e760d04ab..a10efaef6 100644
--- a/tests/test_dot_tools.py
+++ b/tests/test_dot_tools.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-"""Simple unit test for Pipeline visualization.
-"""
+"""Simple unit test for Pipeline visualization."""
 
 import io
 import re
diff --git a/tests/test_execution_reports.py b/tests/test_execution_reports.py
index 71f39eaa1..c55ea9faa 100644
--- a/tests/test_execution_reports.py
+++ b/tests/test_execution_reports.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for execution_reports.
-"""
+"""Simple unit test for execution_reports."""
 
 import unittest
 
diff --git a/tests/test_instrument.py b/tests/test_instrument.py
index 72bac2d01..9b57d6934 100644
--- a/tests/test_instrument.py
+++ b/tests/test_instrument.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Tests of the Instrument class.
-"""
+"""Tests of the Instrument class."""
 
 import datetime
 import math
diff --git a/tests/test_mermaid_tools.py b/tests/test_mermaid_tools.py
index 4e8609474..1ec230bfb 100644
--- a/tests/test_mermaid_tools.py
+++ b/tests/test_mermaid_tools.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-"""Simple unit test for Pipeline visualization.
-"""
+"""Simple unit test for Pipeline visualization."""
 
 import io
 import unittest
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index a59aa9cc8..68b69f8ca 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for Pipeline.
-"""
+"""Simple unit test for Pipeline."""
 
 import pickle
 import textwrap
diff --git a/tests/test_pipelineTask.py b/tests/test_pipelineTask.py
index 191030bc7..ce348e041 100644
--- a/tests/test_pipelineTask.py
+++ b/tests/test_pipelineTask.py
@@ -25,8 +25,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for PipelineTask.
-"""
+"""Simple unit test for PipelineTask."""
 
 import copy
 import pickle
diff --git a/tests/test_quantum_provenance_graph.py b/tests/test_quantum_provenance_graph.py
index d50778125..9bb2ef928 100644
--- a/tests/test_quantum_provenance_graph.py
+++ b/tests/test_quantum_provenance_graph.py
@@ -26,8 +26,7 @@
 # # You should have received a copy of the GNU General Public License
 # # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Simple unit test for quantum_provenance_graph.
-"""
+"""Simple unit test for quantum_provenance_graph."""
 
 import unittest
 

From 60999584a2708bdddc09261101a3e22f86dd3091 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:32:06 -0700
Subject: [PATCH 07/15] Fix some multi-line strings that could be single line

---
 python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py | 3 +--
 python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py       | 2 +-
 python/lsst/pipe/base/quantum_provenance_graph.py             | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py b/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py
index 3c61505c3..d334ec328 100644
--- a/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py
+++ b/python/lsst/pipe/base/all_dimensions_quantum_graph_builder.py
@@ -489,8 +489,7 @@ def from_builder(
             result.query_args["collections"] = builder.input_collections
         else:
             raise QuantumGraphBuilderError(
-                f"Unable to handle type {builder.dataset_query_constraint} "
-                "given as datasetQueryConstraint."
+                f"Unable to handle type {builder.dataset_query_constraint} given as datasetQueryConstraint."
             )
         builder.log.verbose("Querying for data IDs with arguments:")
         builder.log.verbose("  dimensions=%s,", list(result.query_args["dimensions"].names))
diff --git a/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py b/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py
index ba754e56e..a94d67ad4 100644
--- a/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py
+++ b/python/lsst/pipe/base/pipeline_graph/_pipeline_graph.py
@@ -311,7 +311,7 @@ def diff_tasks(self, other: PipelineGraph) -> list[str]:
             b = other.tasks[label]
             if a.task_class != b.task_class:
                 messages.append(
-                    f"Task {label!r} has class {a.task_class_name} in A, " f"but {b.task_class_name} in B."
+                    f"Task {label!r} has class {a.task_class_name} in A, but {b.task_class_name} in B."
                 )
             messages.extend(a.diff_edges(b))
         return messages
diff --git a/python/lsst/pipe/base/quantum_provenance_graph.py b/python/lsst/pipe/base/quantum_provenance_graph.py
index 32c1d131a..42c3e4af8 100644
--- a/python/lsst/pipe/base/quantum_provenance_graph.py
+++ b/python/lsst/pipe/base/quantum_provenance_graph.py
@@ -1249,8 +1249,7 @@ def assemble_quantum_provenance_graph(
         """
         if read_caveats not in ("lazy", "exhaustive", None):
             raise TypeError(
-                f"Invalid option {read_caveats!r} for read_caveats; "
-                "should be 'lazy', 'exhaustive', or None."
+                f"Invalid option {read_caveats!r} for read_caveats; should be 'lazy', 'exhaustive', or None."
             )
         output_runs = []
         for graph in qgraphs:

From 0f1dbf780360906da9388834b0582278609fa69f Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 30 Jan 2025 12:49:21 -0700
Subject: [PATCH 08/15] Allow TaskMetadata to handle empty lists

---
 python/lsst/pipe/base/_task_metadata.py | 9 ++++++++-
 tests/test_taskmetadata.py              | 4 ++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py
index 594ec1a5b..736df3c2f 100644
--- a/python/lsst/pipe/base/_task_metadata.py
+++ b/python/lsst/pipe/base/_task_metadata.py
@@ -432,7 +432,12 @@ def __getitem__(self, key: str) -> Any:
             if key0 in self.metadata:
                 return self.metadata[key0]
             if key0 in self.arrays:
-                return self.arrays[key0][-1]
+                arr = self.arrays[key0]
+                if not arr:
+                    # If there are no elements then returning a scalar
+                    # is an error.
+                    raise KeyError(f"'{key}' not found")
+                return arr[-1]
             raise KeyError(f"'{key}' not found")
         # Hierarchical lookup so the top key can only be in the metadata
         # property. Trap KeyError and reraise so that the correct key
@@ -613,6 +618,8 @@ def _validate_value(self, value: Any) -> tuple[str, Any]:
             # For model consistency, need to check that every item in the
             # list has the same type.
             value = list(value)
+            if not value:
+                return "array", value
 
             type0 = type(value[0])
             for i in value:
diff --git a/tests/test_taskmetadata.py b/tests/test_taskmetadata.py
index a394f1eec..aaf763bbf 100644
--- a/tests/test_taskmetadata.py
+++ b/tests/test_taskmetadata.py
@@ -91,6 +91,10 @@ def testTaskMetadata(self):
         self.assertEqual(meta.getArray("new_array"), ["a", "b", "c"])
         meta["new_array"] = [1, 2, 3]
         self.assertEqual(meta.getArray("new_array"), [1, 2, 3])
+        meta["empty_array"] = []
+        self.assertEqual(meta.getArray("empty_array"), [])
+        with self.assertRaises(KeyError):
+            meta["empty_array"]
 
         meta["meta"] = 5
         meta["meta"] = TaskMetadata()

From 54b528063d521d9cb19c466ed85e704c5a3a526d Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 23 Jan 2025 17:27:06 -0700
Subject: [PATCH 09/15] Track inputs retrieved in quantum context

---
 python/lsst/pipe/base/_quantumContext.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index ef338c7bf..b2fbcd3ab 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -39,7 +39,15 @@
 from typing import Any
 
 import astropy.units as u
-from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse, LimitedButler, Quantum
+from lsst.daf.butler import (
+    DataCoordinate,
+    DatasetProvenance,
+    DatasetRef,
+    DatasetType,
+    DimensionUniverse,
+    LimitedButler,
+    Quantum,
+)
 from lsst.utils.introspection import get_full_type_name
 from lsst.utils.logging import PeriodicLogger, getLogger
 
@@ -215,17 +223,20 @@ def __init__(
                 self.allOutputs.add((ref.datasetType, ref.dataId))
         self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set()
         self.__butler = butler
+        self.dataset_provenance = DatasetProvenance()
 
     def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
         # Butler methods below will check for unresolved DatasetRefs and
         # raise appropriately, so no need for us to do that here.
         if isinstance(ref, DeferredDatasetRef):
             self._checkMembership(ref.datasetRef, self.allInputs)
+            self.dataset_provenance.add_input(ref.datasetRef)
             return self.__butler.getDeferred(ref.datasetRef)
         elif ref is None:
             return None
         else:
             self._checkMembership(ref, self.allInputs)
+            self.dataset_provenance.add_input(ref)
             return self.__butler.get(ref)
 
     def _put(self, value: Any, ref: DatasetRef) -> None:

From d27f022ad1abd01c44dd09131dfba0f35c77aa7e Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Fri, 24 Jan 2025 13:02:55 -0700
Subject: [PATCH 10/15] Pass provenance to QuantumContext put

---
 python/lsst/pipe/base/_quantumContext.py |  2 +-
 tests/test_pipelineTask.py               | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index b2fbcd3ab..f71f1d0a9 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -242,7 +242,7 @@ def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
     def _put(self, value: Any, ref: DatasetRef) -> None:
         """Store data in butler."""
         self._checkMembership(ref, self.allOutputs)
-        self.__butler.put(value, ref)
+        self.__butler.put(value, ref, provenance=self.dataset_provenance)
         self.outputsPut.add((ref.datasetType, ref.dataId))
 
     def get(
diff --git a/tests/test_pipelineTask.py b/tests/test_pipelineTask.py
index ce348e041..a333e4541 100644
--- a/tests/test_pipelineTask.py
+++ b/tests/test_pipelineTask.py
@@ -37,7 +37,14 @@
 import lsst.pipe.base as pipeBase
 import lsst.utils.logging
 import lsst.utils.tests
-from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse, Quantum
+from lsst.daf.butler import (
+    DataCoordinate,
+    DatasetProvenance,
+    DatasetRef,
+    DatasetType,
+    DimensionUniverse,
+    Quantum,
+)
 
 
 class ButlerMock:
@@ -53,7 +60,13 @@ def get(self, ref: DatasetRef) -> Any:
             return dsdata.get(ref.dataId)
         return None
 
-    def put(self, inMemoryDataset: Any, dsRef: DatasetRef, producer: Any = None):
+    def put(
+        self,
+        inMemoryDataset: Any,
+        dsRef: DatasetRef,
+        provenance: DatasetProvenance | None = None,
+        producer: Any = None,
+    ):
         key = dsRef.dataId
         name = dsRef.datasetType.name
         dsdata = self.datasets.setdefault(name, {})

From f255e67ee99c4c2ab35332e0487cf38571eb5679 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:03:04 -0700
Subject: [PATCH 11/15] Add quantum_id to QuantumContext constructor

---
 python/lsst/pipe/base/_quantumContext.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index f71f1d0a9..4b2a897b9 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -34,6 +34,7 @@
 __all__ = ("ExecutionResources", "QuantumContext")
 
 import numbers
+import uuid
 from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any
@@ -182,6 +183,8 @@ class QuantumContext:
         single execution of this node in the pipeline graph.
     resources : `ExecutionResources`, optional
         The resources allocated for executing quanta.
+    quantum_id : `uuid.UUID` or `None`, optional
+        The ID of the quantum being executed. Used for provenance.
 
     Notes
     -----
@@ -199,7 +202,12 @@ class QuantumContext:
     resources: ExecutionResources
 
     def __init__(
-        self, butler: LimitedButler, quantum: Quantum, *, resources: ExecutionResources | None = None
+        self,
+        butler: LimitedButler,
+        quantum: Quantum,
+        *,
+        resources: ExecutionResources | None = None,
+        quantum_id: uuid.UUID | None = None,
     ):
         self.quantum = quantum
         if resources is None:
@@ -223,7 +231,7 @@ def __init__(
                 self.allOutputs.add((ref.datasetType, ref.dataId))
         self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set()
         self.__butler = butler
-        self.dataset_provenance = DatasetProvenance()
+        self.dataset_provenance = DatasetProvenance(quantum_id=quantum_id)
 
     def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any:
         # Butler methods below will check for unresolved DatasetRefs and

From 3e28d0b452a76482bff40b38cfa083160c19c1dc Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 15:05:59 -0700
Subject: [PATCH 12/15] Add provenance to caching limited butler put

---
 python/lsst/pipe/base/caching_limited_butler.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/lsst/pipe/base/caching_limited_butler.py b/python/lsst/pipe/base/caching_limited_butler.py
index f1d60aa75..3251d17c1 100644
--- a/python/lsst/pipe/base/caching_limited_butler.py
+++ b/python/lsst/pipe/base/caching_limited_butler.py
@@ -35,6 +35,7 @@
 
 from lsst.daf.butler import (
     DatasetId,
+    DatasetProvenance,
     DatasetRef,
     DeferredDatasetHandle,
     DimensionUniverse,
@@ -155,7 +156,7 @@ def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
     def isWriteable(self) -> bool:
         return self._wrapped.isWriteable()
 
-    def put(self, obj: Any, ref: DatasetRef) -> DatasetRef:
+    def put(self, obj: Any, ref: DatasetRef, /, *, provenance: DatasetProvenance | None = None) -> DatasetRef:
         if ref.datasetType.name in self._cache_on_put:
             self._cache[ref.datasetType.name] = (
                 ref.id,
@@ -167,7 +168,7 @@ def put(self, obj: Any, ref: DatasetRef) -> DatasetRef:
                 ),
             )
             _LOG.debug("Cached dataset %s on put", ref)
-        return self._wrapped.put(obj, ref)
+        return self._wrapped.put(obj, ref, provenance=provenance)
 
     def pruneDatasets(
         self,

From e0b8a7a0bced975013f2ce2b339eadbe481f96f9 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Wed, 29 Jan 2025 16:10:41 -0700
Subject: [PATCH 13/15] Add method to allow runQuantum to register extra
 provenance

---
 python/lsst/pipe/base/_quantumContext.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index 4b2a897b9..604cf4a09 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -453,3 +453,17 @@ def dimensions(self) -> DimensionUniverse:
         repository (`~lsst.daf.butler.DimensionUniverse`).
         """
         return self.__butler.dimensions
+
+    def add_additional_provenance(self, ref: DatasetRef, extra: dict[str, int | float | str | bool]) -> None:
+        """Add additional provenance information to the dataset provenance.
+
+        Parameters
+        ----------
+        ref : `DatasetRef`
+            The dataset to attach provenance to. This dataset must have been
+            retrieved by this quantum context.
+        extra : `dict` [ `str`, `int` | `float` | `str` | `bool` ]
+            Additional information to attach as provenance information. Keys
+            must be strings and values must be simple scalars.
+        """
+        self.dataset_provenance.add_extra_provenance(ref.id, extra)

From e135b994f1549821bbc2366d77322f2afbcd443c Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 30 Jan 2025 11:47:26 -0700
Subject: [PATCH 14/15] Add the UUID to the outputs structure

Now that the UUID is always known it becomes useful to track
this information for provenance purposes.
---
 python/lsst/pipe/base/_quantumContext.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/lsst/pipe/base/_quantumContext.py b/python/lsst/pipe/base/_quantumContext.py
index 604cf4a09..3d5944ac7 100644
--- a/python/lsst/pipe/base/_quantumContext.py
+++ b/python/lsst/pipe/base/_quantumContext.py
@@ -218,7 +218,7 @@ def __init__(
         self.allOutputs = set()
         for refs in quantum.inputs.values():
             for ref in refs:
-                self.allInputs.add((ref.datasetType, ref.dataId))
+                self.allInputs.add((ref.datasetType, ref.dataId, ref.id))
         for dataset_type, refs in quantum.outputs.items():
             if dataset_type.name.endswith(METADATA_OUTPUT_CONNECTION_NAME) or dataset_type.name.endswith(
                 LOG_OUTPUT_CONNECTION_NAME
@@ -228,8 +228,8 @@ def __init__(
                 # write them itself; that's for the execution system to do.
                 continue
             for ref in refs:
-                self.allOutputs.add((ref.datasetType, ref.dataId))
-        self.outputsPut: set[tuple[DatasetType, DataCoordinate]] = set()
+                self.allOutputs.add((ref.datasetType, ref.dataId, ref.id))
+        self.outputsPut: set[tuple[DatasetType, DataCoordinate, uuid.UUID]] = set()
         self.__butler = butler
         self.dataset_provenance = DatasetProvenance(quantum_id=quantum_id)
 
@@ -251,7 +251,7 @@ def _put(self, value: Any, ref: DatasetRef) -> None:
         """Store data in butler."""
         self._checkMembership(ref, self.allOutputs)
         self.__butler.put(value, ref, provenance=self.dataset_provenance)
-        self.outputsPut.add((ref.datasetType, ref.dataId))
+        self.outputsPut.add((ref.datasetType, ref.dataId, ref.id))
 
     def get(
         self,
@@ -444,7 +444,7 @@ def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> No
         if not isinstance(ref, list | tuple):
             ref = [ref]
         for r in ref:
-            if (r.datasetType, r.dataId) not in inout:
+            if (r.datasetType, r.dataId, r.id) not in inout:
                 raise ValueError("DatasetRef is not part of the Quantum being processed")
 
     @property

From dd217c8faba33c1a813d931888388019df40a041 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 30 Jan 2025 13:13:57 -0700
Subject: [PATCH 15/15] Add news fragment

---
 doc/changes/DM-35396.feature.rst | 3 +++
 doc/changes/DM-35396.misc.rst    | 2 ++
 2 files changed, 5 insertions(+)
 create mode 100644 doc/changes/DM-35396.feature.rst
 create mode 100644 doc/changes/DM-35396.misc.rst

diff --git a/doc/changes/DM-35396.feature.rst b/doc/changes/DM-35396.feature.rst
new file mode 100644
index 000000000..8bde34909
--- /dev/null
+++ b/doc/changes/DM-35396.feature.rst
@@ -0,0 +1,3 @@
+* Modified ``QuantumContext`` such that it now tracks all datasets that are retrieved and records them in ``dataset_provenance``.
+  This provenance is then passed to Butler on ``put()``.
+* Added ``QuantumContext.add_additional_provenance()`` to allow a pipeline task author to attach additional provenance information to be recorded and associated with a particular input dataset.
diff --git a/doc/changes/DM-35396.misc.rst b/doc/changes/DM-35396.misc.rst
new file mode 100644
index 000000000..ddda73026
--- /dev/null
+++ b/doc/changes/DM-35396.misc.rst
@@ -0,0 +1,2 @@
+Modified ``TaskMetadata`` such that it can now be assigned an empty list.
+This list can be retrieved with ``getArray`` but if an attempt is made to get a scalar `KeyError` will be raised.