From 7e142bb0b9c37ba084a9cefa151be1254fa41cf7 Mon Sep 17 00:00:00 2001
From: Orion Eiger <eiger@slac.stanford.edu>
Date: Thu, 23 May 2024 11:38:54 -0700
Subject: [PATCH] Add flag for what to do with failed log datasets

---
 doc/changes/DM-41711.feature.md               |  8 +++
 .../pipe/base/quantum_provenance_graph.py     | 52 ++++++++++++++-----
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/doc/changes/DM-41711.feature.md b/doc/changes/DM-41711.feature.md
index 91dd2309..10c3ef1f 100644
--- a/doc/changes/DM-41711.feature.md
+++ b/doc/changes/DM-41711.feature.md
@@ -1,3 +1,11 @@
 Create a QuantumProvenanceGraph, which details the status of every quantum
 and dataset over multiple attempts at executing graphs, noting when quanta
 have been recovered.
+
+Step through all the quantum graphs associated with certain tasks or
+processing steps. For each graph/attempt, the status of each quantum and
+dataset is recorded in `QuantumProvenanceGraph.add_new_graph` and outcomes
+of quanta over multiple runs are resolved in
+`QuantumProvenanceGraph.resolve_duplicates`. At the end of this process,
+we can combine all attempts into a summary. This serves to answer the
+question "What happened to this data ID?" in a holistic sense.
diff --git a/python/lsst/pipe/base/quantum_provenance_graph.py b/python/lsst/pipe/base/quantum_provenance_graph.py
index 242df310..2b08416a 100644
--- a/python/lsst/pipe/base/quantum_provenance_graph.py
+++ b/python/lsst/pipe/base/quantum_provenance_graph.py
@@ -97,6 +97,9 @@ class DatasetKey(NamedTuple):
     """
 
     is_prerequisite: ClassVar[Literal[False]] = False
+    """Whether this node is a prerequisite to another node (also always
+    `False`).
+    """
 
 
 class PrerequisiteDatasetKey(NamedTuple):
@@ -122,6 +125,8 @@ class PrerequisiteDatasetKey(NamedTuple):
     """
 
     is_prerequisite: ClassVar[Literal[True]] = True
+    """Whether this node is a prerequisite to another node (always `True`).
+    """
 
 
 QuantumRunStatus: TypeAlias = Literal["failed", "successful", "logs_missing", "blocked", "metadata_missing"]
@@ -279,13 +284,13 @@ class TaskSummary(pydantic.BaseModel):
     """The number of quanta expected by the graph.
     """
 
-    @pydantic.computed_field  # type: ignore[misc]
+    @pydantic.computed_field  # type: ignore[prop-decorator]
     @property
     def n_wonky(self) -> int:
         """Return a count of `wonky` quanta."""
         return len(self.wonky_quanta)
 
-    @pydantic.computed_field  # type: ignore[misc]
+    @pydantic.computed_field  # type: ignore[prop-decorator]
     @property
     def n_failed(self) -> int:
         """Return a count of `failed` quanta."""
@@ -417,8 +422,8 @@ class DatasetTypeSummary(pydantic.BaseModel):
     """
 
     n_published: int = 0
-    """A count of the datasets of this type which were published in the final
-    collection.
+    """A count of the datasets of this type which were published in the
+    finalized collection(s).
     """
     n_unpublished: int = 0
     """A count of the datasets of this type which were produced but not
@@ -435,13 +440,13 @@ class DatasetTypeSummary(pydantic.BaseModel):
     """The number of datasets of this type expected by the graph.
     """
 
-    @pydantic.computed_field  # type: ignore[misc]
+    @pydantic.computed_field  # type: ignore[prop-decorator]
     @property
     def n_cursed(self) -> int:
         """Return a count of cursed datasets."""
         return len(self.cursed_datasets)
 
-    @pydantic.computed_field  # type: ignore[misc]
+    @pydantic.computed_field  # type: ignore[prop-decorator]
     @property
     def n_unsuccessful(self) -> int:
         """Return a count of unsuccessful datasets."""
@@ -528,7 +533,7 @@ def __init__(self) -> None:
         # name.
         self._datasets: dict[str, set[DatasetKey]] = {}
         # Bool representing whether the graph has been finalized. This is set
-        # to True when resolve_duplicates is
+        # to True when resolve_duplicates completes.
         self._finalized: bool = False
 
     def get_quantum_info(self, key: QuantumKey) -> QuantumInfo:
@@ -571,7 +576,7 @@ def add_new_graph(self, butler: Butler, qgraph: QuantumGraph | ResourcePathExpre
         to assign to the overall `QuantumInfo`. For example, if a
         previous run associated with a quantum had the status `failed`,
         and the status from the new graph reads `successful`, we can
-        mark the overall quantum status as `successful` and list the id
+        mark the overall quantum status as `successful` and list the data_id
         as `recovered`.
 
         Parameters
@@ -750,7 +755,11 @@ def add_new_graph(self, butler: Butler, qgraph: QuantumGraph | ResourcePathExpre
             quantum_info["status"] = new_status
 
     def resolve_duplicates(
-        self, butler: Butler, collections: Sequence[str] | None = None, where: str = ""
+        self,
+        butler: Butler,
+        collections: Sequence[str] | None = None,
+        where: str = "",
+        curse_failed_logs: bool = False,
     ) -> None:
         """After quantum graphs associated with each run have been added
         to the `QuantumProvenanceGraph, resolve any discrepancies between
@@ -777,6 +786,14 @@ def resolve_duplicates(
 
         where : `str`
             A "where" string to use to constrain the collections, if passed.
+
+        curse_failed_logs : `bool`
+            Mark log datasets as `cursed` if they are published in the final
+            output collection. Note that a campaign-level collection must be
+            used here for `collections` if `curse_failed_logs` is `True`; if
+            `resolve_duplicates` is run on a list of group-level collections
+            then each will show logs from their own failures as published and
+            the datasets will show as cursed regardless of this flag.
         """
         # First thing: raise an error if resolve_duplicates has been run
         # before on this qpg.
@@ -839,11 +856,18 @@ def resolve_duplicates(
                         # a published dataset, that dataset is cursed. Set the
                         # status for the dataset to cursed and note the reason
                         # for labeling the dataset as cursed.
-                        case (_, "published") if not dataset_type_name.endswith("_log"):
-                            dataset_info["status"] = "cursed"
-                            dataset_info["messages"].append(
-                                "Published dataset is from an unsuccessful quantum."
-                            )
+                        case (_, "published"):
+                            # Avoiding publishing failed logs is difficult
+                            # without using tagged collections, so flag them as
+                            # merely unsuccessful unless the user requests it.
+                            if dataset_type_name.endswith("_log") and not curse_failed_logs:
+                                dataset_info["status"] = "unsuccessful"
+                            else:
+                                dataset_info["status"] = "cursed"
+                                dataset_info["messages"].append(
+                                    f"Unsuccessful dataset {dataset_type_name} published in "
+                                    "final output collection."
+                                )
                         # any other produced dataset (produced but not
                         # published and not successful) is a regular
                         # failure.