Commit

Add flag for what to do with failed log datasets
eigerx committed Jul 31, 2024
1 parent aa36967 commit f93595f
Showing 1 changed file with 25 additions and 6 deletions.
31 changes: 25 additions & 6 deletions python/lsst/pipe/base/quantum_provenance_graph.py
@@ -750,7 +750,11 @@ def add_new_graph(self, butler: Butler, qgraph: QuantumGraph | ResourcePathExpre
quantum_info["status"] = new_status

def resolve_duplicates(
self, butler: Butler, collections: Sequence[str] | None = None, where: str = ""
self,
butler: Butler,
collections: Sequence[str] | None = None,
where: str = "",
curse_failed_logs: bool = False,
) -> None:
"""After quantum graphs associated with each run have been added
to the `QuantumProvenanceGraph`, resolve any discrepancies between
@@ -777,6 +781,14 @@ def resolve_duplicates(
where : `str`
A "where" string to use to constrain the collections, if passed.
curse_failed_logs : `bool`
Mark log datasets as `cursed` if they are published in the final
output collection. Note that a campaign-level collection must be
used here for `collections` if `curse_failed_logs` is `True`; if
`resolve_duplicates` is run on a list of group-level collections,
then each will show logs from their own failures as published, and
the datasets will show as cursed regardless of this flag.
"""
# First thing: raise an error if resolve_duplicates has been run
# before on this qpg.
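For orientation, a minimal usage sketch of the new flag (not part of the commit); the repository path, collection name, and quantum-graph file names below are hypothetical:

```python
from lsst.daf.butler import Butler
from lsst.pipe.base.quantum_provenance_graph import QuantumProvenanceGraph

# Hypothetical repo and collection names, for illustration only.
butler = Butler("/repo/main")
campaign_collection = "u/eigerx/DM-00000/campaign"

qpg = QuantumProvenanceGraph()
for qgraph_file in ("group1.qgraph", "group2.qgraph"):  # hypothetical group-level graphs
    qpg.add_new_graph(butler, qgraph_file)

# Curse failed logs only against a campaign-level collection; group-level
# collections each publish their own failure logs, so those logs would show
# as cursed regardless of the flag.
qpg.resolve_duplicates(
    butler,
    collections=[campaign_collection],
    curse_failed_logs=True,
)
```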
@@ -839,11 +851,18 @@ def resolve_duplicates(
# a published dataset, that dataset is cursed. Set the
# status for the dataset to cursed and note the reason
# for labeling the dataset as cursed.
case (_, "published") if not dataset_type_name.endswith("_log"):
dataset_info["status"] = "cursed"
dataset_info["messages"].append(
"Published dataset is from an unsuccessful quantum."
)
case (_, "published"):
# Avoiding publishing failed logs is difficult
# without using tagged collections, so flag them as
# merely unsuccessful unless the user requests it.
if dataset_type_name.endswith("_log") and not curse_failed_logs:
dataset_info["status"] = "unsuccessful"

Check warning on line 859 in python/lsst/pipe/base/quantum_provenance_graph.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/pipe/base/quantum_provenance_graph.py#L859

Added line #L859 was not covered by tests
else:
dataset_info["status"] = "cursed"
dataset_info["messages"].append(

Check warning on line 862 in python/lsst/pipe/base/quantum_provenance_graph.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/pipe/base/quantum_provenance_graph.py#L861-L862

Added lines #L861 - L862 were not covered by tests
f"Unsuccessful dataset {dataset_type_name} published in "
"final output collection."
)
# any other produced dataset (produced but not
# published and not successful) is a regular
# failure.
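As a reading aid, a simplified standalone sketch (not the library code itself; the function name is invented) of the decision the new `case (_, "published")` branch makes for a dataset published from an unsuccessful quantum:

```python
def resolve_published_status(dataset_type_name: str, curse_failed_logs: bool) -> str:
    """Illustrative stand-in for the branch added in this commit."""
    # Keeping failed logs out of the output collection is hard without tagged
    # collections, so they stay merely "unsuccessful" unless cursing is requested.
    if dataset_type_name.endswith("_log") and not curse_failed_logs:
        return "unsuccessful"
    return "cursed"


assert resolve_published_status("isr_log", curse_failed_logs=False) == "unsuccessful"
assert resolve_published_status("isr_log", curse_failed_logs=True) == "cursed"
assert resolve_published_status("calexp", curse_failed_logs=False) == "cursed"
```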
