Merge pull request #277 from lsst/tickets/DM-41606

DM-41606: Option to output pipetask report info to command line
lsst · Mar 14, 2024 · a2f6597 · a2f6597
2 parents 1ce94f1 + 39d2baa
commit a2f6597
Show file tree

Hide file tree

Showing 5 changed files with 103 additions and 26 deletions.
diff --git a/doc/changes/DM-41606.feature.md b/doc/changes/DM-41606.feature.md
@@ -0,0 +1,7 @@
+Add an option to print `pipetask report` information to the command line using
+astropy tables, and make this the default behavior.
+Unpack a more human-readable dictionary from
+`lsst.pipe.base.QuantumGraphExecutionReport.to_summary_dict` and print summary
+tables of quanta and datasets to the command line. Save error messages and
+associated data ids to a yaml file in the working directory, or optionally print
+them to the screen instead.
diff --git a/python/lsst/ctrl/mpexec/cli/cmd/commands.py b/python/lsst/ctrl/mpexec/cli/cmd/commands.py
@@ -329,16 +329,17 @@ def update_graph_run(
 @click.command(cls=PipetaskCommand)
 @repo_argument()
 @ctrlMpExecOpts.qgraph_argument()
-@click.argument("output_yaml", type=click.Path(exists=False))
+@click.option("--full-output-filename", default="", help="Summarize report in a yaml file")
 @click.option("--logs/--no-logs", default=True, help="Get butler log datasets for extra information.")
-def report(repo: str, qgraph: str, output_yaml: str, logs: bool = True) -> None:
+@click.option("--show-errors", is_flag=True, help="Pretty-print a dict of errors from failed quanta.")
+def report(
+    repo: str, qgraph: str, full_output_filename: str = "", logs: bool = True, show_errors: bool = False
+) -> None:
     """Write a yaml file summarizing the produced and missing expected datasets
     in a quantum graph.
 
     REPO is the location of the butler/registry config file.
 
     QGRAPH is the URL to a serialized Quantum Graph file.
-
-    OUTPUT_YAML is the URL to store the summary report.
     """
-    script.report(repo, qgraph, output_yaml, logs)
+    script.report(repo, qgraph, full_output_filename, logs, show_errors)
diff --git a/python/lsst/ctrl/mpexec/cli/script/report.py b/python/lsst/ctrl/mpexec/cli/script/report.py
@@ -24,34 +24,86 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import pprint
 
+import yaml
+from astropy.table import Table
 from lsst.daf.butler import Butler
 from lsst.pipe.base import QuantumGraph
 from lsst.pipe.base.execution_reports import QuantumGraphExecutionReport
 
 
-def report(butler_config: str, qgraph_uri: str, output_yaml: str, logs: bool = True) -> None:
-    """Write a yaml file summarizing the produced and missing expected datasets
-    in a quantum graph.
+def report(
+    butler_config: str,
+    qgraph_uri: str,
+    full_output_filename: str | None,
+    logs: bool = True,
+    show_errors: bool = False,
+) -> None:
+    """Summarize the produced and missing expected datasets in a quantum graph.
 
     Parameters
     ----------
-    butler_config : `str`
-        The Butler used for this report. This should match the Butler used
-        for the run associated with the executed quantum graph.
-    qgraph_uri : `str`
-        The uri of the location of said quantum graph.
-    output_yaml : `str`
-        The name to be used for the summary yaml file.
-    logs : `bool`
-        Get butler log datasets for extra information.
-
-    See Also
-    --------
-    lsst.pipe.base.QuantumGraphExecutionReport.make_reports : Making reports.
-    lsst.pipe.base.QuantumGraphExecutionReport.write_summary_yaml : Summaries.
+    butler_config : `str`
+        The Butler used for this report. This should match the Butler used
+        for the run associated with the executed quantum graph.
+    qgraph_uri : `str`
+        The uri of the location of said quantum graph.
+    full_output_filename : `str` or `None`
+        Output the full summary report to a yaml file (named herein).
+        Each data id and error message is keyed to a quantum graph node id.
+        A convenient output format for error-matching and cataloguing tools
+        such as the ones in the Campaign Management database. If this is
+        empty or `None`, quanta and dataset information will be printed to
+        the command-line instead.
+    logs : `bool`
+        Get butler log datasets for extra information (error messages).
+    show_errors : `bool`
+        If no output yaml is provided, print error messages to the
+        command-line along with the report. By default, these messages and
+        their data ids are instead stored in the working directory as
+        `{run name}_err.yaml` (last component of the graph's output run).
     """
     butler = Butler.from_config(butler_config, writeable=False)
     qgraph = QuantumGraph.loadUri(qgraph_uri)
     report = QuantumGraphExecutionReport.make_reports(butler, qgraph)
-    report.write_summary_yaml(butler, output_yaml, do_store_logs=logs)
+    if not full_output_filename:
+        # No yaml filename given: print summary tables to the command-line.
+        summary_dict = report.to_summary_dict(butler, logs, human_readable=True)
+        dataset_table_rows = []
+        data_products = []
+        quanta_summary = []
+        error_summary = []
+        for task in summary_dict.keys():
+            for data_product in summary_dict[task]["outputs"]:
+                dataset_table_rows.append(summary_dict[task]["outputs"][data_product])
+                data_products.append(data_product)
+
+            quanta_summary.append(
+                {
+                    "Task": task,
+                    "Failed Quanta": summary_dict[task]["failed_quanta"],
+                    "Blocked Quanta": summary_dict[task]["n_quanta_blocked"],
+                }
+            )
+
+            if "errors" in summary_dict[task].keys():
+                error_summary.append({task: summary_dict[task]["errors"]})
+        quanta = Table(quanta_summary)
+        datasets = Table(dataset_table_rows)
+        datasets.add_column(data_products, index=0, name="DatasetType")
+        quanta.pprint_all()
+        print("\n")
+        if show_errors:
+            pprint.pprint(error_summary)
+            print("\n")
+        else:
+            assert qgraph.metadata is not None, "Saved QGs always have metadata."
+            collection = qgraph.metadata["output_run"]
+            collection = str(collection)
+            run_name = collection.split("/")[-1]
+            with open(f"{run_name}_err.yaml", "w") as stream:
+                yaml.safe_dump(error_summary, stream)
+        datasets.pprint_all()
+    else:
+        report.write_summary_yaml(butler, full_output_filename, do_store_logs=logs)
diff --git a/tests/test_cliCmdReport.py b/tests/test_cliCmdReport.py
@@ -53,7 +53,7 @@ def tearDown(self) -> None:
 
     def test_report(self):
         """Test for making a report on the produced and missing expected
-        datasets in a quantum graph. in a graph.
+        datasets in a quantum graph.
         """
         metadata = {"output_run": "run"}
         butler, qgraph = makeSimpleQGraph(
@@ -71,10 +71,9 @@ def test_report(self):
 
         result = self.runner.invoke(
             pipetask_cli,
-            ["report", self.root, graph_uri, test_filename, "--no-logs"],
+            ["report", self.root, graph_uri, "--full-output-filename", test_filename, "--no-logs"],
             input="no",
         )
-
         # Check that we can read from the command line
         self.assertEqual(result.exit_code, 0, clickResultMsg(result))
 
@@ -84,6 +83,23 @@ def test_report(self):
         self.assertIsNotNone(report_output_dict["task0"])
         self.assertIsNotNone(report_output_dict["task0"]["failed_quanta"])
 
+        result_hr = self.runner.invoke(
+            pipetask_cli,
+            ["report", self.root, graph_uri, "--no-logs"],
+            input="no",
+        )
+
+        # Check that we can read from the command line
+        self.assertEqual(result_hr.exit_code, 0, clickResultMsg(result_hr))
+
+        # Check that we get string output
+        self.assertIsInstance(result_hr.stdout, str)
+
+        # Check that task0 and the failed quanta for task0 exist in the string
+        self.assertIn("task0", result_hr.stdout)
+        self.assertIn("Failed Quanta", result_hr.stdout)
+        self.assertIn("{'data_id': {'instrument': 'INSTR', 'detector': 0}}", result_hr.stdout)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/types.txt b/types.txt
@@ -0,0 +1 @@
+types-PyYAML