feat: Add Pydantic validation diff table to dandiset summary

dandi · Jan 14, 2025 · d1a7a76 · d1a7a76
1 parent ccc2140
commit d1a7a76
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 0 deletions.
diff --git a/src/dandisets_linkml_status_tools/cmd_funcs/diff_manifests_reports.py b/src/dandisets_linkml_status_tools/cmd_funcs/diff_manifests_reports.py
@@ -36,9 +36,11 @@
     gen_pydantic_validation_errs_cell,
     gen_row,
     validation_err_count_table,
+    validation_err_diff_table,
 )
 from dandisets_linkml_status_tools.tools.validation_err_counter import (
     ValidationErrCounter,
+    validation_err_diff,
 )
 
 logger = logging.getLogger(__name__)
@@ -365,6 +367,10 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
     pydantic_validation_errs1_ctr.count(err1_reps)
     pydantic_validation_errs2_ctr.count(err2_reps)
 
+    pydantic_validation_err_diff = validation_err_diff(
+        pydantic_validation_errs1_ctr, pydantic_validation_errs2_ctr
+    )
+
     with (output_dir / summary_file_name).open("w") as summary_f:
         # === Output counts of different categories of Pydantic validation errors for
         # validations done with separate schemas ===
@@ -379,6 +385,13 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
             validation_err_count_table(pydantic_validation_errs2_ctr.counts_by_cat)
         )
 
+        # Output a table of the per-category differences in Pydantic validation errors
+        # between the two sets of validation results, where each set is counted by a
+        # `ValidationErrCounter` object
+        summary_f.write("\n")
+        summary_f.write("### Pydantic errs diff\n\n")
+        summary_f.write(validation_err_diff_table(pydantic_validation_err_diff))
+
         # Write the header and alignment rows of the summary table
         summary_f.write("\n")
         summary_f.write(gen_header_and_alignment_rows(summary_headers))

diff --git a/src/dandisets_linkml_status_tools/tools/md.py b/src/dandisets_linkml_status_tools/tools/md.py
@@ -1,5 +1,6 @@
 # This file contains helpers for generating Markdown files
 
+from collections import Counter
 from collections.abc import Iterable
 from pathlib import Path
 
@@ -115,3 +116,35 @@ def escape(text: str) -> str:
         escaped_substrs.append(escaped)
 
     return "".join(escaped_substrs)
+
+
+def validation_err_diff_table(
+    diff: dict[tuple, tuple[Counter[tuple], Counter[tuple]]]
+) -> str:
+    """
+    Generate a Markdown table summarizing, per error category, how many validation
+    errors were removed and gained between two sets of validation errors
+
+    :param diff: A dictionary mapping each category (a tuple) to a pair of `Counter`
+        objects: the validation errors removed and the validation errors gained, in
+        that category, going from the first set of validation errors to the second
+    :return: The table in Markdown format, built from the headers "Error category",
+        "Removed", and "Gained", with one row per category in sorted category order;
+        the counts shown are the totals of the respective `Counter` objects
+    """
+    return (
+        # The header row and the alignment row
+        gen_header_and_alignment_rows(["Error category", "Removed", "Gained"])
+        +
+        # The content rows, one per category, ordered by sorting the category keys
+        "".join(
+            gen_row(
+                (
+                    escape(str(cat)),
+                    removed.total(),
+                    gained.total(),
+                )
+            )
+            for cat, (removed, gained) in sorted(diff.items())
+        )
+    )