Merge pull request #52 from candleindark/enh

Add detailed validation error diff tables to dandiset validation diff summary
dandi · Jan 15, 2025 · 15452f4 · 15452f4
2 parents f20ea44 + 253f754
commit 15452f4
Show file tree

Hide file tree

Showing 2 changed files with 127 additions and 10 deletions.
diff --git a/src/dandisets_linkml_status_tools/cmd_funcs/diff_manifests_reports.py b/src/dandisets_linkml_status_tools/cmd_funcs/diff_manifests_reports.py
@@ -35,7 +35,9 @@
     gen_diff_cell,
     gen_pydantic_validation_errs_cell,
     gen_row,
+    pydantic_validation_err_diff_detailed_table,
     validation_err_count_table,
+    validation_err_diff_detailed_tables,
     validation_err_diff_table,
 )
 from dandisets_linkml_status_tools.tools.validation_err_counter import (
@@ -320,26 +322,30 @@ def _output_dandiset_validation_diff_reports(
     logger.info("Creating dandiset validation diff report directory %s", output_dir)
     output_dir.mkdir(parents=True)
 
-    err1_rep_iters: list[Iterable[tuple[str, str, tuple[str | int], Path]]] = []
-    err2_rep_iters: list[Iterable[tuple[str, str, tuple[str | int], Path]]] = []
+    err1_rep_lsts: list[list[tuple[str, str, tuple[str | int], Path]]] = []
+    err2_rep_lsts: list[list[tuple[str, str, tuple[str | int], Path]]] = []
     for r in reports:
         p = Path(r.dandiset_identifier, r.dandiset_version)
 
         # Tuple representation of the Pydantic validation errors
-        err1_rep_iters.append(
-            (e["type"], e["msg"], tuple(e["loc"]), p)
-            for e in r.pydantic_validation_errs1
+        err1_rep_lsts.append(
+            [
+                (e["type"], e["msg"], tuple(e["loc"]), p)
+                for e in r.pydantic_validation_errs1
+            ]
         )
-        err2_rep_iters.append(
-            (e["type"], e["msg"], tuple(e["loc"]), p)
-            for e in r.pydantic_validation_errs2
+        err2_rep_lsts.append(
+            [
+                (e["type"], e["msg"], tuple(e["loc"]), p)
+                for e in r.pydantic_validation_errs2
+            ]
         )
 
     err1_reps: Iterable[tuple[str, str, tuple[str | int, ...], Path]] = (
-        chain.from_iterable(err1_rep_iters)
+        chain.from_iterable(err1_rep_lsts)
     )
     err2_reps: Iterable[tuple[str, str, tuple[str | int, ...], Path]] = (
-        chain.from_iterable(err2_rep_iters)
+        chain.from_iterable(err2_rep_lsts)
     )
 
     def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
@@ -392,6 +398,18 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
         summary_f.write("### Pydantic errs diff\n\n")
         summary_f.write(validation_err_diff_table(pydantic_validation_err_diff))
 
+        # Write a sequence of tables detailing the differences in Pydantic validation
+        # errors between the two sets of validation results
+        summary_f.write("\n")
+        summary_f.write("## Pydantic errs diff detailed tables\n\n")
+        # noinspection PyTypeChecker
+        summary_f.write(
+            validation_err_diff_detailed_tables(
+                pydantic_validation_err_diff,
+                pydantic_validation_err_diff_detailed_table,
+            )
+        )
+
         # Write the header and alignment rows of the summary table
         summary_f.write("\n")
         summary_f.write(gen_header_and_alignment_rows(summary_headers))

diff --git a/src/dandisets_linkml_status_tools/tools/md.py b/src/dandisets_linkml_status_tools/tools/md.py
@@ -3,6 +3,7 @@
 from collections import Counter
 from collections.abc import Iterable
 from pathlib import Path
+from typing import Protocol
 
 from dandisets_linkml_status_tools.models import PydanticValidationErrsType
 from dandisets_linkml_status_tools.tools.typing import Stringable
@@ -148,3 +149,101 @@ def validation_err_diff_table(
             for cat, (removed, gained) in sorted(diff.items())
         )
     )
+
+
+class TableGenerator(Protocol):
+    """
+    Protocol of generators of tables for a specific category of validation errors
+    """
+
+    def __call__(
+        self, cat: tuple, diff: Counter[tuple], *, is_removed: bool
+    ) -> str: ...
+
+
+def validation_err_diff_detailed_tables(
+    diff: dict[tuple, tuple[Counter[tuple], Counter[tuple]]],
+    table_gen_func: TableGenerator,
+) -> str:
+    """
+    Generate a sequence of tables detailing the differences in two sets of validation
+    errors by categories. Each table details the validation errors removed or gained in
+    a specific category
+
+    :param diff: This parameter is the same as the `diff` parameter in the
+        `validation_err_diff_table` function
+    :param table_gen_func: The function that generates a table detailing the differences
+        in a specific category of validation errors. The function should take three
+        parameters: the category of the validation errors, the differences represented
+        as a `Counter` object, and a boolean value, as a keyword argument, indicating
+        whether the differences represent the validation errors removed or gained
+    :return: The string presenting the tables in Markdown format
+    :raises ValueError: If the removed and gained validation errors are not mutually
+        exclusive for any category
+    """
+    tables: list[str] = []
+
+    for cat, (removed, gained) in sorted(diff.items()):
+        # === Generate one table for each category ===
+
+        if not removed and not gained:
+            # There is no difference in this category
+            continue
+        if removed and gained:
+            # This is not possible, there must have been an error in the diff
+            msg = (
+                f"The removed and gained validation errors should be mutually exclusive"
+                f" for the category {cat!r}"
+            )
+            raise ValueError(msg)
+
+        if removed:
+            table = table_gen_func(cat, removed, is_removed=True)
+
+        else:
+            table = table_gen_func(cat, gained, is_removed=False)
+
+        tables.append(table)
+
+    return "\n".join(tables)
+
+
+def pydantic_validation_err_diff_detailed_table(
+    cat: tuple, diff: Counter[tuple], *, is_removed: bool
+) -> str:
+    """
+    Generate a table for a specific category of Pydantic validation errors
+
+    :param cat: The category of the Pydantic validation errors
+    :param diff: The differences of Pydantic validation errors in the given category
+        represented as a `Counter` object
+    :param is_removed: A boolean value indicating whether `diff` represents the
+        validation errors removed or gained
+    :return: The string presenting the table in Markdown format
+    """
+    # Header of the count column
+    count_col_header = "Removed" if is_removed else "Gained"
+
+    heading = f"### {escape(str(cat))}\n\n"
+    header_and_alignment_rows = gen_header_and_alignment_rows(
+        ("type", "msg", "loc", "Data instance path", count_col_header)
+    )
+    rows = "".join(
+        gen_row(
+            (
+                # The "type" attribute of the Pydantic validation error
+                err_rep[0],
+                # The "msg" attribute of the Pydantic validation error
+                err_rep[1],
+                # The "loc" attribute of the Pydantic validation error
+                escape(str(err_rep[2])),
+                # The path of the data instance
+                f"[{err_rep[3]}]({err_rep[3]})",
+                # The count of removed or gained of the represented error
+                count,
+            )
+        )
+        for err_rep, count in sorted(diff.items())
+    )
+
+    return f"{heading}{header_and_alignment_rows}{rows}"