Skip to content

Commit

Permalink
feat: Add Pydantic validation diff table to dandiset summary
Browse files Browse the repository at this point in the history
  • Loading branch information
candleindark committed Jan 14, 2025
1 parent ccc2140 commit d1a7a76
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
gen_pydantic_validation_errs_cell,
gen_row,
validation_err_count_table,
validation_err_diff_table,
)
from dandisets_linkml_status_tools.tools.validation_err_counter import (
ValidationErrCounter,
validation_err_diff,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -365,6 +367,10 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
pydantic_validation_errs1_ctr.count(err1_reps)
pydantic_validation_errs2_ctr.count(err2_reps)

pydantic_validation_err_diff = validation_err_diff(
pydantic_validation_errs1_ctr, pydantic_validation_errs2_ctr
)

with (output_dir / summary_file_name).open("w") as summary_f:
# === Output counts of different categories of Pydantic validation errors for
# validations done with separate schemas ===
Expand All @@ -379,6 +385,13 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
validation_err_count_table(pydantic_validation_errs2_ctr.counts_by_cat)
)

# Output a table of the differences in the different categories of
# Pydantic validation errors between the two sets of validation results where
# each set is represented, and counted, by a `ValidationErrCounter` object
summary_f.write("\n")
summary_f.write("### Pydantic errs diff\n\n")
summary_f.write(validation_err_diff_table(pydantic_validation_err_diff))

# Write the header and alignment rows of the summary table
summary_f.write("\n")
summary_f.write(gen_header_and_alignment_rows(summary_headers))
Expand Down
33 changes: 33 additions & 0 deletions src/dandisets_linkml_status_tools/tools/md.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file contains helpers for generating Markdown files

from collections import Counter
from collections.abc import Iterable
from pathlib import Path

Expand Down Expand Up @@ -115,3 +116,35 @@ def escape(text: str) -> str:
escaped_substrs.append(escaped)

return "".join(escaped_substrs)


def validation_err_diff_table(
    diff: dict[tuple, tuple[Counter[tuple], Counter[tuple]]]
) -> str:
    """
    Render the per-category differences between two sets of validation errors as a
    Markdown table

    :param diff: A dictionary mapping each error category (a tuple) to a pair of
        `Counter` objects: the first counts the validation errors removed, and the
        second counts the validation errors gained, in going from the first set of
        validation errors to the second set within that category
    :return: The table in Markdown format, with one row per category, ordered by
        category, giving the total count of errors removed and of errors gained
    """
    # The header row and the alignment row
    table_head = gen_header_and_alignment_rows(
        ["Error category", "Removed", "Gained"]
    )

    # The content rows, one per category, in sorted category order
    content_rows = []
    for category, (removed_ctr, gained_ctr) in sorted(diff.items()):
        # Only the totals of the two counters are shown, not the individual errors
        cells = (
            escape(str(category)),
            removed_ctr.total(),
            gained_ctr.total(),
        )
        content_rows.append(gen_row(cells))

    return table_head + "".join(content_rows)

0 comments on commit d1a7a76

Please sign in to comment.