Skip to content

Commit

Permalink
Merge pull request #52 from candleindark/enh
Browse files Browse the repository at this point in the history
Add detailed validation error diff tables to dandiset validation diff summary
  • Loading branch information
candleindark authored Jan 15, 2025
2 parents f20ea44 + 253f754 commit 15452f4
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
gen_diff_cell,
gen_pydantic_validation_errs_cell,
gen_row,
pydantic_validation_err_diff_detailed_table,
validation_err_count_table,
validation_err_diff_detailed_tables,
validation_err_diff_table,
)
from dandisets_linkml_status_tools.tools.validation_err_counter import (
Expand Down Expand Up @@ -320,26 +322,30 @@ def _output_dandiset_validation_diff_reports(
logger.info("Creating dandiset validation diff report directory %s", output_dir)
output_dir.mkdir(parents=True)

err1_rep_iters: list[Iterable[tuple[str, str, tuple[str | int], Path]]] = []
err2_rep_iters: list[Iterable[tuple[str, str, tuple[str | int], Path]]] = []
err1_rep_lsts: list[list[tuple[str, str, tuple[str | int], Path]]] = []
err2_rep_lsts: list[list[tuple[str, str, tuple[str | int], Path]]] = []
for r in reports:
p = Path(r.dandiset_identifier, r.dandiset_version)

# Tuple representation of the Pydantic validation errors
err1_rep_iters.append(
(e["type"], e["msg"], tuple(e["loc"]), p)
for e in r.pydantic_validation_errs1
err1_rep_lsts.append(
[
(e["type"], e["msg"], tuple(e["loc"]), p)
for e in r.pydantic_validation_errs1
]
)
err2_rep_iters.append(
(e["type"], e["msg"], tuple(e["loc"]), p)
for e in r.pydantic_validation_errs2
err2_rep_lsts.append(
[
(e["type"], e["msg"], tuple(e["loc"]), p)
for e in r.pydantic_validation_errs2
]
)

err1_reps: Iterable[tuple[str, str, tuple[str | int, ...], Path]] = (
chain.from_iterable(err1_rep_iters)
chain.from_iterable(err1_rep_lsts)
)
err2_reps: Iterable[tuple[str, str, tuple[str | int, ...], Path]] = (
chain.from_iterable(err2_rep_iters)
chain.from_iterable(err2_rep_lsts)
)

def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
Expand Down Expand Up @@ -392,6 +398,18 @@ def err_categorizer(err: tuple) -> tuple[str, str, tuple[str, ...]]:
summary_f.write("### Pydantic errs diff\n\n")
summary_f.write(validation_err_diff_table(pydantic_validation_err_diff))

# Write a sequence of tables detailing the differences in Pydantic validation
# errors between the two sets of validation results
summary_f.write("\n")
summary_f.write("## Pydantic errs diff detailed tables\n\n")
# noinspection PyTypeChecker
summary_f.write(
validation_err_diff_detailed_tables(
pydantic_validation_err_diff,
pydantic_validation_err_diff_detailed_table,
)
)

# Write the header and alignment rows of the summary table
summary_f.write("\n")
summary_f.write(gen_header_and_alignment_rows(summary_headers))
Expand Down
99 changes: 99 additions & 0 deletions src/dandisets_linkml_status_tools/tools/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections import Counter
from collections.abc import Iterable
from pathlib import Path
from typing import Protocol

from dandisets_linkml_status_tools.models import PydanticValidationErrsType
from dandisets_linkml_status_tools.tools.typing import Stringable
Expand Down Expand Up @@ -148,3 +149,101 @@ def validation_err_diff_table(
for cat, (removed, gained) in sorted(diff.items())
)
)


class TableGenerator(Protocol):
"""
Protocol of generators of tables for a specific category of validation errors
"""

def __call__(
self, cat: tuple, diff: Counter[tuple], *, is_removed: bool
) -> str: ...


def validation_err_diff_detailed_tables(
diff: dict[tuple, tuple[Counter[tuple], Counter[tuple]]],
table_gen_func: TableGenerator,
) -> str:
"""
Generate a sequence of tables detailing the differences in two sets of validation
errors by categories. Each table details the validation errors removed or gained in
a specific category
:param diff: This parameter is the same as the `diff` parameter in the
`validation_err_diff_table` function
:param table_gen_func: The function that generates a table detailing the differences
in a specific category of validation errors. The function should take three
parameters: the category of the validation errors, the differences represented
as a `Counter` object, and a boolean value, as a keyword argument, indicating
whether the differences represent the validation errors removed or gained
:return: The string presenting the tables in Markdown format
:raises ValueError: If the removed and gained validation errors are not mutually
exclusive for any category
"""
tables: list[str] = []

for cat, (removed, gained) in sorted(diff.items()):
# === Generate one table for each category ===

if not removed and not gained:
# There is no difference in this category
continue
if removed and gained:
# This is not possible, there must have been an error in the diff
msg = (
f"The removed and gained validation errors should be mutually exclusive"
f" for the category {cat!r}"
)
raise ValueError(msg)

if removed:
table = table_gen_func(cat, removed, is_removed=True)

else:
table = table_gen_func(cat, gained, is_removed=False)

tables.append(table)

return "\n".join(tables)


def pydantic_validation_err_diff_detailed_table(
cat: tuple, diff: Counter[tuple], *, is_removed: bool
) -> str:
"""
Generate a table for a specific category of Pydantic validation errors
:param cat: The category of the Pydantic validation errors
:param diff: The differences of Pydantic validation errors in the given category
represented as a `Counter` object
:param is_removed: A boolean value indicating whether `diff` represents the
validation errors removed or gained
:return: The string presenting the table in Markdown format
"""
# Header of the count column
count_col_header = "Removed" if is_removed else "Gained"

heading = f"### {escape(str(cat))}\n\n"
header_and_alignment_rows = gen_header_and_alignment_rows(
("type", "msg", "loc", "Data instance path", count_col_header)
)
rows = "".join(
gen_row(
(
# The "type" attribute of the Pydantic validation error
err_rep[0],
# The "msg" attribute of the Pydantic validation error
err_rep[1],
# The "loc" attribute of the Pydantic validation error
escape(str(err_rep[2])),
# The path of the data instance
f"[{err_rep[3]}]({err_rep[3]})",
# The count of removed or gained of the represented error
count,
)
)
for err_rep, count in sorted(diff.items())
)

return f"{heading}{header_and_alignment_rows}{rows}"

0 comments on commit 15452f4

Please sign in to comment.