Skip to content

Commit

Permalink
Aggregate failures in console output (#98)
Browse files Browse the repository at this point in the history
Failures are grouped by check name and description, and the distinct
failing values and the rows where those values occur are aggregated.

Signed-off-by: Joshua Kwan <[email protected]>
  • Loading branch information
joshk0 authored Nov 13, 2023
1 parent d3196e4 commit 6fe474a
Showing 1 changed file with 27 additions and 1 deletion.
28 changes: 27 additions & 1 deletion focus_validator/outputter/outputter_console.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import pandas as pd
from tabulate import tabulate

Expand Down Expand Up @@ -27,6 +28,7 @@ def __restructure_check_list__(result_set: ValidationResult):
}
)
rows.append(row_obj)

df = pd.DataFrame(rows)
df.rename(
columns={
Expand Down Expand Up @@ -59,11 +61,35 @@ def write(self, result_set: ValidationResult):
print(tabulate(checklist, headers="keys", tablefmt="psql"))

if result_set.failure_cases is not None:
aggregated_failures = result_set.failure_cases.groupby(by=['Check Name', 'Column', 'Description'], as_index=False).aggregate(lambda x: maybe_collapse_range(x.unique().tolist()))

print("Checks summary:")
print(
tabulate(
tabular_data=result_set.failure_cases, # type: ignore
tabular_data=aggregated_failures, # type: ignore
headers="keys",
tablefmt="psql",
)
)

def maybe_collapse_range(l):
    """Collapse integer-valued numbers into sorted, compact range strings.

    Runs of consecutive values are rendered as ``"start-end"`` and isolated
    values as ``"value"``; e.g. ``[1, 2, 3, 7]`` becomes ``["1-3", "7"]``.

    Args:
        l: List of values to collapse.

    Returns:
        A new list of strings when every element is an ``int`` or an
        integer-valued ``float``. Otherwise (strings, ``None``, NaN,
        infinities, fractional floats, mixed types, ...) the input list
        ``l`` itself is returned unchanged.
    """
    # Validate *before* sorting: sorting a mixed-type list raises TypeError,
    # and int() would truncate fractional floats or fail on inf/NaN.
    for value in l:
        if isinstance(value, float):
            # NaN and +/-inf also report is_integer() == False.
            if not value.is_integer():
                return l
        elif not isinstance(value, int):
            return l

    # Each run is a mutable [first, last] pair of consecutive integers.
    runs = []
    for n in sorted(int(value) for value in l):
        # Input is sorted, so n >= last; extend the run on a duplicate or on
        # the immediate successor. Comparing against the stored bound (rather
        # than testing its truthiness) keeps runs ending at 0 working.
        if runs and n <= runs[-1][1] + 1:
            runs[-1][1] = n
        else:
            runs.append([n, n])

    # Always emit strings so callers get a homogeneous list.
    return [
        f'{first}' if first == last else f'{first}-{last}'
        for first, last in runs
    ]

0 comments on commit 6fe474a

Please sign in to comment.