Skip to content

Commit

Permalink
Aggregate failures in console output
Browse files Browse the repository at this point in the history
Failures are grouped by check name, column, and description; for each group, the distinct failing values and the rows where they occur are aggregated.

Signed-off-by: Joshua Kwan <[email protected]>
  • Loading branch information
joshk0 committed Nov 10, 2023
1 parent 6b58290 commit 4fb6a88
Showing 1 changed file with 27 additions and 1 deletion.
28 changes: 27 additions & 1 deletion focus_validator/outputter/outputter_console.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import pandas as pd
from tabulate import tabulate

Expand Down Expand Up @@ -27,6 +28,7 @@ def __restructure_check_list__(result_set: ValidationResult):
}
)
rows.append(row_obj)

df = pd.DataFrame(rows)
df.rename(
columns={
Expand Down Expand Up @@ -59,11 +61,35 @@ def write(self, result_set: ValidationResult):
print(tabulate(checklist, headers="keys", tablefmt="psql"))

if result_set.failure_cases is not None:
aggregated_failures = result_set.failure_cases.groupby(by=['Check Name', 'Column', 'Description'], as_index=False).aggregate(lambda x: maybe_collapse_range(x.unique().tolist()))

print("Checks summary:")
print(
tabulate(
tabular_data=result_set.failure_cases, # type: ignore
tabular_data=aggregated_failures, # type: ignore
headers="keys",
tablefmt="psql",
)
)

def maybe_collapse_range(l):
    """Collapse whole numbers into sorted, compact range strings.

    Runs of consecutive numbers become ``"start-end"`` and isolated numbers
    become ``"n"``, e.g. ``[1, 2, 3, 7]`` -> ``['1-3', '7']``.

    Args:
        l: List of values to collapse (for example the unique values of an
            aggregated column).

    Returns:
        A new list of strings when every element is a whole number (an
        ``int``, or a ``float`` with no fractional part). Otherwise ``l``
        itself is returned unchanged, so non-numeric data is displayed as-is.
        An empty input yields an empty list.
    """
    numbers = set()
    # Validate before sorting: sorting a mixed-type list (e.g. str and int)
    # would raise TypeError instead of falling back to the original list.
    for value in l:
        # bool is a subclass of int, but True/False are not row numbers.
        if isinstance(value, bool):
            return l
        if isinstance(value, int):
            numbers.add(value)
        # is_integer() is False for NaN, +/-inf and fractional floats, none of
        # which can be represented faithfully as part of an integer range.
        elif isinstance(value, float) and value.is_integer():
            numbers.add(int(value))
        else:
            return l

    # Each span is a mutable [first, last] pair of an unbroken run.
    spans = []
    for number in sorted(numbers):
        if spans and number == spans[-1][1] + 1:
            spans[-1][1] = number
        else:
            # Compare against the previous value explicitly rather than by
            # truthiness, so a run that ends at 0 is still flushed correctly.
            spans.append([number, number])

    return [
        str(first) if first == last else f'{first}-{last}'
        for first, last in spans
    ]

0 comments on commit 4fb6a88

Please sign in to comment.