def maybe_collapse_range(l):
    """Collapse whole numbers into sorted, compact range labels.

    Runs of consecutive integers become ``"start-end"`` strings and isolated
    integers become their decimal string, e.g. ``[3, 1, 2, 7]`` becomes
    ``['1-3', '7']``. Integral floats (``2.0``) are treated as integers and
    duplicate values are counted once.

    Args:
        l: List of values to summarise. (The short parameter name is kept so
            existing callers are unaffected.)

    Returns:
        A new list of string labels when every element is a whole number.
        Otherwise the input list ``l`` itself, unchanged: this covers
        strings, ``None``, booleans, NaN, infinities and non-integral
        floats, none of which can be expressed as an integer range.
    """
    whole_numbers = []
    # Validate every element *before* sorting: sorting mixed or unorderable
    # values (e.g. None next to ints) would raise TypeError, and NaN makes
    # the sort order meaningless.
    for value in l:
        # bool is a subclass of int, but True/False are not row numbers.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return l
        # is_integer() is False for NaN, +/-inf and fractional floats, all
        # of which int() would either reject or silently truncate.
        if isinstance(value, float) and not value.is_integer():
            return l
        whole_numbers.append(int(value))

    # Each run is a mutable [first, last] pair; extend the latest run while
    # values stay consecutive, otherwise open a new one. Comparing against
    # the stored bound (rather than testing its truthiness) keeps runs that
    # end at 0 working.
    runs = []
    for number in sorted(set(whole_numbers)):
        if runs and number == runs[-1][1] + 1:
            runs[-1][1] = number
        else:
            runs.append([number, number])

    # Always emit strings so the result is homogeneous.
    return [
        str(first) if first == last else f"{first}-{last}"
        for first, last in runs
    ]