From 5fae2e000cc5a83309721fdc18ea2767902da62b Mon Sep 17 00:00:00 2001 From: Arthur Prevot Date: Sun, 16 Jun 2024 21:51:52 +0200 Subject: [PATCH] added column summary --- jobs/generic/compare_pandas_job.py | 1 - yaetos/libs/analysis_toolkit/query_helper.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/jobs/generic/compare_pandas_job.py b/jobs/generic/compare_pandas_job.py index 848ab0be..33294bbe 100644 --- a/jobs/generic/compare_pandas_job.py +++ b/jobs/generic/compare_pandas_job.py @@ -6,7 +6,6 @@ class Job(ETL_Base): def transform(self, tableA, tableB): - # tableA['action'].iloc[2] = 'other action' # only for testing. # Comparing columns diff_columns = list(set(tableA.columns) - set(tableB.columns)) diff --git a/yaetos/libs/analysis_toolkit/query_helper.py b/yaetos/libs/analysis_toolkit/query_helper.py index 2cf26712..1819853e 100644 --- a/yaetos/libs/analysis_toolkit/query_helper.py +++ b/yaetos/libs/analysis_toolkit/query_helper.py @@ -202,14 +202,16 @@ def check_delta(row): df_joined['_delta_' + item1] = df_joined.apply(lambda row: (row[item1] if not pd.isna(row[item1]) else 0.0) - (row[item2] if not pd.isna(row[item2]) else 0.0), axis=1) df_joined['_delta_' + item1 + '_%'] = df_joined.apply(check_delta, axis=1) df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is True and row['_delta_' + item1 + '_%'] < threshold, axis=1) + print(f"Column summary, all_equal = {df_joined['_delta_' + item1 + '_%'].apply(lambda cell: cell < threshold).all()}. within treshold {threshold}") except Exception as err: raise Exception("Failed item={}, error: \n{}".format(item1, err)) elif is_str_1 and is_str_2: df_joined['_delta_' + item1] = df_joined.apply(lambda row: row[item1] == row[item2], axis=1) df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is True and row['_delta_' + item1], axis=1) + print(f"Column summary, all_equal = {df_joined['_delta_' + item1].all()}.") else: - df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is False], axis=1) - print(f'The column to compare ({item1} and {item2}) have mismatched types, or are not numerical nor strings.') + df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is False, axis=1) + print(f'Column summary, all_equal = False. The columns to compare (i.e. {item1} and {item2}) have mismatched types, or are not numerical nor strings.') np.seterr(divide='raise') if filter_deltas: