Skip to content

Commit

Permalink
added column summary
Browse files (browse the repository at this point in the history)
  • Loading branch information
arthurprevot committed Jun 16, 2024
1 parent cb2d7dc commit 5fae2e0
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
1 change: 0 additions & 1 deletion jobs/generic/compare_pandas_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
class Job(ETL_Base):

def transform(self, tableA, tableB):
# tableA['action'].iloc[2] = 'other action' # only for testing.

# Comparing columns
diff_columns = list(set(tableA.columns) - set(tableB.columns))
Expand Down
6 changes: 4 additions & 2 deletions yaetos/libs/analysis_toolkit/query_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,16 @@ def check_delta(row):
df_joined['_delta_' + item1] = df_joined.apply(lambda row: (row[item1] if not pd.isna(row[item1]) else 0.0) - (row[item2] if not pd.isna(row[item2]) else 0.0), axis=1)
df_joined['_delta_' + item1 + '_%'] = df_joined.apply(check_delta, axis=1)
df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is True and row['_delta_' + item1 + '_%'] < threshold, axis=1)
print(f"Column summary, all_equal = {df_joined['_delta_' + item1 + '_%'].apply(lambda cell: cell < threshold).all()}. within treshold {threshold}")
except Exception as err:
raise Exception("Failed item={}, error: \n{}".format(item1, err))
elif is_str_1 and is_str_2:
df_joined['_delta_' + item1] = df_joined.apply(lambda row: row[item1] == row[item2], axis=1)
df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is True and row['_delta_' + item1], axis=1)
print(f"Column summary, all_equal = {df_joined['_delta_' + item1].all()}.")
else:
df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is False], axis=1)
print(f'The column to compare ({item1} and {item2}) have mismatched types, or are not numerical nor strings.')
df_joined['_no_deltas'] = df_joined.apply(lambda row: row['_no_deltas'] is False, axis=1)
print(f'Column summary, all_equal = False. The columns to compare (i.e. {item1} and {item2}) have mismatched types, or are not numerical nor strings.')

np.seterr(divide='raise')
if filter_deltas:
Expand Down

0 comments on commit 5fae2e0

Please sign in to comment.