Skip to content

Commit

Permalink
fix case when columns are in different order
Browse files: browse the repository at this point in the history
  • Loading branch information
arthurprevot committed Jun 16, 2024
1 parent 2e82827 commit 203c1c5
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
6 changes: 3 additions & 3 deletions jobs/generic/compare_pandas_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ def transform(self, tableA, tableB):
print('The deltas in columns are: ', diff_columns)
print('The columns in common are: ', common_columns)
print('Rest of the comparison will be based on columns in common')
tableA = tableA[common_columns]
tableB = tableB[common_columns]

tableA = tableA[common_columns] # applied even if the columns are all the same, to make sure they are in the same order (important for the checks below)
tableB = tableB[common_columns]

# Comparing datasets length
diff_row_count = len(tableA) != len(tableB)
Expand Down Expand Up @@ -52,7 +53,6 @@ def transform(self, tableA, tableB):
df_out = compare_dfs(tableA, pks1, compare1, tableB, pks2, compare2, strip, filter_deltas, threshold=0.01)
print('Finishing compare, column by column.')

# import ipdb; ipdb.set_trace()
return df_out


Expand Down
1 change: 0 additions & 1 deletion yaetos/libs/analysis_toolkit/query_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ def check_delta(row):
if item1 == item2: # check necessary for next step. See comment below.
item1 = item1 + '_1'
item2 = item2 + '_2'
# raise Exception(f'Error. item1: {item1}, item2: {item2}')
is_numeric_1 = pd.api.types.is_numeric_dtype(df_joined[item1])
is_numeric_2 = pd.api.types.is_numeric_dtype(df_joined[item2])
is_str_1 = pd.api.types.is_string_dtype(df_joined[item1])
Expand Down

0 comments on commit 203c1c5

Please sign in to comment.