Skip to content

Commit

Permalink
refac: Use exceptAll instead of subtract in assert when comparing dataframes (#3050)
Browse files Browse the repository at this point in the history

* avoid many counts

* avoid many counts

* ...

* re-introduce count check

* sample.yml

* use exceptAll instead of subtract in assert
  • Loading branch information
JesperGomes authored Dec 2, 2024
1 parent bded432 commit 284150c
Showing 1 changed file with 2 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def set_column(


def assert_dataframes_equal(actual: DataFrame, expected: DataFrame) -> None:
actual_excess = actual.subtract(expected)
expected_excess = expected.subtract(actual)
actual_excess = actual.exceptAll(expected)
expected_excess = expected.exceptAll(actual)

# do the count once here to avoid materializing multiple times
actual_excess_count = actual_excess.count()
Expand Down Expand Up @@ -155,20 +155,6 @@ def assert_dataframe_and_schema(
expected.subtract(actual).show(3000, False)
raise

try:
assert actual.count() == expected.count()
except AssertionError:

if (
not feature_tests_configuration.show_columns_when_actual_and_expected_are_equal
):
actual, expected = drop_columns_if_the_same(actual, expected)

print(
f"NUMBER OF ROWS MISMATCH: Actual: {actual.count()}, Expected: {expected.count()}"
)
raise


def drop_columns_if_the_same(df1: DataFrame, df2: DataFrame) -> (DataFrame, DataFrame):
column_names = df1.columns
Expand Down

0 comments on commit 284150c

Please sign in to comment.