Skip to content

Commit

Permalink
Merge pull request #384 from capitalone/develop
Browse files Browse the repository at this point in the history
Release v0.16.3
  • Loading branch information
fdosani authored Feb 21, 2025
2 parents 37b4bc7 + fa8e539 commit 124e952
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 5 deletions.
2 changes: 1 addition & 1 deletion datacompy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Then extended to carry that functionality over to Spark Dataframes.
"""

__version__ = "0.16.2"
__version__ = "0.16.3"

import platform
from warnings import warn
Expand Down
4 changes: 2 additions & 2 deletions datacompy/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def df1(self, df1: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
if len(table_name) != 3:
errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema."
raise ValueError(errmsg)
self.df1_name = df_name.upper() if df_name else table_name[2]
self.df1_name = df_name.upper() if df_name else "__".join(table_name)
self._df1 = self.session.table(df)
else:
self._df1 = df
Expand All @@ -184,7 +184,7 @@ def df2(self, df2: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
if len(table_name) != 3:
errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema."
raise ValueError(errmsg)
self.df2_name = df_name.upper() if df_name else table_name[2]
self.df2_name = df_name.upper() if df_name else "__".join(table_name)
self._df2 = self.session.table(df)
else:
self._df2 = df
Expand Down
7 changes: 7 additions & 0 deletions docs/source/snowflake_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ For ``SnowflakeCompare``
- Joining is done using ``EQUAL_NULL`` which is the equality test that is safe for null values.
- Compares ``snowflake.snowpark.DataFrame``, which can be provided as either raw Snowflake dataframes
or as the full names of valid Snowflake tables, which we will process into Snowpark dataframes.
- Note that if Snowflake tables are provided, the dataframe names will default to the full names of their
respective Snowflake tables. This can be overridden by setting the ``df1_name`` and ``df2_name`` arguments
when creating the Compare object.


SnowflakeCompare setup
Expand Down Expand Up @@ -57,6 +60,8 @@ Provide Snowpark dataframes
session,
df_1,
df_2,
#df1_name='original', # optional param for naming df1
#df2_name='new', # optional param for naming df2
join_columns=['acct_id'],
rel_tol=1e-03,
abs_tol=1e-04,
Expand All @@ -80,6 +85,8 @@ Given the dataframes from the prior examples...
session,
f"{db}.{schema}.toy_table_1",
f"{db}.{schema}.toy_table_2",
#df1_name='original', # optional param for naming df1
#df2_name='new', # optional param for naming df2
join_columns=['acct_id'],
rel_tol=1e-03,
abs_tol=1e-04,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ maintainers = [
{ name="Raymond Haffar", email="[email protected]" },
]
license = {text = "Apache Software License"}
dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.0,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.17.1,>=0.20.4"]
dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.3,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.22.0,>=0.20.4"]
requires-python = ">=3.10.0"
classifiers = [
"Intended Audience :: Developers",
Expand Down Expand Up @@ -55,7 +55,7 @@ version = {attr = "datacompy.__version__"}
python-tag = "py3"

[project.optional-dependencies]
fugue = ["fugue[duckdb,dask,ray]<=0.9.1,>=0.8.7"]
fugue = ["fugue[dask,duckdb,ray]<=0.9.1,>=0.8.7"]
spark = ["pyspark[connect]>=3.1.1; python_version < \"3.11\"", "pyspark[connect]>=3.4; python_version >= \"3.11\""]
snowflake = ["snowflake-connector-python", "snowflake-snowpark-python"]
docs = ["sphinx", "furo", "myst-parser"]
Expand Down
25 changes: 25 additions & 0 deletions tests/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,31 @@ def test_compare_table_setter_bad(snowpark_session):
)


@mock.patch(
    "datacompy.snowflake.SnowflakeCompare._validate_dataframe",
    new=mock.MagicMock(),
)
@mock.patch(
    "datacompy.snowflake.SnowflakeCompare._compare",
    new=mock.MagicMock(),
)
def test_compare_table_unique_names(snowpark_session):
    """Check that same-named tables in different locations get distinct names.

    ``_validate_dataframe`` and ``_compare`` are replaced with ``MagicMock``
    stand-ins, so only the default naming of ``df1_name`` / ``df2_name`` is
    asserted here.
    """
    table_pairs = (
        # Same schema/name, different DB
        ("test_db1.test_schema.test_name", "test_db2.test_schema.test_name"),
        # Same db/name, different schema
        ("test_db.test_schema1.test_name", "test_db.test_schema2.test_name"),
    )
    for first_table, second_table in table_pairs:
        compare = SnowflakeCompare(
            snowpark_session,
            first_table,
            second_table,
            ["A"],
        )
        # No explicit names were given, so the defaults must not collide.
        assert compare.df1_name != compare.df2_name


def test_compare_table_setter_good(snowpark_session):
data = """ACCT_ID,DOLLAR_AMT,NAME,FLOAT_FLD,DATE_FLD
10000001234,123.4,George Michael Bluth,14530.155,
Expand Down

0 comments on commit 124e952

Please sign in to comment.