From 294373d1aced37f6bf9901cb08720ad50aa486e7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 1 May 2024 12:33:27 -0700 Subject: [PATCH 01/10] Enable FutureWarnings/DeprecationWarnings as errors for dask_cudf --- .../dask_cudf/dask_cudf/tests/test_accessor.py | 6 +++--- python/dask_cudf/dask_cudf/tests/test_groupby.py | 10 +++++----- python/dask_cudf/dask_cudf/tests/test_join.py | 16 ++++++++++++---- python/dask_cudf/pyproject.toml | 8 ++++++++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index ebb8e4be187..d2474945e1e 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -533,7 +533,7 @@ def test_struct_explode(data): def test_tz_localize(): - data = Series(date_range("2000-04-01", "2000-04-03", freq="H")) + data = Series(date_range("2000-04-01", "2000-04-03", freq="h")) expect = data.dt.tz_localize( "US/Eastern", ambiguous="NaT", nonexistent="NaT" ) @@ -550,8 +550,8 @@ def test_tz_localize(): @pytest.mark.parametrize( "data", [ - date_range("2000-04-01", "2000-04-03", freq="H").tz_localize("UTC"), - date_range("2000-04-01", "2000-04-03", freq="H").tz_localize( + date_range("2000-04-01", "2000-04-03", freq="h").tz_localize("UTC"), + date_range("2000-04-01", "2000-04-03", freq="h").tz_localize( "US/Eastern" ), ], diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 1e22dd95475..e2a6f1bb78b 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -243,7 +243,7 @@ def test_groupby_split_out(split_out, column): gddf = dask_cudf.from_cudf(gdf, npartitions=3) ddf_result = ( - ddf.groupby(column) + ddf.groupby(column, observed=True) .a.mean(split_out=split_out) .compute() .sort_values() @@ -378,10 
+378,10 @@ def test_groupby_dropna_dask(dropna, by): if dropna is None: dask_cudf_result = gddf.groupby(by).e.sum() - dask_result = ddf.groupby(by).e.sum() + dask_result = ddf.groupby(by, observed=True).e.sum() else: dask_cudf_result = gddf.groupby(by, dropna=dropna).e.sum() - dask_result = ddf.groupby(by, dropna=dropna).e.sum() + dask_result = ddf.groupby(by, dropna=dropna, observed=True).e.sum() dd.assert_eq(dask_cudf_result, dask_result) @@ -515,7 +515,7 @@ def test_groupby_reset_index_dtype(): a = df.groupby("a").agg({"b": ["count"]}) assert a.index.dtype == "int8" - assert a.reset_index().dtypes[0] == "int8" + assert a.reset_index().dtypes.iloc[0] == "int8" def test_groupby_reset_index_names(): @@ -573,7 +573,7 @@ def test_groupby_categorical_key(): # (See: https://github.com/dask/dask/issues/9515) expect = ( ddf.compute() - .groupby("name", sort=True) + .groupby("name", sort=True, observed=True) .agg({"x": ["mean", "max"], "y": ["mean", "count"]}) ) dd.assert_eq(expect, got) diff --git a/python/dask_cudf/dask_cudf/tests/test_join.py b/python/dask_cudf/dask_cudf/tests/test_join.py index 42ecc130298..ed291ef31a7 100644 --- a/python/dask_cudf/dask_cudf/tests/test_join.py +++ b/python/dask_cudf/dask_cudf/tests/test_join.py @@ -66,8 +66,12 @@ def test_join_inner(left_nrows, right_nrows, left_nkeys, right_nkeys): def gather(df, grows): grows[df["x"].values[0]] = (set(df.al), set(df.ar)) - expect.reset_index().groupby("x").apply(partial(gather, grows=expect_rows)) - expect.reset_index().groupby("x").apply(partial(gather, grows=got_rows)) + expect.reset_index().groupby("x")[["x", "al", "ar"]].apply( + partial(gather, grows=expect_rows) + ) + expect.reset_index().groupby("x")[["x", "al", "ar"]].apply( + partial(gather, grows=got_rows) + ) assert got_rows == expect_rows @@ -127,9 +131,13 @@ def gather(df, grows): grows[df["x"].values[0]] = (cola, colb) - expect.reset_index().groupby("x").apply(partial(gather, grows=expect_rows)) + 
expect.reset_index().groupby("x")[["x", "al", "ar"]].apply( + partial(gather, grows=expect_rows) + ) - expect.reset_index().groupby("x").apply(partial(gather, grows=got_rows)) + expect.reset_index().groupby("x")[["x", "al", "ar"]].apply( + partial(gather, grows=got_rows) + ) for k in expect_rows: np.testing.assert_array_equal(expect_rows[k][0], got_rows[k][0]) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index fcf83e82989..3d384f496cf 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -107,3 +107,11 @@ skip = [ "build", "dist", ] + +[tool.pytest.ini_options] +xfail_strict = true +filterwarnings = [ + "error::FutureWarning", + "error::DeprecationWarning", + "ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. Use public APIs instead.:DeprecationWarning:partd", +] From 00eb81cfa9dadea4aa358b01df016d4c88c07194 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 1 May 2024 12:35:05 -0700 Subject: [PATCH 02/10] Remove xfail strict true --- python/dask_cudf/pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 3d384f496cf..bdb4abbfe70 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -109,7 +109,6 @@ skip = [ ] [tool.pytest.ini_options] -xfail_strict = true filterwarnings = [ "error::FutureWarning", "error::DeprecationWarning", From 45068a1f1a92e9556793a9c5342004039918707c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 1 May 2024 16:17:42 -0700 Subject: [PATCH 03/10] Address non query planning builds, adjust FutureWarning --- python/cudf/cudf/core/index.py | 18 +++++++++++++++--- python/cudf/cudf/tests/test_index.py | 12 ++++++++---- python/dask_cudf/pyproject.toml | 3 +++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git
a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index f55fa4c05b5..3a98979ea26 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1119,14 +1119,26 @@ def _concat(cls, objs): assert ( PANDAS_LT_300 ), "Need to drop after pandas-3.0 support is added." - warnings.warn( + warning_msg = ( "The behavior of array concatenation with empty entries is " "deprecated. In a future version, this will no longer exclude " "empty items when determining the result dtype. " "To retain the old behavior, exclude the empty entries before " - "the concat operation.", - FutureWarning, + "the concat operation." ) + # Warn only if the type might _actually_ change + if len(non_empties) == 0: + if not all(objs[0].dtype == index.dtype for index in objs[1:]): + warnings.warn(warning_msg, FutureWarning) + else: + common_all_type = find_common_type( + [index.dtype for index in objs] + ) + common_non_empty_type = find_common_type( + [index.dtype for index in non_empties] + ) + if common_all_type != common_non_empty_type: + warnings.warn(warning_msg, FutureWarning) if all(isinstance(obj, RangeIndex) for obj in non_empties): result = _concat_range_index(non_empties) else: diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index c7875b81440..104a5fc0ffa 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1039,7 +1039,9 @@ def test_index_append(data, other): (len(data) == 0 or len(other) == 0) and pd_data.dtype != pd_other.dtype ): expected = pd_data.append(pd_other) - with expect_warning_if(len(data) == 0 or len(other) == 0): + with expect_warning_if( + (len(data) == 0 or len(other) == 0) and gd_data.dtype != gd_other.dtype + ): actual = gd_data.append(gd_other) if len(data) == 0 and len(other) == 0: # Pandas default dtype to "object" for empty list @@ -1237,7 +1239,10 @@ def test_index_append_list(data, other): and (any(d.dtype != data.dtype for d in other)) ): 
expected = pd_data.append(pd_other) - with expect_warning_if(len(data) == 0 or any(len(d) == 0 for d in other)): + with expect_warning_if( + (len(data) == 0 or any(len(d) == 0 for d in other)) + and (any(d.dtype != data.dtype for d in other)) + ): actual = gd_data.append(gd_other) assert_eq(expected, actual) @@ -2817,8 +2822,7 @@ def test_index_methods(index, func): if func == "append": expected = pidx.append(other=pidx) - with expect_warning_if(len(gidx) == 0): - actual = gidx.append(other=gidx) + actual = gidx.append(other=gidx) else: expected = getattr(pidx, func)() actual = getattr(gidx, func)() diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index bdb4abbfe70..d221f4a25be 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -113,4 +113,7 @@ filterwarnings = [ "error::FutureWarning", "error::DeprecationWarning", "ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. Use public APIs instead.:DeprecationWarning:partd", + # https://github.com/dask/partd/blob/main/partd/pandas.py#L198 + "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning", + "ignore:String support for `aggregate_files` is experimental. 
Behavior may change in the future.:FutureWarning:dask", ] From 7c962ec752c94a1c1c8a0198a155c95b055e7fa7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 2 May 2024 17:32:02 -0700 Subject: [PATCH 04/10] address read_json warning --- python/dask_cudf/dask_cudf/io/tests/test_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py index f8e5be0a417..dadeb4e1d1f 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_json.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py @@ -84,9 +84,9 @@ def test_read_json_nested(tmp_path): } ) kwargs = dict(orient="records", lines=True) - with tmp_path / "data.json" as f, dask.config.set( - {"dataframe.convert-string": False} - ): + f = tmp_path / "data.json" + f.touch() + with dask.config.set({"dataframe.convert-string": False}): df.to_json(f, **kwargs) # Ensure engine='cudf' is tested. 
actual = dask_cudf.read_json(f, engine="cudf", **kwargs) From 1415f62b0de2e8f3dbd556e25d4ad741d409cfa9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 2 May 2024 17:39:37 -0700 Subject: [PATCH 05/10] Enable warnings as errors in custreamz --- python/custreamz/pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index e6c86351ac9..4242ecbffe3 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -103,3 +103,12 @@ skip = [ "dist", "__init__.py", ] + +[tool.pytest.ini_options] +filterwarnings = [ + "error", + "ignore:Port .* is already in use.:UserWarning:distributed", + # Should be fixed in the next streamz release + # https://github.com/python-streamz/streamz/commit/2812f1f961dfcb3f17e948d8b12a12472975558e + "ignore:pkg_resources is deprecated as an API:DeprecationWarning:streamz", +] From 3381adcd5f801cff78c4cdedb6e819ead1bb2002 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 May 2024 10:50:19 -0700 Subject: [PATCH 06/10] Remove touch --- python/dask_cudf/dask_cudf/io/tests/test_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py index dadeb4e1d1f..dc780478794 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_json.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py @@ -85,7 +85,6 @@ def test_read_json_nested(tmp_path): ) kwargs = dict(orient="records", lines=True) f = tmp_path / "data.json" - f.touch() with dask.config.set({"dataframe.convert-string": False}): df.to_json(f, **kwargs) # Ensure engine='cudf' is tested. 
From 1a8b8ad9ced58382cd3db2677ebac3dc9ad3d2f2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 May 2024 11:31:12 -0700 Subject: [PATCH 07/10] Try to address ResourceWarnings --- .../custreamz/tests/test_dataframes.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index bae4b051cae..6de1c5cdd98 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -24,19 +24,23 @@ @pytest.fixture(scope="module") def client(): - client = Client(processes=False, asynchronous=False) - try: + with Client(processes=False, asynchronous=False) as client: yield client - finally: - client.close() -@pytest.fixture(params=["core", "dask"]) -def stream(request, client): - if request.param == "core": - return Stream() - else: - return DaskStream() +@pytest.fixture(scope="module") +def dask_stream(client): + return DaskStream() + + +@pytest.fixture +def core_stream(): + return Stream() + + +@pytest.fixture(params=["core_stream", "dask_stream"]) +def stream(request): + return request.getfixturevalue(request.param) def test_identity(stream): From e753c9d478c84103334a7a9e6c2af1aea241ac89 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 May 2024 11:34:38 -0700 Subject: [PATCH 08/10] create_block_manager_from_blocks comes from not just partd --- python/dask_cudf/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index d221f4a25be..5fbdd98225e 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -112,7 +112,7 @@ skip = [ filterwarnings = [ "error::FutureWarning", "error::DeprecationWarning", - "ignore:create_block_manager_from_blocks is deprecated 
and will be removed in a future version. Use public APIs instead.:DeprecationWarning:partd", + "ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. Use public APIs instead.:DeprecationWarning", # https://github.com/dask/partd/blob/main/partd/pandas.py#L198 "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning", "ignore:String support for `aggregate_files` is experimental. Behavior may change in the future.:FutureWarning:dask", From b2ff708a7a2b9605aaca39b40c716272d2030797 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 May 2024 13:21:13 -0700 Subject: [PATCH 09/10] Revert "Try to address ResourceWarnings" This reverts commit 1a8b8ad9ced58382cd3db2677ebac3dc9ad3d2f2. --- .../custreamz/tests/test_dataframes.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index 6de1c5cdd98..bae4b051cae 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -24,23 +24,19 @@ @pytest.fixture(scope="module") def client(): - with Client(processes=False, asynchronous=False) as client: + client = Client(processes=False, asynchronous=False) + try: yield client + finally: + client.close() -@pytest.fixture(scope="module") -def dask_stream(client): - return DaskStream() - - -@pytest.fixture -def core_stream(): - return Stream() - - -@pytest.fixture(params=["core_stream", "dask_stream"]) -def stream(request): - return request.getfixturevalue(request.param) +@pytest.fixture(params=["core", "dask"]) +def stream(request, client): + if request.param == "core": + return Stream() + else: + return DaskStream() def test_identity(stream): From 91b5fe7fa8f80da738122c08a78071fe0c56031f Mon Sep 17 00:00:00 
2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 May 2024 13:22:55 -0700 Subject: [PATCH 10/10] Ignore socket warnings --- python/custreamz/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 4242ecbffe3..7786bf98bef 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -107,6 +107,7 @@ skip = [ [tool.pytest.ini_options] filterwarnings = [ "error", + "ignore:unclosed