From 4b40662e02cc7720cc2c8bacd38d74af9f54fb38 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Wed, 31 Jul 2024 09:00:39 -0700 Subject: [PATCH 1/3] register read_parquet and read_csv as dispatchable --- dask_expr/_collection.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 002aff4d..289f1518 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -5099,17 +5099,18 @@ def from_dask_array(x, columns=None, index=None, meta=None): return from_legacy_dataframe(df, optimize=True) +@dataframe_creation_dispatch.register_inplace("pandas") def read_csv( path, *args, header="infer", dtype_backend=None, storage_options=None, + _legacy_backend="pandas", **kwargs, ): from dask_expr.io.csv import ReadCSV - dataframe_backend = config.get("dataframe.backend", "pandas") if not isinstance(path, str): path = stringify_path(path) return new_collection( @@ -5119,7 +5120,7 @@ def read_csv( storage_options=storage_options, kwargs=kwargs, header=header, - dataframe_backend=dataframe_backend, + dataframe_backend=_legacy_backend, ) ) @@ -5174,6 +5175,7 @@ def read_fwf( ) +@dataframe_creation_dispatch.register_inplace("pandas") def read_parquet( path=None, columns=None, From beb5955c30c4b32ab032f3453f714ecee104d3b9 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Wed, 31 Jul 2024 09:20:57 -0700 Subject: [PATCH 2/3] formatting --- dask_expr/_collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 289f1518..7026adb8 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -14,7 +14,7 @@ import numpy as np import pandas as pd import pyarrow as pa -from dask import compute, config, get_annotations +from dask import compute, get_annotations from dask.array import Array from dask.base import DaskMethodsMixin, is_dask_collection, named_schedulers from dask.core import flatten @@ -5106,7 +5106,7 @@ def read_csv( header="infer", dtype_backend=None, storage_options=None, - _legacy_backend="pandas", + _legacy_dataframe_backend="pandas", **kwargs, ): from dask_expr.io.csv import ReadCSV @@ -5120,7 +5120,7 @@ def read_csv( storage_options=storage_options, kwargs=kwargs, header=header, - dataframe_backend=_legacy_backend, + dataframe_backend=_legacy_dataframe_backend, ) ) From 5af50d4add2bbcdf2ff5a67ab76eec0536621c49 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 12 Aug 2024 10:53:32 -0500 Subject: [PATCH 3/3] Apply suggestions from code review --- dask_expr/_collection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py index 7026adb8..5d808119 100644 --- a/dask_expr/_collection.py +++ b/dask_expr/_collection.py @@ -5106,7 +5106,6 @@ def read_csv( header="infer", dtype_backend=None, storage_options=None, - _legacy_dataframe_backend="pandas", **kwargs, ): from dask_expr.io.csv import ReadCSV @@ -5120,7 +5119,7 @@ def read_csv( storage_options=storage_options, kwargs=kwargs, header=header, - dataframe_backend=_legacy_dataframe_backend, + dataframe_backend="pandas", ) )