Skip to content

Commit

Permalink
Fixup concat dtypes for empty dataframes (#602)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Dec 19, 2023
1 parent 6d33d2d commit c770840
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
7 changes: 6 additions & 1 deletion dask_expr/_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,14 @@ def _frames(self):

@functools.cached_property
def _meta(self):
# ignore DataFrame without columns to avoid dtype upcasting
meta = make_meta(
methods.concat(
[meta_nonempty(df._meta) for df in self._frames],
[
meta_nonempty(df._meta)
for df in self._frames
if df.ndim < 2 or len(df._meta.columns) > 0
],
join=self.join,
filter_warning=False,
axis=self.axis,
Expand Down
11 changes: 11 additions & 0 deletions dask_expr/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,17 @@ def test_concat_index(df, pdf):
assert query._name == expected._name


def test_concat_dataframe_empty():
    """Check concat of a populated frame with a column-less frame.

    Regression test for the dtype fix-up in ``Concat._meta``: a DataFrame
    without columns should be ignored when computing the meta so that the
    resulting dtypes are not upcast. The dask-expr result is compared
    against what the backend library (``lib``) produces for the same inputs.
    """
    populated = lib.DataFrame({"a": [100, 200, 300]}, dtype="int64")
    no_columns = lib.DataFrame([], dtype="int64")
    # Reference result computed eagerly by the backend library.
    expected = lib.concat([populated, no_columns])

    # Same concatenation expressed through single-partition collections.
    actual = concat(
        [
            from_pandas(populated, npartitions=1),
            from_pandas(no_columns, npartitions=1),
        ]
    )
    assert_eq(expected, actual)


def test_concat_after_merge():
pdf1 = lib.DataFrame(
{"x": range(10), "y": [1, 2, 3, 4, 5] * 2, "z": ["cat", "dog"] * 5}
Expand Down

0 comments on commit c770840

Please sign in to comment.