Skip to content

Commit

Permalink
BUG: merge performance issue caused by DataFrameAutoMergeMixin (#740)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChengjieLi28 authored Oct 16, 2023
1 parent 7ea4316 commit b1d828f
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
3 changes: 2 additions & 1 deletion python/xorbits/_mars/dataframe/base/cartesian_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,10 @@ def tile(cls, op: "DataFrameCartesianChunk"):
auto_merge_threshold = op.auto_merge_threshold
auto_merge_before, auto_merge_after = cls._get_auto_merge_options(op.auto_merge)

- yield from cls._merge_before(
+ merge_before_res = yield from cls._merge_before(
op, auto_merge_before, auto_merge_threshold, left, right, logger
)
+ left, right = merge_before_res[0], merge_before_res[1]

if left.ndim == 2 and left.chunk_shape[1] > 1:
if has_unknown_shape(left):
Expand Down
1 change: 1 addition & 0 deletions python/xorbits/_mars/dataframe/base/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def _merge_before(
right.shape,
len(right.chunks),
)
+ return [left, right]

@classmethod
def _merge_after(
Expand Down
3 changes: 2 additions & 1 deletion python/xorbits/_mars/dataframe/merge/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,10 @@ def tile(cls, op: "DataFrameMerge"):
auto_merge_threshold = op.auto_merge_threshold
auto_merge_before, auto_merge_after = cls._get_auto_merge_options(op.auto_merge)

- yield from cls._merge_before(
+ merge_before_res = yield from cls._merge_before(
op, auto_merge_before, auto_merge_threshold, left, right, logger
)
+ left, right = merge_before_res[0], merge_before_res[1]

method = cls._choose_merge_method(op, left, right)
if cls._if_apply_bloom_filter(method, op, left, right):
Expand Down

0 comments on commit b1d828f

Please sign in to comment.