Skip to content

Commit

Permalink
[BACKPORT] Fix chunk index error in auto_merge_chunks (#3057) (#3068)
Browse files Browse the repository at this point in the history
Co-authored-by: yuyiming <36940796+yuyiming@users.noreply.github.com>
  • Loading branch information
wjsi and yuyiming authored May 23, 2022
1 parent 540f71f commit 62b7f99
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
5 changes: 4 additions & 1 deletion mars/dataframe/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,10 @@ def get_chunks_meta(self, data_keys: List[str], **_) -> List[Dict]:
assert len(df2.chunks) == 2
assert isinstance(df2.chunks[0].op, DataFrameConcat)
assert len(df2.chunks[0].op.inputs) == 3
assert df2.chunks[1] is df.chunks[-1]
assert isinstance(df2.chunks[1].op, DataFrameConcat)
assert len(df2.chunks[1].op.inputs) == 1
assert df2.chunks[1].shape == df.chunks[-1].shape
assert df2.chunks[1].index == (1, 0)

# mock situation that df not executed
df2 = auto_merge_chunks(FakeContext(False), df, 3 * memory_size)
Expand Down
13 changes: 4 additions & 9 deletions mars/dataframe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,15 +1389,10 @@ def _concat_chunks(merge_chunks: List[ChunkType], output_index: int):
to_merge_chunks.append(chunk)
acc_memory_size += chunk_memory_size
# process the last chunk
if len(to_merge_chunks) > 1:
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
out_chunks.append(merged_chunk)
n_split.append(merged_chunk.shape[0])
else:
assert len(to_merge_chunks) == 1
last_chunk = to_merge_chunks[0]
out_chunks.append(last_chunk)
n_split.append(last_chunk.shape[0])
assert len(to_merge_chunks) >= 1
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
out_chunks.append(merged_chunk)
n_split.append(merged_chunk.shape[0])

new_op = df_or_series.op.copy()
params = df_or_series.params.copy()
Expand Down

0 comments on commit 62b7f99

Please sign in to comment.