Skip to content

Commit

Permalink
Fix df.loc when providing empty list (mars-project#2528)
Browse files Browse the repository at this point in the history
  • Loading branch information
Xuye (Chris) Qin committed Oct 15, 2021
1 parent 53ac012 commit d50d9f8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
14 changes: 11 additions & 3 deletions mars/dataframe/indexing/index_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,10 @@ def preprocess(self,
for chunk_index, pos in chunk_index_to_pos.items():
# chunk_index and pos are both lists with 1 element
abs_pos = pos[0] + cum_nsplit[chunk_index[0]]
chunk_labels = to_numpy(pd_index[abs_pos])
if isinstance(pd_index, pd.RangeIndex) and len(abs_pos) == 0:
chunk_labels = np.array([], dtype=pd_index.dtype)
else:
chunk_labels = to_numpy(pd_index[abs_pos])
chunk_index_to_labels[chunk_index[0]] = chunk_labels

index_info.is_label_asc_sorted = is_asc_sorted
Expand All @@ -778,6 +781,8 @@ def process(self,
tileable = context.tileable
input_axis = index_info.input_axis
chunk_index_to_labels = index_info.chunk_index_to_labels
full_label_size = sum(labels.size for labels
in chunk_index_to_labels.values())

other_index_to_iter = dict()
chunk_index_to_info = context.chunk_index_to_info.copy()
Expand All @@ -786,8 +791,11 @@ def process(self,
chunk_labels = chunk_index_to_labels[i]
size = chunk_labels.size

if size == 0 and tileable.shape[0] > 0:
# not affected when tileable is not empty and no index is chosen
if size == 0 and full_label_size > 0 and tileable.shape[0] > 0:
# not affected when
# 1) tileable is not empty
# 2) full index is not empty
# 3) no index is chosen for this chunk
del context.chunk_index_to_info[chunk_index]
continue

Expand Down
11 changes: 11 additions & 0 deletions mars/dataframe/indexing/tests/test_indexing_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,17 @@ def test_loc_getitem(setup):
expected = raw1.loc['a3', 'b']
assert result == expected

# test empty list
df = df1.loc[[]]
result = df.execute().fetch()
expected = raw1.loc[[]]
pd.testing.assert_frame_equal(result, expected)

df = df2.loc[[]]
result = df.execute().fetch()
expected = raw2.loc[[]]
pd.testing.assert_frame_equal(result, expected)

df = df2.loc[1:4, 'b':'d']
result = df.execute().fetch()
expected = raw2.loc[1:4, 'b': 'd']
Expand Down

0 comments on commit d50d9f8

Please sign in to comment.