Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#5702: Fix passing RangeIndex to loc. #5719

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from modin.utils import MODIN_UNNAMED_SERIES_LABEL, try_cast_to_pandas
from modin.config import StorageFormat

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.common import is_scalar, is_number
import pandas.core.resample
import pandas
from pandas._typing import IndexLabel, Suffixes
Expand Down Expand Up @@ -3391,14 +3391,27 @@ def get_positions_from_labels(self, row_loc, col_loc):
for axis, axis_loc in enumerate((row_loc, col_loc)):
if is_scalar(axis_loc):
axis_loc = np.array([axis_loc])
if isinstance(axis_loc, slice) or is_range_like(axis_loc):
if isinstance(axis_loc, pandas.RangeIndex):
axis_lookup = axis_loc
Comment on lines +3394 to +3395
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is the proper fix... I mean, this if should certainly be there as a fastpath solution! But...

For me it feels like there's a bug somewhere else that this fastpath change just masks.

And I think I know where that is, just a few lines below! Note how axis_labels.slice_indexer() expects that its both start and stop arguments are labels which means that they signify closed interval, while RangeIndex/range signify semi-open interval!

elif isinstance(axis_loc, slice) or is_range_like(axis_loc):
if isinstance(axis_loc, slice) and axis_loc == slice(None):
axis_lookup = axis_loc
else:
axis_labels = self.get_axis(axis)
# `slice_indexer` returns a fully-defined numeric slice for a non-fully-defined labels-based slice
# RangeIndex and range use a semi-open interval, while
# slice_indexer uses a closed interval. Subtract 1 step from the
# end of the interval to get the equivalent closed interval.
if axis_loc.stop is None or not is_number(axis_loc.stop):
slice_stop = axis_loc.stop
else:
slice_stop = axis_loc.stop - (
0 if axis_loc.step is None else axis_loc.step
)
mvashishtha marked this conversation as resolved.
Show resolved Hide resolved
axis_lookup = axis_labels.slice_indexer(
axis_loc.start, axis_loc.stop, axis_loc.step
axis_loc.start,
slice_stop,
axis_loc.step,
)
# Converting negative indices to their actual positions:
axis_lookup = pandas.RangeIndex(
Expand Down
42 changes: 42 additions & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,48 @@ def test_loc_iloc_slice_indexer(locator_name, slice_indexer):
eval_general(md_df, pd_df, lambda df: getattr(df, locator_name)[slice_indexer])


@pytest.mark.parametrize(
"indexer_size",
[
1,
2,
NROWS,
pytest.param(
NROWS + 1,
marks=pytest.mark.xfail(
reason="https://github.com/modin-project/modin/issues/5739", strict=True
),
),
],
)
class TestLocRangeLikeIndexer:
"""Test cases related to https://github.com/modin-project/modin/issues/5702"""

def test_range_index_getitem_single_value(self, indexer_size):
eval_general(
*create_test_dfs(test_data["int_data"]),
lambda df: df.loc[pd.RangeIndex(indexer_size)],
)

def test_range_index_getitem_two_values(self, indexer_size):
eval_general(
*create_test_dfs(test_data["int_data"]),
lambda df: df.loc[pd.RangeIndex(indexer_size), :],
)

def test_range_getitem_single_value(self, indexer_size):
eval_general(
*create_test_dfs(test_data["int_data"]),
lambda df: df.loc[range(indexer_size)],
)

def test_range_getitem_two_values_5702(self, indexer_size):
eval_general(
*create_test_dfs(test_data["int_data"]),
lambda df: df.loc[range(indexer_size), :],
)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_pop(request, data):
modin_df = pd.DataFrame(data)
Expand Down