Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REFACTOR-#2009: avoid index access in is_scalar calls #2010

Merged
merged 1 commit into from
Sep 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import numpy as np
from numpy import nan
import pandas
from pandas.api.types import is_scalar
from pandas.compat import numpy as numpy_compat
from pandas.core.common import count_not_none, pipe
from pandas.core.dtypes.common import (
Expand All @@ -33,7 +32,7 @@
import pickle as pkl

from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas
from modin.pandas.utils import try_cast_to_pandas, is_scalar

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
# special meaning and needs to be distinguished from a user explicitly passing None.
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@

import numpy as np
import pandas
from pandas.api.types import is_scalar, is_list_like, is_bool
from pandas.api.types import is_list_like, is_bool
from pandas.core.dtypes.common import is_integer
from pandas.core.indexing import IndexingError

from .dataframe import DataFrame
from .series import Series
from .utils import is_scalar

"""Indexing Helper Class works as follows:

Expand Down
3 changes: 1 addition & 2 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@
from pandas.core.dtypes.common import (
is_dict_like,
is_list_like,
is_scalar,
)
import sys
import warnings

from .base import BasePandasDataset
from .iterator import PartitionIterator
from .utils import _inherit_docstrings
from .utils import from_pandas, to_pandas
from .utils import from_pandas, to_pandas, is_scalar

if sys.version_info[0] == 3 and sys.version_info[1] >= 7:
# Python >= 3.7
Expand Down
27 changes: 27 additions & 0 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,30 @@ def hashable(obj):
except TypeError:
return False
return True


def is_scalar(obj):
    """
    Check whether the given object is a scalar.

    Behaves like ``pandas.api.types.is_scalar`` except that Modin
    ``BasePandasDataset`` objects are rejected up front. Passing such
    an object to the pandas implementation makes it look up a missing
    attribute, which causes an index scan and triggers execution of
    lazy frames; the early check avoids that.

    Parameters
    ----------
    obj : object
        Object to check.

    Returns
    -------
    bool
        True if the given object is a scalar, False otherwise.
    """
    # Imported inside the function, as in the original implementation.
    from pandas.api.types import is_scalar as pandas_is_scalar
    from .base import BasePandasDataset

    # Modin datasets are never scalars; answer without calling pandas so
    # that the object's index is not touched.
    if isinstance(obj, BasePandasDataset):
        return False
    return pandas_is_scalar(obj)