From 9c94578c985a18c79a0c9344c67b72d298f8a7ad Mon Sep 17 00:00:00 2001 From: ienkovich Date: Thu, 3 Sep 2020 15:53:20 -0500 Subject: [PATCH] REFACTOR-#2009: avoid index access in is_scalar calls Signed-off-by: ienkovich --- modin/pandas/base.py | 3 +-- modin/pandas/indexing.py | 3 ++- modin/pandas/series.py | 3 +-- modin/pandas/utils.py | 27 +++++++++++++++++++++++++++ 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 71450b75182..81afa9a5305 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -15,7 +15,6 @@ import numpy as np from numpy import nan import pandas -from pandas.api.types import is_scalar from pandas.compat import numpy as numpy_compat from pandas.core.common import count_not_none, pipe from pandas.core.dtypes.common import ( @@ -33,7 +32,7 @@ import pickle as pkl from modin.error_message import ErrorMessage -from modin.pandas.utils import try_cast_to_pandas +from modin.pandas.utils import try_cast_to_pandas, is_scalar # Similar to pandas, sentinel value to use as kwarg in place of None when None has # special meaning and needs to be distinguished from a user explicitly passing None. 
diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 84d32115622..802acdced5d 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -13,12 +13,13 @@ import numpy as np import pandas -from pandas.api.types import is_scalar, is_list_like, is_bool +from pandas.api.types import is_list_like, is_bool from pandas.core.dtypes.common import is_integer from pandas.core.indexing import IndexingError from .dataframe import DataFrame from .series import Series +from .utils import is_scalar """Indexing Helper Class works as follows: diff --git a/modin/pandas/series.py b/modin/pandas/series.py index b9c41f60454..a9140cf8c9b 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -20,7 +20,6 @@ from pandas.core.dtypes.common import ( is_dict_like, is_list_like, - is_scalar, ) import sys import warnings @@ -28,7 +27,7 @@ from .base import BasePandasDataset from .iterator import PartitionIterator from .utils import _inherit_docstrings -from .utils import from_pandas, to_pandas +from .utils import from_pandas, to_pandas, is_scalar if sys.version_info[0] == 3 and sys.version_info[1] >= 7: # Python >= 3.7 diff --git a/modin/pandas/utils.py b/modin/pandas/utils.py index d2c346d05ff..e21c7617dd5 100644 --- a/modin/pandas/utils.py +++ b/modin/pandas/utils.py @@ -155,3 +155,30 @@ def hashable(obj): except TypeError: return False return True + + +def is_scalar(obj): + """ + Return True if given object is scalar. + + This method works the same as is_scalar method from Pandas but + it is optimized for Modin frames. For BasePandasDataset objects + Pandas version of is_scalar tries to access missing attribute + causing index scan. This triggers execution for lazy frames and + we avoid it by handling BasePandasDataset objects separately. + + Parameters + ---------- + obj : object + Object to check. + + Returns + ------- + bool + True if given object is scalar and False otherwise. 
+ """ + + from pandas.api.types import is_scalar as pandas_is_scalar + from .base import BasePandasDataset + + return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj)