Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#2285: Default to pandas warning message improved #2302

Merged
merged 2 commits into from
Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modin/backends/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CatDefault,
GroupByDefault,
)
from modin.error_message import ErrorMessage

from pandas.core.dtypes.common import is_scalar
import pandas.core.resample
Expand All @@ -32,6 +33,7 @@

def _get_axis(axis):
    """Build a getter that returns the pandas axis at position ``axis``.

    The returned function takes ``self``, reports the fallback through
    ``ErrorMessage.default_to_pandas`` under the name
    ``DataFrame.get_axis(<axis>)``, converts ``self`` with ``to_pandas()``
    and returns ``axes[axis]`` of the converted object.
    """

    def axis_getter(self):
        # Announce the fallback before materializing the pandas object.
        ErrorMessage.default_to_pandas(f"DataFrame.get_axis({axis})")
        pandas_frame = self.to_pandas()
        return pandas_frame.axes[axis]

    return axis_getter
Expand Down
3 changes: 2 additions & 1 deletion modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ def default_to_pandas(self, pandas_op, *args, **kwargs):
----
This operation takes a distributed object and converts it directly to pandas.
"""
ErrorMessage.default_to_pandas(str(pandas_op))
op_name = getattr(pandas_op, "__name__", str(pandas_op))
ErrorMessage.default_to_pandas(op_name)
args = (a.to_pandas() if isinstance(a, type(self)) else a for a in args)
kwargs = {
k: v.to_pandas if isinstance(v, type(self)) else v
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

class BinaryDefault(AnyDefault):
@classmethod
def build_default_to_pandas(cls, fn):
def build_default_to_pandas(cls, fn, fn_name):
def bin_ops_wrapper(df, other, *args, **kwargs):
squeeze_other = kwargs.pop("broadcast", False) or kwargs.pop(
"squeeze_other", False
Expand All @@ -41,7 +41,4 @@ def bin_ops_wrapper(df, other, *args, **kwargs):
result = pandas.DataFrame(result)
return result

def wrapper(self, *args, **kwargs):
return self.default_to_pandas(bin_ops_wrapper, *args, **kwargs)

return wrapper
return super().build_default_to_pandas(bin_ops_wrapper, fn_name)
20 changes: 9 additions & 11 deletions modin/data_management/functions/default_methods/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@


class DefaultMethod(Function):
OBJECT_TYPE = "DataFrame"

@classmethod
def call(cls, func, **call_kwds):
obj = call_kwds.get("obj_type", pandas.DataFrame)
force_inplace = call_kwds.get("inplace")
fn_name = call_kwds.get("fn_name", getattr(func, "__name__", str(func)))

if isinstance(func, str):
fn = getattr(obj, func)
Expand Down Expand Up @@ -57,28 +60,21 @@ def applyier(df, *args, **kwargs):
inplace = force_inplace
return result if not inplace else df

return cls.build_wrapper(applyier, func)
return cls.build_wrapper(applyier, fn_name)

@classmethod
def register(cls, func, **kwargs):
    """Forward ``func`` and all keyword arguments to ``cls.call`` and return its result."""
    registered = cls.call(func, **kwargs)
    return registered

@classmethod
def build_wrapper(cls, fn, fn_name=None):
wrapper = cls.build_default_to_pandas(fn)
def build_wrapper(cls, fn, fn_name):
wrapper = cls.build_default_to_pandas(fn, fn_name)

def args_cast(self, *args, **kwargs):
args = try_cast_to_pandas(args)
kwargs = try_cast_to_pandas(kwargs)
return wrapper(self, *args, **kwargs)

if fn_name is None:
fn_name = fn.__name__
if not isinstance(fn_name, str):
fn_name = getattr(fn_name, "__name__", repr(fn_name))

# setting proper function name that will be printed in default to pandas warning
args_cast.__name__ = fn_name
return args_cast

@classmethod
Expand All @@ -89,7 +85,9 @@ def property_wrapper(df):
return property_wrapper

@classmethod
def build_default_to_pandas(cls, fn):
def build_default_to_pandas(cls, fn, fn_name):
fn.__name__ = f"<function {cls.OBJECT_TYPE}.{fn_name}>"

def wrapper(self, *args, **kwargs):
return self.default_to_pandas(fn, *args, **kwargs)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ def build_groupby(cls, func):


class GroupByDefault(DefaultMethod):
OBJECT_TYPE = "GroupBy"

@classmethod
def register(cls, func, **kwargs):
return cls.call(GroupBy.build_groupby(func), **kwargs)
return cls.call(GroupBy.build_groupby(func), fn_name=func.__name__, **kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def fn(df, resample_args, *args, **kwargs):


class ResampleDefault(DefaultMethod):
OBJECT_TYPE = "Resampler"

@classmethod
def register(cls, func, squeeze_self=False, **kwargs):
return cls.call(Resampler.build_resample(func, squeeze_self), **kwargs)
return cls.call(
Resampler.build_resample(func, squeeze_self),
fn_name=func.__name__,
**kwargs
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def fn(df, rolling_args, *args, **kwargs):


class RollingDefault(DefaultMethod):
OBJECT_TYPE = "Rolling"

@classmethod
def register(cls, func, **kwargs):
return cls.call(Rolling.build_rolling(func), **kwargs)
return cls.call(Rolling.build_rolling(func), fn_name=func.__name__, **kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@


class SeriesDefault(AnyDefault):
OBJECT_TYPE = "Series"

@classmethod
def frame_wrapper(cls, df):
return df.squeeze(axis=1)
27 changes: 27 additions & 0 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import modin.pandas as pd
import numpy as np
from numpy.testing import assert_array_equal
from modin.utils import get_current_backend

from .utils import test_data_values, test_data_keys, df_equals

Expand Down Expand Up @@ -437,3 +438,29 @@ def test_to_pandas_indices():
assert md_df.axes[axis].equal_levels(
pd_df.axes[axis]
), f"Levels of indices at axis {axis} are different!"


@pytest.mark.skipif(
    get_current_backend() != "BaseOnPython",
    reason="This test make sense only on BaseOnPython backend.",
)
@pytest.mark.parametrize(
    "func, regex",
    [
        (lambda frame: frame.mean(level=0), r"DataFrame\.mean"),
        (lambda frame: frame + frame, r"DataFrame\.add"),
        (lambda frame: frame.index, r"DataFrame\.get_axis\(0\)"),
        (
            lambda frame: frame.drop(columns="col1").squeeze().repeat(2),
            r"Series\.repeat",
        ),
        (lambda frame: frame.groupby("col1").prod(), r"GroupBy\.prod"),
        (lambda frame: frame.rolling(1).count(), r"Rolling\.count"),
    ],
)
def test_default_to_pandas_warning_message(func, regex):
    """Check that each operation emits a ``UserWarning`` matching ``regex``.

    Every parametrized case applies ``func`` to a small two-column frame
    and expects the warning text to match the paired pattern. The test is
    skipped unless the current backend is ``BaseOnPython``.
    """
    frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})

    with pytest.warns(UserWarning, match=regex):
        func(frame)