diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ad82d0da95fa..a1af806e5cb9e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,8 +82,10 @@ from pandas import compat from pandas.compat import PY36 from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution -from pandas.util._validators import validate_bool_kwarg +from pandas.util._decorators import (Appender, Substitution, + rewrite_axis_style_signature) +from pandas.util._validators import (validate_bool_kwarg, + validate_axis_style_args) from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.datetimes import DatetimeIndex @@ -2917,12 +2919,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, broadcast_axis=broadcast_axis) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) - def reindex(self, labels=None, index=None, columns=None, axis=None, - **kwargs): - axes = self._validate_axis_style_args(labels, 'labels', - axes=[index, columns], - axis=axis, method_name='reindex') + @rewrite_axis_style_signature('labels', [('method', None), + ('copy', True), + ('level', None), + ('fill_value', np.nan), + ('limit', None), + ('tolerance', None)]) + def reindex(self, *args, **kwargs): + axes = validate_axis_style_args(self, args, kwargs, 'labels', + 'reindex') kwargs.update(axes) + # Pop these, since the values are in `kwargs` under different names + kwargs.pop('axis', None) + kwargs.pop('labels', None) return super(DataFrame, self).reindex(**kwargs) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) @@ -2933,8 +2942,10 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, method=method, level=level, copy=copy, limit=limit, fill_value=fill_value) - def rename(self, mapper=None, index=None, columns=None, axis=None, - **kwargs): + @rewrite_axis_style_signature('mapper', [('copy', True), + ('inplace', False), + ('level', None)]) + def rename(self, *args, **kwargs): """Alter axes labels. Function / dict values must be unique (1-to-1). Labels not contained in @@ -3008,10 +3019,11 @@ def rename(self, mapper=None, index=None, columns=None, axis=None, 2 2 5 4 3 6 """ - axes = self._validate_axis_style_args(mapper, 'mapper', - axes=[index, columns], - axis=axis, method_name='rename') + axes = validate_axis_style_args(self, args, kwargs, 'mapper', 'rename') kwargs.update(axes) + # Pop these, since the values are in `kwargs` under different names + kwargs.pop('axis', None) + kwargs.pop('mapper', None) return super(DataFrame, self).rename(**kwargs) @Appender(_shared_docs['fillna'] % _shared_doc_kwargs) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3e5828fc59932..118e7d5cd437b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,7 +28,7 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame -from pandas.core.common import (_all_not_none, _count_not_none, +from pandas.core.common import (_count_not_none, _maybe_box_datetimelike, _values_from_object, AbstractMethodError, SettingWithCopyError, SettingWithCopyWarning) @@ -728,51 +728,6 @@ def swaplevel(self, i=-2, j=-1, axis=0): result._data.set_axis(axis, labels.swaplevel(i, j)) return result - def _validate_axis_style_args(self, arg, arg_name, axes, - axis, method_name): - out = {} - for i, value in enumerate(axes): - if value is not None: - out[self._AXIS_NAMES[i]] = value - - aliases = ', '.join(self._AXIS_NAMES.values()) - if axis is not None: - # Using "axis" style, along with a positional arg - # Both index and columns should be None then - axis = self._get_axis_name(axis) - if any(x is not None for x in axes): - msg = ( - "Can't specify both 'axis' and {aliases}. " - "Specify either\n" - "\t.{method_name}({arg_name}, axis=axis), or\n" - "\t.{method_name}(index=index, columns=columns)" - ).format(arg_name=arg_name, method_name=method_name, - aliases=aliases) - raise TypeError(msg) - out[axis] = arg - - elif _all_not_none(arg, *axes): - msg = ( - "Cannot specify all of '{arg_name}', {aliases}. " - "Specify either {arg_name} and 'axis', or {aliases}." - ).format(arg_name=arg_name, aliases=aliases) - raise TypeError(msg) - - elif _all_not_none(arg, axes[0]): - # This is the "ambiguous" case, so emit a warning - msg = ( - "Interpreting call to '.{method_name}(a, b)' as " - "'.{method_name}(index=a, columns=b)'. " # TODO - "Use keyword arguments to remove any ambiguity." - ).format(method_name=method_name) - warnings.warn(msg, stacklevel=3) - out[self._AXIS_ORDERS[0]] = arg - out[self._AXIS_ORDERS[1]] = axes[0] - elif axes[0] is None: - # This is for the default axis, like reindex([0, 1]) - out[self._AXIS_ORDERS[0]] = arg - return out - # ---------------------------------------------------------------------- # Rename diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 997dd9c8e0f67..d2b513367fc78 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -35,6 +35,7 @@ from pandas.core.series import Series from pandas.core.reshape.util import cartesian_product from pandas.util._decorators import (deprecate, Appender) +from pandas.util._validators import validate_axis_style_args _shared_doc_kwargs = dict( axes='items, major_axis, minor_axis', @@ -1199,20 +1200,27 @@ def _wrap_result(self, result, axis): return self._construct_return_type(result, axes) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) - def reindex(self, labels=None, - items=None, major_axis=None, minor_axis=None, - axis=None, **kwargs): - major_axis = (major_axis if major_axis is not None else - kwargs.pop('major', None)) - minor_axis = (minor_axis if minor_axis is not None else - kwargs.pop('minor', None)) - axes = self._validate_axis_style_args( - labels, 'labels', axes=[items, major_axis, minor_axis], - axis=axis, method_name='reindex') + def reindex(self, *args, **kwargs): + major = kwargs.pop("major", None) + minor = kwargs.pop('minor', None) + + if major is not None: + if kwargs.get("major_axis"): + raise TypeError("Cannot specify both 'major' and 'major_axis'") + kwargs['major_axis'] = major + if minor is not None: + if kwargs.get("minor_axis"): + raise TypeError("Cannot specify both 'minor' and 'minor_axis'") + + kwargs['minor_axis'] = minor + axes = validate_axis_style_args(self, args, kwargs, 'labels', + 'reindex') if self.ndim >= 4: # Hack for PanelND axes = {} kwargs.update(axes) + kwargs.pop('axis', None) + kwargs.pop('labels', None) return super(Panel, self).reindex(**kwargs) @Appender(_shared_docs['rename'] % _shared_doc_kwargs) diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index e6914fb268359..0fac720302cfb 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -1,8 +1,11 @@ """ Panel4D: a 4-d dict like collection of panels """ import warnings +from pandas.core.generic import NDFrame from pandas.core.panelnd import create_nd_panel_factory from pandas.core.panel import Panel +from pandas.util._validators import validate_axis_style_args + Panel4D = create_nd_panel_factory(klass_name='Panel4D', orders=['labels', 'items', 'major_axis', @@ -62,12 +65,34 @@ def panel4d_reindex(self, labs=None, labels=None, items=None, major_axis=None, # Hack for reindex_axis deprecation # Ha, we used labels for two different things # I think this will work still. - axes = self._validate_axis_style_args( - labs, 'labels', - axes=[labels, items, major_axis, minor_axis], - axis=axis, method_name='reindex') + if labs is None: + args = () + else: + args = (labs,) + kwargs_ = dict(labels=labels, + items=items, + major_axis=major_axis, + minor_axis=minor_axis, + axis=axis) + kwargs_ = {k: v for k, v in kwargs_.items() if v is not None} + # major = kwargs.pop("major", None) + # minor = kwargs.pop('minor', None) + + # if major is not None: + # if kwargs.get("major_axis"): + # raise TypeError("Cannot specify both 'major' and 'major_axis'") + # kwargs_['major_axis'] = major + # if minor is not None: + # if kwargs.get("minor_axis"): + # raise TypeError("Cannot specify both 'minor' and 'minor_axis'") + # kwargs_['minor_axis'] = minor + + if axis is not None: + kwargs_['axis'] = axis + + axes = validate_axis_style_args(self, args, kwargs_, 'labs', 'reindex') kwargs.update(axes) - return super(Panel, self).reindex(**kwargs) + return NDFrame.reindex(self, **kwargs) Panel4D.__init__ = panel4d_init diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 84f7dd108f2cb..e7ea3f9c62540 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -2,13 +2,14 @@ from __future__ import print_function +import inspect import pytest from datetime import datetime, timedelta import numpy as np -from pandas.compat import lrange +from pandas.compat import lrange, PY2 from pandas import (DataFrame, Series, Index, MultiIndex, RangeIndex, date_range, IntervalIndex, to_datetime) @@ -887,6 +888,9 @@ def test_rename_axis_style(self): result = df.rename({'X': 'x', 'Y': 'y'}, axis='index') assert_frame_equal(result, expected) + result = df.rename(mapper=str.lower, axis='index') + assert_frame_equal(result, expected) + def test_rename_mapper_multi(self): df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], 'C': [1, 2]}).set_index(["A", "B"]) @@ -929,6 +933,10 @@ def test_rename_axis_style_raises(self): with tm.assert_raises_regex(TypeError, None): df.rename(str.lower, str.lower, str.lower) + # Duplicates + with tm.assert_raises_regex(TypeError, "multiple values"): + df.rename(id, mapper=id) + def test_reindex_api_equivalence(self): # equivalence of the labels/axis and index/columns API's df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], @@ -956,6 +964,17 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) + def test_rename_positional(self): + df = pd.DataFrame(columns=['A', 'B']) + with tm.assert_produces_warning(FutureWarning) as rec: + result = df.rename(None, str.lower) + expected = pd.DataFrame(columns=['a', 'b']) + assert_frame_equal(result, expected) + assert len(rec) == 1 + message = str(rec[0].message) + assert 'rename' in message + assert 'Use named arguments' in message + def test_assign_columns(self): self.frame['hi'] = 'there' @@ -981,12 +1000,27 @@ def test_set_index_preserve_categorical_dtype(self): def test_ambiguous_warns(self): df = pd.DataFrame({"A": [1, 2]}) - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(FutureWarning): df.rename(id, id) - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(FutureWarning): df.rename({0: 10}, {"A": "B"}) + @pytest.mark.skipif(PY2, reason="inspect.signature") + def test_rename_signature(self): + sig = inspect.signature(pd.DataFrame.rename) + parameters = set(sig.parameters) + assert parameters == {"self", "mapper", "index", "columns", "axis", + "inplace", "copy", "level"} + + @pytest.mark.skipif(PY2, reason="inspect.signature") + def test_reindex_signature(self): + sig = inspect.signature(pd.DataFrame.reindex) + parameters = set(sig.parameters) + assert parameters == {"self", "labels", "index", "columns", "axis", + "limit", "copy", "level", "method", + "fill_value", "tolerance"} + class TestIntervalIndex(object): diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index bf96818edf04e..1e2f630401c89 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -468,7 +468,7 @@ def test_reindex_positional_warns(self): df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) expected = pd.DataFrame({"A": [1., 2], 'B': [4., 5], "C": [np.nan, np.nan]}) - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(FutureWarning): result = df.reindex([0, 1], ['A', 'B', 'C']) assert_frame_equal(result, expected) @@ -476,34 +476,38 @@ def test_reindex_positional_warns(self): def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis=1) - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis='index') - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(columns=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], columns=[0, 1], axis='columns') with tm.assert_raises_regex(TypeError, 'Cannot specify all'): df.reindex([0, 1], [0], ['A']) # Mixing styles - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, 'reindex'): + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') + # Duplicates + with tm.assert_raises_regex(TypeError, "multiple values"): + df.reindex([0, 1], labels=[0, 1]) + def test_reindex_single_named_indexer(self): # https://github.com/pandas-dev/pandas/issues/12392 df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) @@ -532,7 +536,7 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) - with tm.assert_produces_warning(UserWarning) as m: + with tm.assert_produces_warning(FutureWarning) as m: res1 = df.reindex(['b', 'a'], ['e', 'd']) assert 'reindex' in str(m[0].message) res2 = df.reindex(columns=['e', 'd'], index=['b', 'a']) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index e845fcac33323..33fb6f1108bf2 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1424,6 +1424,11 @@ def test_reindex(self): result = self.panel.reindex(minor=new_minor) assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor)) + # raise exception put both major and major_axis + pytest.raises(Exception, self.panel.reindex, + minor_axis=new_minor, + minor=new_minor) + # this ok result = self.panel.reindex() assert_panel_equal(result, self.panel) @@ -1460,6 +1465,9 @@ def test_reindex_axis_style(self): result = panel.reindex([0, 1], axis=2) assert_panel_equal(result, expected2) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) + def test_reindex_multi(self): with catch_warnings(record=True): diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 9e4e5515a292b..7c9250e52d482 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,5 +1,6 @@ -from pandas.compat import callable, signature +from pandas.compat import callable, signature, PY2 from pandas._libs.properties import cache_readonly # noqa +import inspect import types import warnings from textwrap import dedent @@ -119,6 +120,31 @@ def wrapper(*args, **kwargs): return _deprecate_kwarg +def rewrite_axis_style_signature(name, extra_params): + def decorate(func): + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + + if not PY2: + kind = inspect.Parameter.POSITIONAL_OR_KEYWORD + params = [ + inspect.Parameter('self', kind), + inspect.Parameter(name, kind, default=None), + inspect.Parameter('index', kind, default=None), + inspect.Parameter('columns', kind, default=None), + inspect.Parameter('axis', kind, default=None), + ] + + for pname, default in extra_params: + params.append(inspect.Parameter(pname, kind, default=default)) + + sig = inspect.Signature(params) + + func.__signature__ = sig + return wrapper + return decorate + # Substitution and Appender are derived from matplotlib.docstring (1.1.0) # module http://matplotlib.org/users/license.html diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 2661e4a98aedf..728db6af5558b 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,6 +2,7 @@ Module that contains many useful utilities for validating data or function arguments """ +import warnings from pandas.core.dtypes.common import is_bool @@ -224,3 +225,98 @@ def validate_bool_kwarg(value, arg_name): 'type {typ}.'.format(arg=arg_name, typ=type(value).__name__)) return value + + +def validate_axis_style_args(data, args, kwargs, arg_name, method_name): + """Argument handler for mixed index, columns / axis functions + + In an attempt to handle both `.method(index, columns)`, and + `.method(arg, axis=.)`, we have to do some bad things to argument + parsing. This translates all arguments to `{index=., columns=.}` style. + + Parameters + ---------- + data : DataFrame or Panel + arg : tuple + All positional arguments from the user + kwargs : dict + All keyword arguments from the user + arg_name, method_name : str + Used for better error messages + + Returns + ------- + kwargs : dict + A dictionary of keyword arguments. Doesn't modify ``kwargs`` + inplace, so update them with the return value here. + + Examples + -------- + >>> df._validate_axis_style_args((str.upper,), {'columns': id}, + ... 'mapper', 'rename') + {'columns': , 'index': } + + This emits a warning + >>> df._validate_axis_style_args((str.upper, id), {}, + ... 'mapper', 'rename') + {'columns': , 'index': } + """ + # TODO(PY3): Change to keyword-only args and remove all this + + out = {} + # Goal: fill 'out' with index/columns-style arguments + # like out = {'index': foo, 'columns': bar} + + # Start by validating for consistency + if 'axis' in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS): + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." + raise TypeError(msg) + + # First fill with explicit values provided by the user... + if arg_name in kwargs: + if args: + msg = ("{} got multiple values for argument " + "'{}'".format(method_name, arg_name)) + raise TypeError(msg) + + axis = data._get_axis_name(kwargs.get('axis', 0)) + out[axis] = kwargs[arg_name] + + # More user-provided arguments, now from kwargs + for k, v in kwargs.items(): + try: + ax = data._get_axis_name(k) + except ValueError: + pass + else: + out[ax] = v + + # All user-provided kwargs have been handled now. + # Now we supplement with positional arguments, emmitting warnings + # when there's ambiguity and raising when there's conflicts + + if len(args) == 0: + pass # It's up to the function to decide if this is valid + elif len(args) == 1: + axis = data._get_axis_name(kwargs.get('axis', 0)) + out[axis] = args[0] + elif len(args) == 2: + if 'axis' in kwargs: + # Unambiguously wrong + msg = ("Cannot specify both 'axis' and any of 'index' " + "or 'columns'") + raise TypeError(msg) + + msg = ("Intepreting call\n\t'.{method_name}(a, b)' as " + "\n\t'.{method_name}(index=a, columns=b)'.\nUse named " + "arguments to remove any ambiguity. In the future, using " + "positional arguments for 'index' or 'columns' will raise " + " a 'TypeError'.") + warnings.warn(msg.format(method_name=method_name,), FutureWarning, + stacklevel=4) + out[data._AXIS_NAMES[0]] = args[0] + out[data._AXIS_NAMES[1]] = args[1] + else: + msg = "Cannot specify all of '{}', 'index', 'columns'." + raise TypeError(msg.format(arg_name)) + return out