Skip to content

Commit

Permalink
DEPR: allowing subclass-specific keywords in pd.Index.__new__ (pandas…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and luckyvs1 committed Jan 20, 2021
1 parent eb6a2d1 commit e3eafc4
Show file tree
Hide file tree
Showing 17 changed files with 112 additions and 61 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~

- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor; use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`)
-
-

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,15 @@ def __new__(
cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
) -> "Index":

if kwargs:
warnings.warn(
"Passing keywords other than 'data', 'dtype', 'copy', 'name', "
"'tupleize_cols' is deprecated and will raise TypeError in a "
"future version. Use the specific Index subclass directly instead",
FutureWarning,
stacklevel=2,
)

from pandas.core.indexes.range import RangeIndex

name = maybe_extract_name(name, data, cls)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def __array_wrap__(self, result, context=None):
if not is_period_dtype(self.dtype) and attrs["freq"]:
# no need to infer if freq is None
attrs["freq"] = "infer"
return Index(result, **attrs)
return type(self)(result, **attrs)

# ------------------------------------------------------------------------

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1966,6 +1966,10 @@ def _asfreq_compat(index, freq):
new_index: Index
if isinstance(index, PeriodIndex):
new_index = index.asfreq(freq=freq)
else:
new_index = Index([], dtype=index.dtype, freq=freq, name=index.name)
elif isinstance(index, DatetimeIndex):
new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
elif isinstance(index, TimedeltaIndex):
new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
else: # pragma: no cover
raise TypeError(type(index))
return new_index
63 changes: 39 additions & 24 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Expand Down Expand Up @@ -2045,15 +2046,19 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
if self.freq is not None:
kwargs["freq"] = _ensure_decoded(self.freq)

factory: Union[Type[Index], Type[DatetimeIndex]] = Index
if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype):
factory = DatetimeIndex

# making an Index instance could throw a number of different errors
try:
new_pd_index = Index(values, **kwargs)
new_pd_index = factory(values, **kwargs)
except ValueError:
# if the output freq is different than what we recorded,
# it should be None (see also 'doc example part 2')
if "freq" in kwargs:
kwargs["freq"] = None
new_pd_index = Index(values, **kwargs)
new_pd_index = factory(values, **kwargs)

new_pd_index = _set_tz(new_pd_index, self.tz)
return new_pd_index, new_pd_index
Expand Down Expand Up @@ -2736,8 +2741,14 @@ def _alias_to_class(self, alias):
return alias
return self._reverse_index_map.get(alias, Index)

def _get_index_factory(self, klass):
if klass == DatetimeIndex:
def _get_index_factory(self, attrs):
index_class = self._alias_to_class(
_ensure_decoded(getattr(attrs, "index_class", ""))
)

factory: Callable

if index_class == DatetimeIndex:

def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
Expand All @@ -2747,16 +2758,34 @@ def f(values, freq=None, tz=None):
result = result.tz_localize("UTC").tz_convert(tz)
return result

return f
elif klass == PeriodIndex:
factory = f
elif index_class == PeriodIndex:

def f(values, freq=None, tz=None):
parr = PeriodArray._simple_new(values, freq=freq)
return PeriodIndex._simple_new(parr, name=None)

return f
factory = f
else:
factory = index_class

kwargs = {}
if "freq" in attrs:
kwargs["freq"] = attrs["freq"]
if index_class is Index:
# DTI/PI would be gotten by _alias_to_class
factory = TimedeltaIndex

if "tz" in attrs:
if isinstance(attrs["tz"], bytes):
# created by python2
kwargs["tz"] = attrs["tz"].decode("utf-8")
else:
# created by python3
kwargs["tz"] = attrs["tz"]
assert index_class is DatetimeIndex # just checking

return klass
return factory, kwargs

def validate_read(self, columns, where):
"""
Expand Down Expand Up @@ -2928,22 +2957,8 @@ def read_index_node(
name = _ensure_str(node._v_attrs.name)
name = _ensure_decoded(name)

index_class = self._alias_to_class(
_ensure_decoded(getattr(node._v_attrs, "index_class", ""))
)
factory = self._get_index_factory(index_class)

kwargs = {}
if "freq" in node._v_attrs:
kwargs["freq"] = node._v_attrs["freq"]

if "tz" in node._v_attrs:
if isinstance(node._v_attrs["tz"], bytes):
# created by python2
kwargs["tz"] = node._v_attrs["tz"].decode("utf-8")
else:
# created by python3
kwargs["tz"] = node._v_attrs["tz"]
attrs = node._v_attrs
factory, kwargs = self._get_index_factory(attrs)

if kind == "date":
index = factory(
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ def test_grouper_creation_bug(self):
)
result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
expected = Series(
[28], index=Index([Timestamp("2013-01-31")], freq="M", name="three")
[28],
index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"),
)
tm.assert_series_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_columns_groupby_quantile():
[9.6, 8.4, 10.6, 9.4],
],
index=list("XYZ"),
columns=Index(
columns=pd.MultiIndex.from_tuples(
[("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None]
),
)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/base_class/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

from pandas import Index, MultiIndex
import pandas._testing as tm


class TestIndexConstructor:
Expand Down Expand Up @@ -29,7 +30,8 @@ def test_construction_list_mixed_tuples(self, index_vals):
def test_constructor_wrong_kwargs(self):
# GH #19348
with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"):
Index([], foo="bar")
with tm.assert_produces_warning(FutureWarning):
Index([], foo="bar")

@pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument")
def test_constructor_cast(self):
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/indexes/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,14 @@ def test_construction_with_categorical_dtype(self):
CategoricalIndex(data, categories=cats, dtype=dtype)

with pytest.raises(ValueError, match=msg):
Index(data, categories=cats, dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
# passing subclass-specific kwargs to pd.Index
Index(data, categories=cats, dtype=dtype)

with pytest.raises(ValueError, match=msg):
CategoricalIndex(data, ordered=ordered, dtype=dtype)

with pytest.raises(ValueError, match=msg):
Index(data, ordered=ordered, dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
# passing subclass-specific kwargs to pd.Index
Index(data, ordered=ordered, dtype=dtype)
26 changes: 15 additions & 11 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self):
assert result.tz is None

# all NaT with tz
result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

tm.assert_index_equal(result, exp, exact=True)
Expand Down Expand Up @@ -462,16 +464,18 @@ def test_construction_dti_with_mixed_timezones(self):
with pytest.raises(ValueError, match=msg):
# passing tz should result in DatetimeIndex, then mismatch raises
# ValueError
Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)

def test_construction_base_constructor(self):
arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/interval/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ConstructorTests:
get_kwargs_from_breaks to the expected format.
"""

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"breaks",
[
Expand Down Expand Up @@ -80,6 +81,7 @@ def test_constructor_dtype(self, constructor, breaks, subtype):
result = constructor(dtype=dtype, **result_kwargs)
tm.assert_index_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
def test_constructor_nan(self, constructor, breaks, closed):
# GH 18421
Expand All @@ -93,6 +95,7 @@ def test_constructor_nan(self, constructor, breaks, closed):
assert result.dtype.subtype == expected_subtype
tm.assert_numpy_array_equal(np.array(result), expected_values)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"breaks",
[
Expand Down Expand Up @@ -378,6 +381,7 @@ def test_constructor_errors(self, constructor):
with pytest.raises(TypeError, match=msg):
constructor([0, 1])

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"data, closed",
[
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/indexes/multi/test_equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,16 @@ def test_identical(idx):
mi2 = mi2.set_names(["new1", "new2"])
assert mi.identical(mi2)

mi3 = Index(mi.tolist(), names=mi.names)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
mi3 = Index(mi.tolist(), names=mi.names)

msg = r"Unexpected keyword arguments {'names'}"
with pytest.raises(TypeError, match=msg):
Index(mi.tolist(), names=mi.names, tupleize_cols=False)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
Index(mi.tolist(), names=mi.names, tupleize_cols=False)

mi4 = Index(mi.tolist(), tupleize_cols=False)
assert mi.identical(mi3)
assert not mi.identical(mi4)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/multi/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_take_preserve_name(idx):
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
multi_idx1 = multi_idx.copy()

assert multi_idx.equals(multi_idx1)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
index = Index(vals)
assert isinstance(index, TimedeltaIndex)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
Expand Down Expand Up @@ -2255,6 +2256,7 @@ def test_index_subclass_constructor_wrong_kwargs(index_maker):
index_maker(foo="bar")


@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
def test_deprecated_fastpath():
msg = "[Uu]nexpected keyword argument"
with pytest.raises(TypeError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_index_groupby(self):
idx.groupby(to_groupby), {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]}
)

to_groupby = Index(
to_groupby = pd.DatetimeIndex(
[
datetime(2011, 11, 1),
datetime(2011, 12, 1),
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,9 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
else:
msg = r"Unexpected keyword arguments {'freq'}"
with pytest.raises(TypeError, match=msg):
pd.Index(data, freq="M")
with tm.assert_produces_warning(FutureWarning):
# passing keywords to pd.Index
pd.Index(data, freq="M")

def test_insert_index_complex128(self):
pytest.xfail("Test not implemented")
Expand Down
20 changes: 8 additions & 12 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,18 +705,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
"idx",
[
pd.Index(range(4)),
pd.Index(
pd.date_range(
"2020-08-30",
freq="d",
periods=4,
),
freq=None,
),
pd.Index(
pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"),
freq=None,
),
pd.date_range(
"2020-08-30",
freq="d",
periods=4,
)._with_freq(None),
pd.date_range(
"2020-08-30", freq="d", periods=4, tz="US/Central"
)._with_freq(None),
pd.MultiIndex.from_product(
[
pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
Expand Down

0 comments on commit e3eafc4

Please sign in to comment.