DEPR: allowing subclass-specific keywords in pd.Index.__new__ (pandas…

…-dev#38597)
luckyvs1 · Jan 20, 2021 · e3eafc4 · e3eafc4
1 parent eb6a2d1
commit e3eafc4
Show file tree

Hide file tree

Showing 17 changed files with 112 additions and 61 deletions.
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -143,7 +143,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
-
+- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor; use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`)
 -
 -
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -252,6 +252,15 @@ def __new__(
         cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
     ) -> "Index":
 
+        if kwargs:
+            warnings.warn(
+                "Passing keywords other than 'data', 'dtype', 'copy', 'name', "
+                "'tupleize_cols' is deprecated and will raise TypeError in a "
+                "future version.  Use the specific Index subclass directly instead",
+                FutureWarning,
+                stacklevel=2,
+            )
+
         from pandas.core.indexes.range import RangeIndex
 
         name = maybe_extract_name(name, data, cls)

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -147,7 +147,7 @@ def __array_wrap__(self, result, context=None):
         if not is_period_dtype(self.dtype) and attrs["freq"]:
             # no need to infer if freq is None
             attrs["freq"] = "infer"
-        return Index(result, **attrs)
+        return type(self)(result, **attrs)
 
     # ------------------------------------------------------------------------
 

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -1966,6 +1966,10 @@ def _asfreq_compat(index, freq):
     new_index: Index
     if isinstance(index, PeriodIndex):
         new_index = index.asfreq(freq=freq)
-    else:
-        new_index = Index([], dtype=index.dtype, freq=freq, name=index.name)
+    elif isinstance(index, DatetimeIndex):
+        new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
+    elif isinstance(index, TimedeltaIndex):
+        new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
+    else:  # pragma: no cover
+        raise TypeError(type(index))
     return new_index
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -12,6 +12,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Callable,
     Dict,
     List,
     Optional,
@@ -2045,15 +2046,19 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         if self.freq is not None:
             kwargs["freq"] = _ensure_decoded(self.freq)
 
+        factory: Union[Type[Index], Type[DatetimeIndex]] = Index
+        if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype):
+            factory = DatetimeIndex
+
         # making an Index instance could throw a number of different errors
         try:
-            new_pd_index = Index(values, **kwargs)
+            new_pd_index = factory(values, **kwargs)
         except ValueError:
             # if the output freq is different than what we recorded,
             # it should be None (see also 'doc example part 2')
             if "freq" in kwargs:
                 kwargs["freq"] = None
-            new_pd_index = Index(values, **kwargs)
+            new_pd_index = factory(values, **kwargs)
 
         new_pd_index = _set_tz(new_pd_index, self.tz)
         return new_pd_index, new_pd_index
@@ -2736,8 +2741,14 @@ def _alias_to_class(self, alias):
             return alias
         return self._reverse_index_map.get(alias, Index)
 
-    def _get_index_factory(self, klass):
-        if klass == DatetimeIndex:
+    def _get_index_factory(self, attrs):
+        index_class = self._alias_to_class(
+            _ensure_decoded(getattr(attrs, "index_class", ""))
+        )
+
+        factory: Callable
+
+        if index_class == DatetimeIndex:
 
             def f(values, freq=None, tz=None):
                 # data are already in UTC, localize and convert if tz present
@@ -2747,16 +2758,34 @@ def f(values, freq=None, tz=None):
                     result = result.tz_localize("UTC").tz_convert(tz)
                 return result
 
-            return f
-        elif klass == PeriodIndex:
+            factory = f
+        elif index_class == PeriodIndex:
 
             def f(values, freq=None, tz=None):
                 parr = PeriodArray._simple_new(values, freq=freq)
                 return PeriodIndex._simple_new(parr, name=None)
 
-            return f
+            factory = f
+        else:
+            factory = index_class
+
+        kwargs = {}
+        if "freq" in attrs:
+            kwargs["freq"] = attrs["freq"]
+            if index_class is Index:
+                # DTI/PI would be gotten by _alias_to_class
+                factory = TimedeltaIndex
+
+        if "tz" in attrs:
+            if isinstance(attrs["tz"], bytes):
+                # created by python2
+                kwargs["tz"] = attrs["tz"].decode("utf-8")
+            else:
+                # created by python3
+                kwargs["tz"] = attrs["tz"]
+            assert index_class is DatetimeIndex  # just checking
 
-        return klass
+        return factory, kwargs
 
     def validate_read(self, columns, where):
         """
@@ -2928,22 +2957,8 @@ def read_index_node(
             name = _ensure_str(node._v_attrs.name)
             name = _ensure_decoded(name)
 
-        index_class = self._alias_to_class(
-            _ensure_decoded(getattr(node._v_attrs, "index_class", ""))
-        )
-        factory = self._get_index_factory(index_class)
-
-        kwargs = {}
-        if "freq" in node._v_attrs:
-            kwargs["freq"] = node._v_attrs["freq"]
-
-        if "tz" in node._v_attrs:
-            if isinstance(node._v_attrs["tz"], bytes):
-                # created by python2
-                kwargs["tz"] = node._v_attrs["tz"].decode("utf-8")
-            else:
-                # created by python3
-                kwargs["tz"] = node._v_attrs["tz"]
+        attrs = node._v_attrs
+        factory, kwargs = self._get_index_factory(attrs)
 
         if kind == "date":
             index = factory(

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -254,7 +254,8 @@ def test_grouper_creation_bug(self):
         )
         result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
         expected = Series(
-            [28], index=Index([Timestamp("2013-01-31")], freq="M", name="three")
+            [28],
+            index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"),
         )
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
@@ -271,7 +271,7 @@ def test_columns_groupby_quantile():
             [9.6, 8.4, 10.6, 9.4],
         ],
         index=list("XYZ"),
-        columns=Index(
+        columns=pd.MultiIndex.from_tuples(
             [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None]
         ),
     )

diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py
@@ -2,6 +2,7 @@
 import pytest
 
 from pandas import Index, MultiIndex
+import pandas._testing as tm
 
 
 class TestIndexConstructor:
@@ -29,7 +30,8 @@ def test_construction_list_mixed_tuples(self, index_vals):
     def test_constructor_wrong_kwargs(self):
         # GH #19348
         with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"):
-            Index([], foo="bar")
+            with tm.assert_produces_warning(FutureWarning):
+                Index([], foo="bar")
 
     @pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument")
     def test_constructor_cast(self):

diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py
@@ -129,10 +129,14 @@ def test_construction_with_categorical_dtype(self):
             CategoricalIndex(data, categories=cats, dtype=dtype)
 
         with pytest.raises(ValueError, match=msg):
-            Index(data, categories=cats, dtype=dtype)
+            with tm.assert_produces_warning(FutureWarning):
+                # passing subclass-specific kwargs to pd.Index
+                Index(data, categories=cats, dtype=dtype)
 
         with pytest.raises(ValueError, match=msg):
             CategoricalIndex(data, ordered=ordered, dtype=dtype)
 
         with pytest.raises(ValueError, match=msg):
-            Index(data, ordered=ordered, dtype=dtype)
+            with tm.assert_produces_warning(FutureWarning):
+                # passing subclass-specific kwargs to pd.Index
+                Index(data, ordered=ordered, dtype=dtype)
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -374,7 +374,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self):
         assert result.tz is None
 
         # all NaT with tz
-        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
+        with tm.assert_produces_warning(FutureWarning):
+            # subclass-specific kwargs to pd.Index
+            result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
         exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
 
         tm.assert_index_equal(result, exp, exact=True)
@@ -462,16 +464,18 @@ def test_construction_dti_with_mixed_timezones(self):
         with pytest.raises(ValueError, match=msg):
             # passing tz should result in DatetimeIndex, then mismatch raises
             # ValueError
-            Index(
-                [
-                    pd.NaT,
-                    Timestamp("2011-01-01 10:00"),
-                    pd.NaT,
-                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
-                ],
-                tz="Asia/Tokyo",
-                name="idx",
-            )
+            with tm.assert_produces_warning(FutureWarning):
+                # subclass-specific kwargs to pd.Index
+                Index(
+                    [
+                        pd.NaT,
+                        Timestamp("2011-01-01 10:00"),
+                        pd.NaT,
+                        Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+                    ],
+                    tz="Asia/Tokyo",
+                    name="idx",
+                )
 
     def test_construction_base_constructor(self):
         arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]

diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py
@@ -36,6 +36,7 @@ class ConstructorTests:
     get_kwargs_from_breaks to the expected format.
     """
 
+    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize(
         "breaks",
         [
@@ -80,6 +81,7 @@ def test_constructor_dtype(self, constructor, breaks, subtype):
             result = constructor(dtype=dtype, **result_kwargs)
             tm.assert_index_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
     def test_constructor_nan(self, constructor, breaks, closed):
         # GH 18421
@@ -93,6 +95,7 @@ def test_constructor_nan(self, constructor, breaks, closed):
         assert result.dtype.subtype == expected_subtype
         tm.assert_numpy_array_equal(np.array(result), expected_values)
 
+    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize(
         "breaks",
         [
@@ -378,6 +381,7 @@ def test_constructor_errors(self, constructor):
         with pytest.raises(TypeError, match=msg):
             constructor([0, 1])
 
+    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize(
         "data, closed",
         [

diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py
@@ -185,10 +185,16 @@ def test_identical(idx):
     mi2 = mi2.set_names(["new1", "new2"])
     assert mi.identical(mi2)
 
-    mi3 = Index(mi.tolist(), names=mi.names)
+    with tm.assert_produces_warning(FutureWarning):
+        # subclass-specific keywords to pd.Index
+        mi3 = Index(mi.tolist(), names=mi.names)
+
     msg = r"Unexpected keyword arguments {'names'}"
     with pytest.raises(TypeError, match=msg):
-        Index(mi.tolist(), names=mi.names, tupleize_cols=False)
+        with tm.assert_produces_warning(FutureWarning):
+            # subclass-specific keywords to pd.Index
+            Index(mi.tolist(), names=mi.names, tupleize_cols=False)
+
     mi4 = Index(mi.tolist(), tupleize_cols=False)
     assert mi.identical(mi3)
     assert not mi.identical(mi4)

diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py
@@ -56,7 +56,9 @@ def test_take_preserve_name(idx):
 def test_copy_names():
     # Check that adding a "names" parameter to the copy is honored
     # GH14302
-    multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
+    with tm.assert_produces_warning(FutureWarning):
+        # subclass-specific kwargs to pd.Index
+        multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
     multi_idx1 = multi_idx.copy()
 
     assert multi_idx.equals(multi_idx1)

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -335,6 +335,7 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
             index = Index(vals)
             assert isinstance(index, TimedeltaIndex)
 
+    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize("attr", ["values", "asi8"])
     @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
     def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
@@ -2255,6 +2256,7 @@ def test_index_subclass_constructor_wrong_kwargs(index_maker):
         index_maker(foo="bar")
 
 
+@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
 def test_deprecated_fastpath():
     msg = "[Uu]nexpected keyword argument"
     with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -57,7 +57,7 @@ def test_index_groupby(self):
                 idx.groupby(to_groupby), {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]}
             )
 
-            to_groupby = Index(
+            to_groupby = pd.DatetimeIndex(
                 [
                     datetime(2011, 11, 1),
                     datetime(2011, 12, 1),

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -506,7 +506,9 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
         else:
             msg = r"Unexpected keyword arguments {'freq'}"
             with pytest.raises(TypeError, match=msg):
-                pd.Index(data, freq="M")
+                with tm.assert_produces_warning(FutureWarning):
+                    # passing keywords to pd.Index
+                    pd.Index(data, freq="M")
 
     def test_insert_index_complex128(self):
         pytest.xfail("Test not implemented")

diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
@@ -705,18 +705,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
         "idx",
         [
             pd.Index(range(4)),
-            pd.Index(
-                pd.date_range(
-                    "2020-08-30",
-                    freq="d",
-                    periods=4,
-                ),
-                freq=None,
-            ),
-            pd.Index(
-                pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"),
-                freq=None,
-            ),
+            pd.date_range(
+                "2020-08-30",
+                freq="d",
+                periods=4,
+            )._with_freq(None),
+            pd.date_range(
+                "2020-08-30", freq="d", periods=4, tz="US/Central"
+            )._with_freq(None),
             pd.MultiIndex.from_product(
                 [
                     pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),