DEPR: Enforce empty Series returning object dtype (pandas-dev#49342)

* DEPR: Enforce empty Series returning object dtype
* Fix some tests & simplify
* only for list like types
* len(data)
noatamir · Nov 9, 2022 · 639001d · 639001d
1 parent d65a53d
commit 639001d
Show file tree

Hide file tree

Showing 17 changed files with 45 additions and 148 deletions.
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -279,6 +279,7 @@ Removal of prior version deprecations/changes
 - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
 - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
 - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
+- Changed behavior of :class:`Series` constructor given empty data and no ``dtype``; the default dtype is now ``object`` instead of ``float64`` (:issue:`29405`)
 - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
 - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
 - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -58,10 +58,7 @@
 from pandas.core.algorithms import safe_sort
 from pandas.core.base import SelectionMixin
 import pandas.core.common as com
-from pandas.core.construction import (
-    create_series_with_explicit_dtype,
-    ensure_wrapped_if_datetimelike,
-)
+from pandas.core.construction import ensure_wrapped_if_datetimelike
 
 if TYPE_CHECKING:
     from pandas import (
@@ -881,14 +878,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series
 
         # dict of scalars
 
-        # the default dtype of an empty Series will be `object`, but this
+        # the default dtype of an empty Series is `object`, but this
         # code can be hit by df.mean() where the result should have dtype
         # float64 even if it's an empty Series.
         constructor_sliced = self.obj._constructor_sliced
-        if constructor_sliced is Series:
-            result = create_series_with_explicit_dtype(
-                results, dtype_if_empty=np.float64
-            )
+        if len(results) == 0 and constructor_sliced is Series:
+            result = constructor_sliced(results, dtype=np.float64)
         else:
             result = constructor_sliced(results)
         result.index = res_index

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -71,7 +71,6 @@
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import ExtensionArray
 from pandas.core.construction import (
-    create_series_with_explicit_dtype,
     ensure_wrapped_if_datetimelike,
     extract_array,
 )
@@ -842,9 +841,12 @@ def _map_values(self, mapper, na_action=None):
                 # expected to be pd.Series(np.nan, ...). As np.nan is
                 # of dtype float64 the return value of this method should
                 # be float64 as well
-                mapper = create_series_with_explicit_dtype(
-                    mapper, dtype_if_empty=np.float64
-                )
+                from pandas import Series
+
+                if len(mapper) == 0:
+                    mapper = Series(mapper, dtype=np.float64)
+                else:
+                    mapper = Series(mapper)
 
         if isinstance(mapper, ABCSeries):
             if na_action not in (None, "ignore"):

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -8,7 +8,6 @@
 
 from typing import (
     TYPE_CHECKING,
-    Any,
     Optional,
     Sequence,
     Union,
@@ -830,62 +829,3 @@ def _try_cast(
         subarr = np.array(arr, dtype=dtype, copy=copy)
 
     return subarr
-
-
-def is_empty_data(data: Any) -> bool:
-    """
-    Utility to check if a Series is instantiated with empty data,
-    which does not contain dtype information.
-
-    Parameters
-    ----------
-    data : array-like, Iterable, dict, or scalar value
-        Contains data stored in Series.
-
-    Returns
-    -------
-    bool
-    """
-    is_none = data is None
-    is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype")
-    is_simple_empty = is_list_like_without_dtype and not data
-    return is_none or is_simple_empty
-
-
-def create_series_with_explicit_dtype(
-    data: Any = None,
-    index: ArrayLike | Index | None = None,
-    dtype: Dtype | None = None,
-    name: str | None = None,
-    copy: bool = False,
-    fastpath: bool = False,
-    dtype_if_empty: Dtype = object,
-) -> Series:
-    """
-    Helper to pass an explicit dtype when instantiating an empty Series.
-
-    This silences a DeprecationWarning described in GitHub-17261.
-
-    Parameters
-    ----------
-    data : Mirrored from Series.__init__
-    index : Mirrored from Series.__init__
-    dtype : Mirrored from Series.__init__
-    name : Mirrored from Series.__init__
-    copy : Mirrored from Series.__init__
-    fastpath : Mirrored from Series.__init__
-    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
-        This dtype will be passed explicitly if an empty Series will
-        be instantiated.
-
-    Returns
-    -------
-    Series
-    """
-    from pandas.core.series import Series
-
-    if is_empty_data(data) and dtype is None:
-        dtype = dtype_if_empty
-    return Series(
-        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
-    )
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -145,10 +145,7 @@
 from pandas.core.array_algos.replace import should_use_regex
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import PandasObject
-from pandas.core.construction import (
-    create_series_with_explicit_dtype,
-    extract_array,
-)
+from pandas.core.construction import extract_array
 from pandas.core.describe import describe_ndframe
 from pandas.core.flags import Flags
 from pandas.core.indexes.api import (
@@ -6843,9 +6840,9 @@ def fillna(
                         if inplace:
                             return None
                         return self.copy()
-                    value = create_series_with_explicit_dtype(
-                        value, dtype_if_empty=object
-                    )
+                    from pandas import Series
+
+                    value = Series(value)
                     value = value.reindex(self.index, copy=False)
                     value = value._values
                 elif not is_list_like(value):

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -80,7 +80,6 @@
 )
 from pandas.core.arrays.categorical import Categorical
 import pandas.core.common as com
-from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import base
 from pandas.core.groupby.groupby import (
@@ -295,9 +294,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
                 # result is a dict whose keys are the elements of result_index
                 index = self.grouper.result_index
-                return create_series_with_explicit_dtype(
-                    result, index=index, dtype_if_empty=object
-                )
+                return Series(result, index=index)
 
     agg = aggregate
 
@@ -1294,10 +1291,8 @@ def _wrap_applied_output_series(
         key_index,
         override_group_keys: bool,
     ) -> DataFrame | Series:
-        # this is to silence a DeprecationWarning
-        # TODO(2.0): Remove when default dtype of empty Series is object
         kwargs = first_not_none._construct_axes_dict()
-        backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
+        backup = Series(**kwargs)
         values = [x if (x is not None) else backup for x in values]
 
         all_indexed_same = all_indexes_same(x.index for x in values)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -601,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
         else:
             if isinstance(val, dict):
                 # GH#41785 this _should_ be equivalent to (but faster than)
-                #  val = create_series_with_explicit_dtype(val, index=index)._values
+                #  val = Series(val, index=index)._values
                 if oindex is None:
                     oindex = index.astype("O")
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -119,9 +119,7 @@
 from pandas.core.arrays.categorical import CategoricalAccessor
 from pandas.core.arrays.sparse import SparseAccessor
 from pandas.core.construction import (
-    create_series_with_explicit_dtype,
     extract_array,
-    is_empty_data,
     sanitize_array,
 )
 from pandas.core.generic import NDFrame
@@ -389,18 +387,6 @@ def __init__(
 
             name = ibase.maybe_extract_name(name, data, type(self))
 
-            if is_empty_data(data) and dtype is None:
-                # gh-17261
-                warnings.warn(
-                    "The default dtype for empty Series will be 'object' instead "
-                    "of 'float64' in a future version. Specify a dtype explicitly "
-                    "to silence this warning.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                # uncomment the line below when removing the FutureWarning
-                # dtype = np.dtype(object)
-
             if index is not None:
                 index = ensure_index(index)
 
@@ -458,6 +444,9 @@ def __init__(
                 pass
             else:
                 data = com.maybe_iterable_to_list(data)
+                if is_list_like(data) and not len(data) and dtype is None:
+                    # GH 29405: Pre-2.0, this defaulted to float.
+                    dtype = np.dtype(object)
 
             if index is None:
                 if not is_list_like(data):
@@ -531,15 +520,10 @@ def _init_dict(
 
         # Input is now list-like, so rely on "standard" construction:
 
-        # TODO: passing np.float64 to not break anything yet. See GH-17261
-        s = create_series_with_explicit_dtype(
-            # error: Argument "index" to "create_series_with_explicit_dtype" has
-            # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
-            # ndarray, Index, None]"
+        s = self._constructor(
             values,
-            index=keys,  # type: ignore[arg-type]
+            index=keys,
             dtype=dtype,
-            dtype_if_empty=np.float64,
         )
 
         # Now we just make sure the order is respected, if any

diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -32,9 +32,9 @@
 from pandas.core.dtypes.common import is_list_like
 
 from pandas import isna
-from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.indexes.base import Index
 from pandas.core.indexes.multi import MultiIndex
+from pandas.core.series import Series
 
 from pandas.io.common import (
     file_exists,
@@ -858,7 +858,7 @@ def _parse_tfoot_tr(self, table):
 
 def _expand_elements(body) -> None:
     data = [len(elem) for elem in body]
-    lens = create_series_with_explicit_dtype(data, dtype_if_empty=object)
+    lens = Series(data)
     lens_max = lens.max()
     not_max = lens[lens != lens_max]
 

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -49,7 +49,6 @@
     notna,
     to_datetime,
 )
-from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.reshape.concat import concat
 from pandas.core.shared_docs import _shared_docs
 
@@ -1221,9 +1220,9 @@ def _parse(self) -> None:
         if self.orient == "split":
             decoded = {str(k): v for k, v in data.items()}
             self.check_keys_split(decoded)
-            self.obj = create_series_with_explicit_dtype(**decoded)
+            self.obj = Series(**decoded)
         else:
-            self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
+            self.obj = Series(data)
 
     def _try_convert_types(self) -> None:
         if self.obj is None:

diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
@@ -10,7 +10,6 @@
     Series,
 )
 import pandas._testing as tm
-from pandas.core.construction import create_series_with_explicit_dtype
 
 
 class TestFromDict:
@@ -79,9 +78,7 @@ def test_constructor_list_of_series(self):
             OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
             OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
         ]
-        data = [
-            create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
-        ]
+        data = [Series(d) for d in data]
 
         result = DataFrame(data)
         sdict = OrderedDict(zip(range(len(data)), data))

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
@@ -30,7 +30,6 @@
 )
 import pandas._testing as tm
 from pandas.core.arrays import SparseArray
-from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.tests.extension.decimal import to_decimal
 
 
@@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series):
     assert x.values.dtype == dt
 
 
-@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
+@pytest.mark.parametrize("pdt", [Series, DataFrame])
 @pytest.mark.parametrize("dt", np.sctypes["int"])
 def test_concat_will_upcast(dt, pdt):
     with catch_warnings(record=True):

diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
@@ -108,8 +108,7 @@ def test_astype_empty_constructor_equality(self, dtype):
             "m",  # Generic timestamps raise a ValueError. Already tested.
         ):
             init_empty = Series([], dtype=dtype)
-            with tm.assert_produces_warning(FutureWarning):
-                as_type_empty = Series([]).astype(dtype)
+            as_type_empty = Series([]).astype(dtype)
             tm.assert_series_equal(init_empty, as_type_empty)
 
     @pytest.mark.parametrize("dtype", [str, np.str_])

diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas import Series
-from pandas.core.construction import create_series_with_explicit_dtype
 
 
 @pytest.mark.parametrize(
@@ -19,7 +18,7 @@
 )
 def test_is_unique(data, expected):
     # GH#11946 / GH#25180
-    ser = create_series_with_explicit_dtype(data, dtype_if_empty=object)
+    ser = Series(data)
     assert ser.is_unique is expected
 
 

diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
@@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self):
         s = pd.Series(list("abcd"))
         tm.assert_series_equal(s, s.replace({}))
 
-        with tm.assert_produces_warning(FutureWarning):
-            empty_series = pd.Series([])
+        empty_series = pd.Series([])
         tm.assert_series_equal(s, s.replace(empty_series))
 
     def test_replace_string_with_number(self):