Skip to content

Commit

Permalink
DEPR: Enforce empty Series returning object dtype (pandas-dev#49342)
Browse files Browse the repository at this point in the history
* DEPR: Enforce empty Series returning object dtype

* Fix some tests & simplify

* only for list like types

* len(data)
  • Loading branch information
mroeschke authored and noatamir committed Nov 9, 2022
1 parent d65a53d commit 639001d
Show file tree
Hide file tree
Showing 17 changed files with 45 additions and 148 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ Removal of prior version deprecations/changes
- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``: when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
Expand Down
13 changes: 4 additions & 9 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@
from pandas.core.algorithms import safe_sort
from pandas.core.base import SelectionMixin
import pandas.core.common as com
from pandas.core.construction import (
create_series_with_explicit_dtype,
ensure_wrapped_if_datetimelike,
)
from pandas.core.construction import ensure_wrapped_if_datetimelike

if TYPE_CHECKING:
from pandas import (
Expand Down Expand Up @@ -881,14 +878,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series

# dict of scalars

# the default dtype of an empty Series will be `object`, but this
# the default dtype of an empty Series is `object`, but this
# code can be hit by df.mean() where the result should have dtype
# float64 even if it's an empty Series.
constructor_sliced = self.obj._constructor_sliced
if constructor_sliced is Series:
result = create_series_with_explicit_dtype(
results, dtype_if_empty=np.float64
)
if len(results) == 0 and constructor_sliced is Series:
result = constructor_sliced(results, dtype=np.float64)
else:
result = constructor_sliced(results)
result.index = res_index
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.construction import (
create_series_with_explicit_dtype,
ensure_wrapped_if_datetimelike,
extract_array,
)
Expand Down Expand Up @@ -842,9 +841,12 @@ def _map_values(self, mapper, na_action=None):
# expected to be pd.Series(np.nan, ...). As np.nan is
# of dtype float64 the return value of this method should
# be float64 as well
mapper = create_series_with_explicit_dtype(
mapper, dtype_if_empty=np.float64
)
from pandas import Series

if len(mapper) == 0:
mapper = Series(mapper, dtype=np.float64)
else:
mapper = Series(mapper)

if isinstance(mapper, ABCSeries):
if na_action not in (None, "ignore"):
Expand Down
60 changes: 0 additions & 60 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from typing import (
TYPE_CHECKING,
Any,
Optional,
Sequence,
Union,
Expand Down Expand Up @@ -830,62 +829,3 @@ def _try_cast(
subarr = np.array(arr, dtype=dtype, copy=copy)

return subarr


def is_empty_data(data: Any) -> bool:
    """
    Check whether a Series is being instantiated with empty data.

    "Empty" here means data that carries no dtype information: either
    ``None``, or a list-like object that has no ``dtype`` attribute and is
    falsy (for example ``[]``, ``()`` or ``{}``).

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

    Returns
    -------
    bool
        True if ``data`` is ``None`` or an empty list-like without a
        ``dtype`` attribute, False otherwise.
    """
    if data is None:
        return True
    # Objects exposing ``dtype`` (ndarray, Index, Series, ...) already carry
    # dtype information, so they never count as "empty" here, even with
    # zero elements.
    is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype")
    # Truthiness is only evaluated once we know ``data`` is a dtype-less
    # list-like.
    return is_list_like_without_dtype and not data


def create_series_with_explicit_dtype(
    data: Any = None,
    index: ArrayLike | Index | None = None,
    dtype: Dtype | None = None,
    name: str | None = None,
    copy: bool = False,
    fastpath: bool = False,
    dtype_if_empty: Dtype = object,
) -> Series:
    """
    Helper to pass an explicit dtype when instantiating an empty Series.

    This silences the warning ``Series`` emits for empty data without an
    explicit dtype, described in GitHub-17261.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
        This dtype will be passed explicitly if an empty Series will
        be instantiated.

    Returns
    -------
    Series
    """
    # Imported locally rather than at module level.
    # NOTE(review): presumably to avoid a circular import with
    # pandas.core.series -- confirm.
    from pandas.core.series import Series

    # Only substitute the fallback when the caller gave no dtype and the
    # data itself carries no dtype information.
    if is_empty_data(data) and dtype is None:
        dtype = dtype_if_empty
    return Series(
        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
    )
11 changes: 4 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,7 @@
from pandas.core.array_algos.replace import should_use_regex
from pandas.core.arrays import ExtensionArray
from pandas.core.base import PandasObject
from pandas.core.construction import (
create_series_with_explicit_dtype,
extract_array,
)
from pandas.core.construction import extract_array
from pandas.core.describe import describe_ndframe
from pandas.core.flags import Flags
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -6843,9 +6840,9 @@ def fillna(
if inplace:
return None
return self.copy()
value = create_series_with_explicit_dtype(
value, dtype_if_empty=object
)
from pandas import Series

value = Series(value)
value = value.reindex(self.index, copy=False)
value = value._values
elif not is_list_like(value):
Expand Down
9 changes: 2 additions & 7 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
)
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.frame import DataFrame
from pandas.core.groupby import base
from pandas.core.groupby.groupby import (
Expand Down Expand Up @@ -295,9 +294,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)

# result is a dict whose keys are the elements of result_index
index = self.grouper.result_index
return create_series_with_explicit_dtype(
result, index=index, dtype_if_empty=object
)
return Series(result, index=index)

agg = aggregate

Expand Down Expand Up @@ -1294,10 +1291,8 @@ def _wrap_applied_output_series(
key_index,
override_group_keys: bool,
) -> DataFrame | Series:
# this is to silence a DeprecationWarning
# TODO(2.0): Remove when default dtype of empty Series is object
kwargs = first_not_none._construct_axes_dict()
backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs)
backup = Series(**kwargs)
values = [x if (x is not None) else backup for x in values]

all_indexed_same = all_indexes_same(x.index for x in values)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]:
else:
if isinstance(val, dict):
# GH#41785 this _should_ be equivalent to (but faster than)
# val = create_series_with_explicit_dtype(val, index=index)._values
# val = Series(val, index=index)._values
if oindex is None:
oindex = index.astype("O")

Expand Down
26 changes: 5 additions & 21 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,7 @@
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
from pandas.core.construction import (
create_series_with_explicit_dtype,
extract_array,
is_empty_data,
sanitize_array,
)
from pandas.core.generic import NDFrame
Expand Down Expand Up @@ -389,18 +387,6 @@ def __init__(

name = ibase.maybe_extract_name(name, data, type(self))

if is_empty_data(data) and dtype is None:
# gh-17261
warnings.warn(
"The default dtype for empty Series will be 'object' instead "
"of 'float64' in a future version. Specify a dtype explicitly "
"to silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
# uncomment the line below when removing the FutureWarning
# dtype = np.dtype(object)

if index is not None:
index = ensure_index(index)

Expand Down Expand Up @@ -458,6 +444,9 @@ def __init__(
pass
else:
data = com.maybe_iterable_to_list(data)
if is_list_like(data) and not len(data) and dtype is None:
# GH 29405: Pre-2.0, this defaulted to float.
dtype = np.dtype(object)

if index is None:
if not is_list_like(data):
Expand Down Expand Up @@ -531,15 +520,10 @@ def _init_dict(

# Input is now list-like, so rely on "standard" construction:

# TODO: passing np.float64 to not break anything yet. See GH-17261
s = create_series_with_explicit_dtype(
# error: Argument "index" to "create_series_with_explicit_dtype" has
# incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
# ndarray, Index, None]"
s = self._constructor(
values,
index=keys, # type: ignore[arg-type]
index=keys,
dtype=dtype,
dtype_if_empty=np.float64,
)

# Now we just make sure the order is respected, if any
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from pandas.core.dtypes.common import is_list_like

from pandas import isna
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.indexes.base import Index
from pandas.core.indexes.multi import MultiIndex
from pandas.core.series import Series

from pandas.io.common import (
file_exists,
Expand Down Expand Up @@ -858,7 +858,7 @@ def _parse_tfoot_tr(self, table):

def _expand_elements(body) -> None:
data = [len(elem) for elem in body]
lens = create_series_with_explicit_dtype(data, dtype_if_empty=object)
lens = Series(data)
lens_max = lens.max()
not_max = lens[lens != lens_max]

Expand Down
5 changes: 2 additions & 3 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
notna,
to_datetime,
)
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs

Expand Down Expand Up @@ -1221,9 +1220,9 @@ def _parse(self) -> None:
if self.orient == "split":
decoded = {str(k): v for k, v in data.items()}
self.check_keys_split(decoded)
self.obj = create_series_with_explicit_dtype(**decoded)
self.obj = Series(**decoded)
else:
self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
self.obj = Series(data)

def _try_convert_types(self) -> None:
if self.obj is None:
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Series,
)
import pandas._testing as tm
from pandas.core.construction import create_series_with_explicit_dtype


class TestFromDict:
Expand Down Expand Up @@ -79,9 +78,7 @@ def test_constructor_list_of_series(self):
OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
]
data = [
create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data
]
data = [Series(d) for d in data]

result = DataFrame(data)
sdict = OrderedDict(zip(range(len(data)), data))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.tests.extension.decimal import to_decimal


Expand Down Expand Up @@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series):
assert x.values.dtype == dt


@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
@pytest.mark.parametrize("pdt", [Series, DataFrame])
@pytest.mark.parametrize("dt", np.sctypes["int"])
def test_concat_will_upcast(dt, pdt):
with catch_warnings(record=True):
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ def test_astype_empty_constructor_equality(self, dtype):
"m", # Generic timestamps raise a ValueError. Already tested.
):
init_empty = Series([], dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
as_type_empty = Series([]).astype(dtype)
as_type_empty = Series([]).astype(dtype)
tm.assert_series_equal(init_empty, as_type_empty)

@pytest.mark.parametrize("dtype", [str, np.str_])
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_is_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

from pandas import Series
from pandas.core.construction import create_series_with_explicit_dtype


@pytest.mark.parametrize(
Expand All @@ -19,7 +18,7 @@
)
def test_is_unique(data, expected):
# GH#11946 / GH#25180
ser = create_series_with_explicit_dtype(data, dtype_if_empty=object)
ser = Series(data)
assert ser.is_unique is expected


Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self):
s = pd.Series(list("abcd"))
tm.assert_series_equal(s, s.replace({}))

with tm.assert_produces_warning(FutureWarning):
empty_series = pd.Series([])
empty_series = pd.Series([])
tm.assert_series_equal(s, s.replace(empty_series))

def test_replace_string_with_number(self):
Expand Down
Loading

0 comments on commit 639001d

Please sign in to comment.