Skip to content

Commit

Permalink
DEPR: Enforce Series(float_with_nan, dtype=inty) (pandas-dev#49605)
Browse files Browse the repository at this point in the history
* DEPR: Enforce Series(float_with_nan, dtype=inty)

* update asv

* troubleshoot asv

* suggested asv edit
  • Loading branch information
jbrockmendel authored and mliu08 committed Nov 27, 2022
1 parent 3afb93f commit f7303b1
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 82 deletions.
36 changes: 20 additions & 16 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,31 +600,35 @@ def time_frame_agg(self, dtype, method):


class Cumulative:
param_names = ["dtype", "method"]
param_names = ["dtype", "method", "with_nans"]
params = [
["float64", "int64", "Float64", "Int64"],
["cummin", "cummax", "cumsum"],
[True, False],
]

def setup(self, dtype, method):
def setup(self, dtype, method, with_nans):
if with_nans and dtype == "int64":
raise NotImplementedError("Construction of df would raise")

N = 500_000
vals = np.random.randint(-10, 10, (N, 5))
null_vals = vals.astype(float, copy=True)
null_vals[::2, :] = np.nan
null_vals[::3, :] = np.nan
df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
keys = np.random.randint(0, 100, size=N)
df["key"] = keys
null_df["key"] = keys
self.df = df
self.null_df = null_df
vals = np.random.randint(-10, 10, (N, 5))

def time_frame_transform(self, dtype, method):
self.df.groupby("key").transform(method)
if with_nans:
null_vals = vals.astype(float, copy=True)
null_vals[::2, :] = np.nan
null_vals[::3, :] = np.nan
df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
df["key"] = keys
self.df = df
else:
df = DataFrame(vals, columns=list("abcde")).astype(dtype, copy=False)
df["key"] = keys
self.df = df

def time_frame_transform_many_nulls(self, dtype, method):
self.null_df.groupby("key").transform(method)
def time_frame_transform(self, dtype, method, with_nans):
self.df.groupby("key").transform(method)


class RankWithTies:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ Removal of prior version deprecations/changes
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
- Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`)
- Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with integer dtype and floating-point data containing ``NaN``; this now raises ``IntCastingNaNError`` (:issue:`40110`)
- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
- Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`)
- Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`)
Expand Down
13 changes: 1 addition & 12 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
cast,
overload,
)
import warnings

import numpy as np
from numpy import ma
Expand All @@ -29,7 +28,6 @@
T,
)
from pandas.errors import IntCastingNaNError
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.base import (
ExtensionDtype,
Expand Down Expand Up @@ -577,16 +575,7 @@ def sanitize_array(
subarr = maybe_cast_to_integer_array(data, dtype)

except IntCastingNaNError:
warnings.warn(
"In a future version, passing float-dtype values containing NaN "
"and an integer dtype will raise IntCastingNaNError "
"(subclass of ValueError) instead of silently ignoring the "
"passed dtype. To retain the old behavior, call Series(arr) or "
"DataFrame(arr) without passing a dtype.",
FutureWarning,
stacklevel=find_stack_level(),
)
subarr = np.array(data, copy=copy)
raise
except ValueError:
# Pre-2.0, we would have different behavior for Series vs DataFrame.
# DataFrame would call np.array(data, dtype=dtype, copy=copy),
Expand Down
50 changes: 21 additions & 29 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytest
import pytz

from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype
Expand Down Expand Up @@ -105,16 +106,13 @@ def test_constructor_dict_with_tzaware_scalar(self):
def test_construct_ndarray_with_nas_and_int_dtype(self):
# GH#26919 match Series by not casting np.nan to meaningless int
arr = np.array([[1, np.nan], [2, 3]])
with tm.assert_produces_warning(FutureWarning):
df = DataFrame(arr, dtype="i8")
assert df.values.dtype == arr.dtype
assert isna(df.iloc[0, 1])
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
DataFrame(arr, dtype="i8")

# check this matches Series behavior
with tm.assert_produces_warning(FutureWarning):
ser = Series(arr[0], dtype="i8", name=0)
expected = df.iloc[0]
tm.assert_series_equal(ser, expected)
with pytest.raises(IntCastingNaNError, match=msg):
Series(arr[0], dtype="i8", name=0)

def test_construct_from_list_of_datetimes(self):
df = DataFrame([datetime.now(), datetime.now()])
Expand Down Expand Up @@ -966,21 +964,16 @@ def _check_basic_constructor(self, empty):
assert len(frame.index) == 3
assert len(frame.columns) == 1

warn = None if empty is np.ones else FutureWarning
with tm.assert_produces_warning(warn):
if empty is not np.ones:
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64)
return
else:
frame = DataFrame(
mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64
)
if empty is np.ones:
# passing dtype casts
assert frame.values.dtype == np.int64
else:
# i.e. ma.masked_all
# Since we have NaNs, refuse to cast to int dtype, which would take NaN
# to meaningless integers. This matches Series behavior. GH#26919
assert frame.isna().all().all()
assert frame.values.dtype == np.float64
assert isna(frame.values).all()

# wrong size axis labels
msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
Expand Down Expand Up @@ -1741,11 +1734,10 @@ def test_constructor_mix_series_nonseries(self, float_frame):
DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]})

def test_constructor_miscast_na_int_dtype(self):
msg = "float-dtype values containing NaN and an integer dtype"
with tm.assert_produces_warning(FutureWarning, match=msg):
df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
expected = DataFrame([[np.nan, 1], [1, 0]])
tm.assert_frame_equal(df, expected)
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"

with pytest.raises(IntCastingNaNError, match=msg):
DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)

def test_constructor_column_duplicates(self):
# it works! #2079
Expand Down Expand Up @@ -2722,16 +2714,16 @@ def test_floating_values_integer_dtype(self):

# with NaNs, we go through a different path, which raises IntCastingNaNError
arr[0, 0] = np.nan
msg = "passing float-dtype values containing NaN"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
DataFrame(arr, dtype="i8")
with tm.assert_produces_warning(FutureWarning, match=msg):
with pytest.raises(IntCastingNaNError, match=msg):
Series(arr[0], dtype="i8")
# The raising behavior matches what we get via astype:
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(ValueError, match=msg):
with pytest.raises(IntCastingNaNError, match=msg):
DataFrame(arr).astype("i8")
with pytest.raises(ValueError, match=msg):
with pytest.raises(IntCastingNaNError, match=msg):
Series(arr[0]).astype("i8")


Expand Down
37 changes: 18 additions & 19 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
lib,
)
from pandas.compat import is_numpy_dev
from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -670,10 +671,9 @@ def test_constructor_sanitize(self):
s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8")
assert s.dtype == np.dtype("i8")

msg = "float-dtype values containing NaN and an integer dtype"
with tm.assert_produces_warning(FutureWarning, match=msg):
ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
assert ser.dtype == np.dtype("f8")
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")

def test_constructor_copy(self):
# GH15125
Expand Down Expand Up @@ -809,18 +809,17 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
res = frame_or_series(list(arr), dtype="i8")
tm.assert_equal(res, expected)

# When we have NaNs, we silently ignore the integer dtype
# pre-2.0, when we had NaNs, we silently ignored the integer dtype
arr[0] = np.nan
expected = frame_or_series(arr)
msg = "passing float-dtype values containing NaN and an integer dtype"
with tm.assert_produces_warning(FutureWarning, match=msg):
obj = frame_or_series(arr, dtype="i8")
tm.assert_equal(obj, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
frame_or_series(arr, dtype="i8")

with pytest.raises(IntCastingNaNError, match=msg):
# same behavior if we pass list instead of the ndarray
obj = frame_or_series(list(arr), dtype="i8")
tm.assert_equal(obj, expected)
frame_or_series(list(arr), dtype="i8")

# float array that can be losslessly cast to integers
arr = np.array([1.0, 2.0], dtype="float64")
Expand Down Expand Up @@ -854,13 +853,13 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
# Updated: make sure we treat this list the same as we would treat the
# equivalent ndarray
vals = [1, 2, np.nan]
msg = "In a future version, passing float-dtype values containing NaN"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = Series(vals, dtype=any_int_numpy_dtype)
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
tm.assert_series_equal(res, expected)
assert np.isnan(expected.iloc[-1])
# pre-2.0 this would return with a float dtype, in 2.0 we raise

msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
Series(vals, dtype=any_int_numpy_dtype)
with pytest.raises(IntCastingNaNError, match=msg):
Series(np.array(vals), dtype=any_int_numpy_dtype)

def test_constructor_dtype_no_cast(self):
# see gh-1572
Expand Down
13 changes: 7 additions & 6 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pytest

from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -100,13 +101,13 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)

msg = "In a future version, passing float-dtype values containing NaN"
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
arr[2] = np.nan
with tm.assert_produces_warning(FutureWarning, match=msg):
res = Series(darr, dtype="i8")
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)
with pytest.raises(IntCastingNaNError, match=msg):
Series(darr, dtype="i8")
# which is the same as we get with a numpy input
with pytest.raises(IntCastingNaNError, match=msg):
Series(arr, dtype="i8")


def test_xarray(df):
Expand Down

0 comments on commit f7303b1

Please sign in to comment.