From c8276c776b2c52cde95f771ab0bd01722ef7e516 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Dec 2021 09:56:01 +0100 Subject: [PATCH 1/9] REGR: allow reindexing datetimelike with upcast / raise deprecation warning --- pandas/core/array_algos/take.py | 6 ++++++ pandas/tests/frame/methods/test_reindex.py | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index c4b8f833f4ad3..bf874606f4a89 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -94,6 +94,12 @@ def take_nd( """ if fill_value is lib.no_default: fill_value = na_value_for_dtype(arr.dtype, compat=False) + elif arr.dtype.kind in "mM": + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if arr.dtype != dtype: + # EA.take is strict about returning a new object of the same type + # so for that case cast upfront + arr = arr.astype(dtype) if not isinstance(arr, np.ndarray): # i.e. ExtensionArray, diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index bee8025275b42..c9acfd46d4932 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -113,6 +113,11 @@ def test_reindex_date_fill_value(self): ) tm.assert_frame_equal(res, expected) + # only reindexing rows + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), fill_value=fv) + tm.assert_frame_equal(res, expected[["A", "B"]]) + # same with a datetime-castable str res = df.reindex( index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01" From f35a3ef18bda046f5c68a64e3088c25860711484 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Dec 2021 11:33:20 +0100 Subject: [PATCH 2/9] add test for GH-42921 --- pandas/tests/series/methods/test_reindex.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 4350a5d9ac989..4edc11b2e6054 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -9,6 +9,8 @@ Period, PeriodIndex, Series, + Timedelta, + Timestamp, date_range, isna, ) @@ -300,6 +302,21 @@ def test_reindex_fill_value(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) +@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) +def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): + # https://github.com/pandas-dev/pandas/issues/42921 + if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): + # use the scalar that is not compatible with the dtype for this test + fill_value = Timestamp(0) + + ser = Series([NaT], dtype=dtype) + + result = ser.reindex([0, 1], fill_value=fill_value) + expected = Series([None, fill_value], index=[0, 1], dtype=object) + tm.assert_series_equal(result, expected) + + def test_reindex_datetimeindexes_tz_naive_and_aware(): # GH 8306 idx = date_range("20131101", tz="America/Chicago", periods=7) From 92c1fe4288c4aed9074d818c6b709e8a0b53a7c7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Dec 2021 11:37:51 +0100 Subject: [PATCH 3/9] add whatsnew --- doc/source/whatsnew/v1.3.5.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 49d37eff63323..d155d23098f69 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed performance regression in :func:`read_csv` (:issue:`44106`) - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) - Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) +- Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) - .. --------------------------------------------------------------------------- From 2fd957ce3cd7340ae8a948d2bca1af43f1313532 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 11 Dec 2021 11:21:30 +0100 Subject: [PATCH 4/9] maybe_promote is only for np.dtype --- pandas/core/array_algos/take.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index bf874606f4a89..188725f003f1e 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -94,7 +94,7 @@ def take_nd( """ if fill_value is lib.no_default: fill_value = na_value_for_dtype(arr.dtype, compat=False) - elif arr.dtype.kind in "mM": + elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM": dtype, fill_value = maybe_promote(arr.dtype, fill_value) if arr.dtype != dtype: # EA.take is strict about returning a new object of the same type From c4b9ba77d9d0f53b3ba152ccafa8c55d9686e831 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 11 Dec 2021 15:10:37 +0100 Subject: [PATCH 5/9] skip array manager --- pandas/tests/frame/methods/test_reindex.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index c9acfd46d4932..b1cabf0a06752 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -8,6 +8,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -97,6 +99,7 @@ def test_reindex_copies(self): result2 = df.reindex(columns=cols, index=df.index, copy=True) assert not np.shares_memory(result2[0]._values, df[0]._values) + @td.skip_array_manager_not_yet_implemented def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) From db189a716aa7939fd78b21fc55c90185783cd8ef Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 14 Dec 2021 17:35:22 +0100 Subject: [PATCH 6/9] move whatsnew --- doc/source/whatsnew/v1.3.5.rst | 1 - doc/source/whatsnew/v1.4.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 499f8cf8bd12b..c53707610db37 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -21,7 +21,6 @@ Fixed regressions - Fixed performance regression in :func:`read_csv` (:issue:`44106`) - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) - Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) -- Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) - Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`) - Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 916bcf3db9a4a..294ec46b4bb5c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -623,6 +623,7 @@ Datetimelike - Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`) - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`) - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) +- Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) - Timedelta From 872ca457fd8863d3137d10ad096b4aaff7c3e1d9 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 00:24:23 +0100 Subject: [PATCH 7/9] Try fix error --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f18f1c760ca28..21ca186d61b62 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -575,7 +575,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): except (ValueError, TypeError): pass else: - if fv.tz is None: + if isna(fv) or fv.tz is None: return dtype, fv.asm8 return np.dtype("object"), fill_value From 1139b5f57420f4467853ea9d368ea9d111631da3 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 23:57:40 +0100 Subject: [PATCH 8/9] Adjust test --- pandas/tests/series/methods/test_reindex.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 4edc11b2e6054..d1f329f7a2d68 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -304,8 +304,11 @@ def test_reindex_fill_value(): @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) @pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) -def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): +def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): # https://github.com/pandas-dev/pandas/issues/42921 + if using_array_manager: + pytest.skip("Array manager does not promote dtype, hence we fail") + if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): # use the scalar that is not compatible with the dtype for this test fill_value = Timestamp(0) From 8eb4d5950b805d301f637146f47b2b81d8bfb00f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 16 Jan 2022 18:23:04 +0100 Subject: [PATCH 9/9] fix linting --- pandas/tests/frame/methods/test_reindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 4f35b9bdc74c4..8575e7895ae5a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -7,8 +7,8 @@ import numpy as np import pytest -import pandas.util._test_decorators as td from pandas._libs.tslibs.timezones import dateutil_gettz as gettz +import pandas.util._test_decorators as td import pandas as pd from pandas import (