From 6fa02d7067f247c1059b9b0b1643e54abd32d884 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 10 Apr 2023 18:22:58 +0200 Subject: [PATCH 1/5] BUG: DataFrame reductions casting ts resolution always to nanoseconds --- doc/source/whatsnew/v2.0.1.rst | 1 + pandas/core/dtypes/cast.py | 4 ++-- pandas/tests/frame/test_reductions.py | 34 +++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index caf237fb15163..17f2809a0f3f7 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -26,6 +26,7 @@ Bug fixes - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`) - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`) - Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`) +- Bug in :meth:`DataFrame.max` and related reductions always casting different :class:`Timestamp` resolutions to nanoseconds (:issue:`52524`) .. --------------------------------------------------------------------------- .. 
_whatsnew_201.other: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f1945a2eb32ab..7fc6fd7fff9b5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1408,9 +1408,9 @@ def find_common_type(types): # take lowest unit if all(is_datetime64_dtype(t) for t in types): - return np.dtype("datetime64[ns]") + return np.dtype(max(types)) if all(is_timedelta64_dtype(t) for t in types): - return np.dtype("timedelta64[ns]") + return np.dtype(max(types)) # don't mix bool / int or float or complex # this is different from numpy, which casts bool with float/int as int diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 28809e2ecb788..94a387db0635b 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1508,6 +1508,40 @@ def test_reductions_skipna_none_raises( with pytest.raises(ValueError, match=msg): getattr(obj, all_reductions)(skipna=None) + def test_reduction_timestamp_smallest_unit(self): + # GH#52524 + df = DataFrame( + { + "a": Series([Timestamp("2019-12-31")], dtype="datetime64[s]"), + "b": Series( + [Timestamp("2019-12-31 00:00:00.123")], dtype="datetime64[ms]" + ), + } + ) + result = df.max() + expected = Series( + [Timestamp("2019-12-31"), Timestamp("2019-12-31 00:00:00.123")], + dtype="datetime64[ms]", + index=["a", "b"], + ) + tm.assert_series_equal(result, expected) + + def test_reduction_timedelta_smallest_unit(self): + # GH#52524 + df = DataFrame( + { + "a": Series([pd.Timedelta("1 days")], dtype="timedelta64[s]"), + "b": Series([pd.Timedelta("1 days")], dtype="timedelta64[ms]"), + } + ) + result = df.max() + expected = Series( + [pd.Timedelta("1 days"), pd.Timedelta("1 days")], + dtype="timedelta64[ms]", + index=["a", "b"], + ) + tm.assert_series_equal(result, expected) + class TestNuisanceColumns: @pytest.mark.parametrize("method", ["any", "all"]) From 33a3d5f98c4fbfcf305c97cca48b6e98822a6401 Mon Sep 17 00:00:00 2001 From: 
Patrick Hoefler Date: Mon, 10 Apr 2023 19:40:17 +0200 Subject: [PATCH 2/5] Lets see what breaks --- pandas/core/internals/array_manager.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 203fc9c7f78cb..c33a5bf9cd211 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -14,7 +14,6 @@ import numpy as np from pandas._libs import ( - NaT, algos as libalgos, lib, ) @@ -981,13 +980,8 @@ def reduce(self, func: Callable) -> Self: result_arrays: list[np.ndarray] = [] for i, arr in enumerate(self.arrays): res = func(arr, axis=0) - - # TODO NaT doesn't preserve dtype, so we need to ensure to create - # a timedelta result array if original was timedelta - # what if datetime results in timedelta? (eg std) - dtype = arr.dtype if res is NaT else None result_arrays.append( - sanitize_array([res], None, dtype=dtype) # type: ignore[arg-type] + sanitize_array([res], None, dtype=arr.dtype) # type: ignore[arg-type] ) index = Index._simple_new(np.array([None], dtype=object)) # placeholder From 5ab0a275c75bb79f4b35be52277b2cc9688a7d78 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 10 Apr 2023 21:13:50 +0200 Subject: [PATCH 3/5] Revert --- pandas/core/internals/array_manager.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index c33a5bf9cd211..203fc9c7f78cb 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -14,6 +14,7 @@ import numpy as np from pandas._libs import ( + NaT, algos as libalgos, lib, ) @@ -980,8 +981,13 @@ def reduce(self, func: Callable) -> Self: result_arrays: list[np.ndarray] = [] for i, arr in enumerate(self.arrays): res = func(arr, axis=0) + + # TODO NaT doesn't preserve dtype, so we need to ensure to create + # a timedelta result array if original 
was timedelta + # what if datetime results in timedelta? (eg std) + dtype = arr.dtype if res is NaT else None result_arrays.append( - sanitize_array([res], None, dtype=arr.dtype) # type: ignore[arg-type] + sanitize_array([res], None, dtype=dtype) # type: ignore[arg-type] ) index = Index._simple_new(np.array([None], dtype=object)) # placeholder From a63bc59e51ffb29964d8b64aa39f2952fcbb7f9a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 10 Apr 2023 21:14:10 +0200 Subject: [PATCH 4/5] Skip for array manager --- pandas/tests/frame/test_reductions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 94a387db0635b..731c72ee8d39d 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1508,6 +1508,7 @@ def test_reductions_skipna_none_raises( with pytest.raises(ValueError, match=msg): getattr(obj, all_reductions)(skipna=None) + @td.skip_array_manager_invalid_test def test_reduction_timestamp_smallest_unit(self): # GH#52524 df = DataFrame( @@ -1526,6 +1527,7 @@ def test_reduction_timestamp_smallest_unit(self): ) tm.assert_series_equal(result, expected) + @td.skip_array_manager_invalid_test def test_reduction_timedelta_smallest_unit(self): # GH#52524 df = DataFrame( From 73a5bcf8463759b0cb567fd2769b3ee668fc6bd8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 11 Apr 2023 00:40:14 +0200 Subject: [PATCH 5/5] Switch --- pandas/tests/frame/test_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 731c72ee8d39d..49d1106c424fd 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1527,7 +1527,7 @@ def test_reduction_timestamp_smallest_unit(self): ) tm.assert_series_equal(result, expected) - @td.skip_array_manager_invalid_test + @td.skip_array_manager_not_yet_implemented def 
test_reduction_timedelta_smallest_unit(self): # GH#52524 df = DataFrame(