Skip to content

Commit

Permalink
DEPR: Series(dt64_naive, dtype=dt64tz) (#49242)
Browse files Browse the repository at this point in the history
* DEPR: Series(dt64_naive, dtype=dt64tz)

* mypy fixup
  • Loading branch information
jbrockmendel authored Oct 25, 2022
1 parent 6a1ae42 commit 74c4cd1
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 80 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :meth:`Index.is_mixed`, check ``index.inferred_type`` directly instead (:issue:`32922`)
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
- Enforced deprecation of the previous behavior when passing ``datetime64[ns]`` dtype data together with a timezone-aware dtype to :class:`Series`; the values are now interpreted as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2123,10 +2123,15 @@ def _sequence_to_dt64ns(
# Convert tz-naive to UTC
# TODO: if tz is UTC, are there situations where we *don't* want a
# copy? tz_localize_to_utc always makes one.
shape = data.shape
if data.ndim > 1:
data = data.ravel()

data = tzconversion.tz_localize_to_utc(
data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
)
data = data.view(new_dtype)
data = data.reshape(shape)

assert data.dtype == new_dtype, data.dtype
result = data
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@
is_object_dtype,
is_timedelta64_ns_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
PandasDtype,
)
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import (
ABCExtensionArray,
ABCIndex,
Expand Down Expand Up @@ -800,16 +797,6 @@ def _try_cast(

elif isinstance(dtype, ExtensionDtype):
# create an extension array from its dtype
if isinstance(dtype, DatetimeTZDtype):
# We can't go through _from_sequence because it handles dt64naive
# data differently; _from_sequence treats naive as wall times,
# while maybe_cast_to_datetime treats it as UTC
# see test_maybe_promote_any_numpy_dtype_with_datetimetz
# TODO(2.0): with deprecations enforced, should be able to remove
# special case.
return maybe_cast_to_datetime(arr, dtype)
# TODO: copy?

array_type = dtype.construct_array_type()._from_sequence
subarr = array_type(arr, dtype=dtype, copy=copy)
return subarr
Expand Down
86 changes: 27 additions & 59 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
is_complex,
is_complex_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float,
Expand Down Expand Up @@ -1314,13 +1313,15 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:


def maybe_cast_to_datetime(
value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None
value: ExtensionArray | np.ndarray | list, dtype: np.dtype | None
) -> ExtensionArray | np.ndarray:
"""
try to cast the array/value to a datetimelike dtype, converting float
nan to iNaT
We allow a list *only* when dtype is not None.
Caller is responsible for handling ExtensionDtype cases.
"""
from pandas.core.arrays.datetimes import sequence_to_datetimes
from pandas.core.arrays.timedeltas import TimedeltaArray
Expand All @@ -1332,18 +1333,22 @@ def maybe_cast_to_datetime(
# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
dtype = cast(np.dtype, dtype)
dtype = _ensure_nanosecond_dtype(dtype)
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
res = TimedeltaArray._from_sequence(value, dtype=dtype)
return res

if dtype is not None:
is_datetime64 = is_datetime64_dtype(dtype)
is_datetime64tz = is_datetime64tz_dtype(dtype)

vdtype = getattr(value, "dtype", None)

if is_datetime64 or is_datetime64tz:
dtype = _ensure_nanosecond_dtype(dtype)
if is_datetime64:
# Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type
# "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

value = np.array(value, copy=False)

Expand All @@ -1352,59 +1357,22 @@ def maybe_cast_to_datetime(
_disallow_mismatched_datetimelike(value, dtype)

try:
if is_datetime64:
dta = sequence_to_datetimes(value)
# GH 25843: Remove tz information since the dtype
# didn't specify one

if dta.tz is not None:
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
dta = dta.astype(dtype, copy=False)
value = dta
elif is_datetime64tz:
dtype = cast(DatetimeTZDtype, dtype)
# The string check can be removed once issue #13712
# is solved. String data that is passed with a
# datetime64tz is assumed to be naive which should
# be localized to the timezone.
is_dt_string = is_string_dtype(value.dtype)
dta = sequence_to_datetimes(value)
if dta.tz is not None:
value = dta.astype(dtype, copy=False)
elif is_dt_string:
# Strings here are naive, so directly localize
# equiv: dta.astype(dtype) # though deprecated

value = dta.tz_localize(dtype.tz)
else:
# Numeric values are UTC at this point,
# so localize and convert
# equiv: Series(dta).astype(dtype) # though deprecated
if getattr(vdtype, "kind", None) == "M":
# GH#24559, GH#33401 deprecate behavior inconsistent
# with DatetimeArray/DatetimeIndex
warnings.warn(
"In a future version, constructing a Series "
"from datetime64[ns] data and a "
"DatetimeTZDtype will interpret the data "
"as wall-times instead of "
"UTC times, matching the behavior of "
"DatetimeIndex. To treat the data as UTC "
"times, use pd.Series(data).dt"
".tz_localize('UTC').tz_convert(dtype.tz) "
"or pd.Series(data.view('int64'), dtype=dtype)",
FutureWarning,
stacklevel=find_stack_level(),
)

value = dta.tz_localize("UTC").tz_convert(dtype.tz)
dta = sequence_to_datetimes(value)
# GH 25843: Remove tz information since the dtype
# didn't specify one

if dta.tz is not None:
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
dta = dta.astype(dtype, copy=False)
value = dta

except OutOfBoundsDatetime:
raise
except ParserError:
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
is_named_tuple,
is_object_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
Expand Down Expand Up @@ -1054,7 +1055,15 @@ def _convert_object_array(
def convert(arr):
if dtype != np.dtype("O"):
arr = lib.maybe_convert_objects(arr)
arr = maybe_cast_to_datetime(arr, dtype)

if isinstance(dtype, ExtensionDtype):
# TODO: test(s) that get here
# TODO: try to de-duplicate this convert function with
# core.construction functions
cls = dtype.construct_array_type()
arr = cls._from_sequence(arr, dtype=dtype, copy=False)
else:
arr = maybe_cast_to_datetime(arr, dtype)
return arr

arrays = [convert(arr) for arr in content]
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,14 +1241,14 @@ def test_construction_consistency(self):
result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype)
tm.assert_series_equal(result, ser)

msg = "will interpret the data as wall-times"
with tm.assert_produces_warning(FutureWarning, match=msg):
# deprecate behavior inconsistent with DatetimeIndex GH#33401
result = Series(ser.values, dtype=ser.dtype)
tm.assert_series_equal(result, ser)
# Pre-2.0, dt64 values were treated as UTC times, which was inconsistent
# with DatetimeIndex (which treats them as wall times); see GH#33401
result = Series(ser.values, dtype=ser.dtype)
expected = Series(ser.values).dt.tz_localize(ser.dtype.tz)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(None):
# one suggested alternative to the deprecated usage
# one suggested alternative to the deprecated (changed in 2.0) usage
middle = Series(ser.values).dt.tz_localize("UTC")
result = middle.dt.tz_convert(ser.dtype.tz)
tm.assert_series_equal(result, ser)
Expand Down

0 comments on commit 74c4cd1

Please sign in to comment.