Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Series(dt64_naive, dtype=dt64tz) #49242

Merged
merged 3 commits into from
Oct 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :meth:`Index.is_mixed`, check ``index.inferred_type`` directly instead (:issue:`32922`)
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
- Enforced deprecation of the old behavior when passing ``datetime64[ns]`` data together with a timezone-aware dtype to :class:`Series`; the values are now interpreted as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2123,10 +2123,15 @@ def _sequence_to_dt64ns(
# Convert tz-naive to UTC
# TODO: if tz is UTC, are there situations where we *don't* want a
# copy? tz_localize_to_utc always makes one.
shape = data.shape
if data.ndim > 1:
data = data.ravel()

data = tzconversion.tz_localize_to_utc(
data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
)
data = data.view(new_dtype)
data = data.reshape(shape)

assert data.dtype == new_dtype, data.dtype
result = data
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@
is_object_dtype,
is_timedelta64_ns_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
PandasDtype,
)
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import (
ABCExtensionArray,
ABCIndex,
Expand Down Expand Up @@ -800,16 +797,6 @@ def _try_cast(

elif isinstance(dtype, ExtensionDtype):
# create an extension array from its dtype
if isinstance(dtype, DatetimeTZDtype):
# We can't go through _from_sequence because it handles dt64naive
# data differently; _from_sequence treats naive as wall times,
# while maybe_cast_to_datetime treats it as UTC
# see test_maybe_promote_any_numpy_dtype_with_datetimetz
# TODO(2.0): with deprecations enforced, should be able to remove
# special case.
return maybe_cast_to_datetime(arr, dtype)
# TODO: copy?

array_type = dtype.construct_array_type()._from_sequence
subarr = array_type(arr, dtype=dtype, copy=copy)
return subarr
Expand Down
86 changes: 27 additions & 59 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
is_complex,
is_complex_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float,
Expand Down Expand Up @@ -1314,13 +1313,15 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:


def maybe_cast_to_datetime(
value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None
value: ExtensionArray | np.ndarray | list, dtype: np.dtype | None
) -> ExtensionArray | np.ndarray:
"""
try to cast the array/value to a datetimelike dtype, converting float
nan to iNaT

We allow a list *only* when dtype is not None.

Caller is responsible for handling ExtensionDtype cases.
"""
from pandas.core.arrays.datetimes import sequence_to_datetimes
from pandas.core.arrays.timedeltas import TimedeltaArray
Expand All @@ -1332,18 +1333,22 @@ def maybe_cast_to_datetime(
# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
dtype = cast(np.dtype, dtype)
dtype = _ensure_nanosecond_dtype(dtype)
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
res = TimedeltaArray._from_sequence(value, dtype=dtype)
return res

if dtype is not None:
is_datetime64 = is_datetime64_dtype(dtype)
is_datetime64tz = is_datetime64tz_dtype(dtype)

vdtype = getattr(value, "dtype", None)

if is_datetime64 or is_datetime64tz:
dtype = _ensure_nanosecond_dtype(dtype)
if is_datetime64:
# Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type
# "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

value = np.array(value, copy=False)

Expand All @@ -1352,59 +1357,22 @@ def maybe_cast_to_datetime(
_disallow_mismatched_datetimelike(value, dtype)

try:
if is_datetime64:
dta = sequence_to_datetimes(value)
# GH 25843: Remove tz information since the dtype
# didn't specify one

if dta.tz is not None:
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
dta = dta.astype(dtype, copy=False)
value = dta
elif is_datetime64tz:
dtype = cast(DatetimeTZDtype, dtype)
# The string check can be removed once issue #13712
# is solved. String data that is passed with a
# datetime64tz is assumed to be naive which should
# be localized to the timezone.
is_dt_string = is_string_dtype(value.dtype)
dta = sequence_to_datetimes(value)
if dta.tz is not None:
value = dta.astype(dtype, copy=False)
elif is_dt_string:
# Strings here are naive, so directly localize
# equiv: dta.astype(dtype) # though deprecated

value = dta.tz_localize(dtype.tz)
else:
# Numeric values are UTC at this point,
# so localize and convert
# equiv: Series(dta).astype(dtype) # though deprecated
if getattr(vdtype, "kind", None) == "M":
# GH#24559, GH#33401 deprecate behavior inconsistent
# with DatetimeArray/DatetimeIndex
warnings.warn(
"In a future version, constructing a Series "
"from datetime64[ns] data and a "
"DatetimeTZDtype will interpret the data "
"as wall-times instead of "
"UTC times, matching the behavior of "
"DatetimeIndex. To treat the data as UTC "
"times, use pd.Series(data).dt"
".tz_localize('UTC').tz_convert(dtype.tz) "
"or pd.Series(data.view('int64'), dtype=dtype)",
FutureWarning,
stacklevel=find_stack_level(),
)

value = dta.tz_localize("UTC").tz_convert(dtype.tz)
dta = sequence_to_datetimes(value)
# GH 25843: Remove tz information since the dtype
# didn't specify one

if dta.tz is not None:
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
dta = dta.astype(dtype, copy=False)
value = dta

except OutOfBoundsDatetime:
raise
except ParserError:
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
is_named_tuple,
is_object_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
Expand Down Expand Up @@ -1054,7 +1055,15 @@ def _convert_object_array(
def convert(arr):
if dtype != np.dtype("O"):
arr = lib.maybe_convert_objects(arr)
arr = maybe_cast_to_datetime(arr, dtype)

if isinstance(dtype, ExtensionDtype):
# TODO: test(s) that get here
# TODO: try to de-duplicate this convert function with
# core.construction functions
cls = dtype.construct_array_type()
arr = cls._from_sequence(arr, dtype=dtype, copy=False)
else:
arr = maybe_cast_to_datetime(arr, dtype)
return arr

arrays = [convert(arr) for arr in content]
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,14 +1241,14 @@ def test_construction_consistency(self):
result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype)
tm.assert_series_equal(result, ser)

msg = "will interpret the data as wall-times"
with tm.assert_produces_warning(FutureWarning, match=msg):
# deprecate behavior inconsistent with DatetimeIndex GH#33401
result = Series(ser.values, dtype=ser.dtype)
tm.assert_series_equal(result, ser)
# Pre-2.0 dt64 values were treated as utc, which was inconsistent
# with DatetimeIndex, which treats them as wall times, see GH#33401
result = Series(ser.values, dtype=ser.dtype)
expected = Series(ser.values).dt.tz_localize(ser.dtype.tz)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(None):
# one suggested alternative to the deprecated usage
# one suggested alternative to the deprecated (changed in 2.0) usage
middle = Series(ser.values).dt.tz_localize("UTC")
result = middle.dt.tz_convert(ser.dtype.tz)
tm.assert_series_equal(result, ser)
Expand Down