Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: datetimelike.astype(int) #38544

Merged
merged 8 commits into from
Dec 23, 2020
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~

- Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``DatetimeTZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

casting -> astype

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated+green

-
-

Expand Down
8 changes: 8 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,14 @@ def astype(self, dtype, copy=True):
elif is_integer_dtype(dtype):
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
warnings.warn(
f"casting {self.dtype} values to int64 with .astype(...) is "
"deprecated and will raise in a future version. "
"Use .view(...) instead.",
FutureWarning,
stacklevel=3,
)

values = self.asi8

if is_unsigned_integer_dtype(dtype):
Expand Down
18 changes: 17 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Routines for casting.
"""

from contextlib import suppress
from datetime import datetime, timedelta
from typing import (
Expand All @@ -17,6 +16,7 @@
Type,
Union,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -996,6 +996,14 @@ def astype_nansafe(
if is_object_dtype(dtype):
return ints_to_pydatetime(arr.view(np.int64))
elif dtype == np.int64:
warnings.warn(
f"casting {arr.dtype} values to int64 with .astype(...) "
"is deprecated and will raise in a future version. "
"Use .view(...) instead.",
FutureWarning,
# stacklevel chosen to be correct when reached via Series.astype
stacklevel=7,
)
if isna(arr).any():
raise ValueError("Cannot convert NaT values to integer")
return arr.view(dtype)
Expand All @@ -1010,6 +1018,14 @@ def astype_nansafe(
if is_object_dtype(dtype):
return ints_to_pytimedelta(arr.view(np.int64))
elif dtype == np.int64:
warnings.warn(
f"casting {arr.dtype} values to int64 with .astype(...) "
"is deprecated and will raise in a future version. "
"Use .view(...) instead.",
FutureWarning,
# stacklevel chosen to be correct when reached via Series.astype
stacklevel=7,
)
if isna(arr).any():
raise ValueError("Cannot convert NaT values to integer")
return arr.view(dtype)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.util._decorators import Appender, Substitution, doc

from pandas.core.dtypes.common import is_datetime64_ns_dtype
from pandas.core.dtypes.missing import isna

import pandas.core.common as common
from pandas.core.util.numba_ import maybe_use_numba
Expand Down Expand Up @@ -252,7 +253,9 @@ def __init__(
raise ValueError(
"halflife must be a string or datetime.timedelta object"
)
self.times = np.asarray(times.astype(np.int64))
if isna(times).any():
raise ValueError("Cannot convert NaT values to integer")
self.times = np.asarray(times.view(np.int64))
self.halflife = Timedelta(halflife).value
# Halflife is no longer applicable when calculating COM
# But allow COM to still be calculated if the user passes other decay args
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1739,7 +1739,7 @@ def get_format_timedelta64(

If box, then show the return in quotes
"""
values_int = values.astype(np.int64)
values_int = values.view(np.int64)

consider_values = values_int != iNaT

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,15 +371,15 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
if is_datetime64_dtype(dates.dtype):
if delta:
time_delta = dates - stata_epoch
d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds
d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds
if days or year:
date_index = DatetimeIndex(dates)
d["year"] = date_index._data.year
d["month"] = date_index._data.month
if days:
days_in_ns = dates.astype(np.int64) - to_datetime(
days_in_ns = dates.view(np.int64) - to_datetime(
d["year"], format="%Y"
).astype(np.int64)
).view(np.int64)
d["days"] = days_in_ns // NS_PER_DAY

elif infer_dtype(dates, skipna=False) == "datetime":
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,18 @@ def test_astype_copies(self, dtype, other):
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])
result = arr.astype(dtype)
with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)

with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
expected = arr.astype(expected_dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/arrays/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,26 +123,36 @@ def test_astype(dtype):
# We choose to ignore the sign and size of integers for
# Period/Datetime/Timedelta astype
arr = period_array(["2000", "2001", None], freq="D")
result = arr.astype(dtype)
with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)

with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
expected = arr.astype(expected_dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)


def test_astype_copies():
arr = period_array(["2000", "2001", None], freq="D")
result = arr.astype(np.int64, copy=False)
with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
result = arr.astype(np.int64, copy=False)

# Add the `.base`, since we now use `.asi8` which returns a view.
# We could maybe override it in PeriodArray to return ._data directly.
assert result.base is arr._data

result = arr.astype(np.int64, copy=True)
with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
result = arr.astype(np.int64, copy=True)
assert result is not arr._data
tm.assert_numpy_array_equal(result, arr._data.view("i8"))

Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,18 @@ def test_from_sequence_dtype(self):
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")])
result = arr.astype(dtype)
with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)

with tm.assert_produces_warning(FutureWarning):
# astype(int..) deprecated
expected = arr.astype(expected_dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,9 @@ def test_astype_nansafe(val, typ):

msg = "Cannot convert NaT values to integer"
with pytest.raises(ValueError, match=msg):
astype_nansafe(arr, dtype=typ)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# datetimelike astype(int64) deprecated
astype_nansafe(arr, dtype=typ)


@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64])
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/indexes/datetimes/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def test_astype(self):
)
tm.assert_index_equal(result, expected)

result = idx.astype(int)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = idx.astype(int)
expected = Int64Index(
[1463356800000000000] + [-9223372036854775808] * 3,
dtype=np.int64,
Expand All @@ -38,7 +39,8 @@ def test_astype(self):
tm.assert_index_equal(result, expected)

rng = date_range("1/1/2000", periods=10, name="idx")
result = rng.astype("i8")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = rng.astype("i8")
tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
tm.assert_numpy_array_equal(result.values, rng.asi8)

Expand All @@ -48,9 +50,9 @@ def test_astype_uint(self):
np.array([946684800000000000, 946771200000000000], dtype="uint64"),
name="idx",
)

tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

def test_astype_with_tz(self):

Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,13 @@ def index(self, request):
@pytest.mark.parametrize("subtype", ["int64", "uint64"])
def test_subtype_integer(self, index, subtype):
dtype = IntervalDtype(subtype)
result = index.astype(dtype)
expected = IntervalIndex.from_arrays(
index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = index.astype(dtype)
expected = IntervalIndex.from_arrays(
index.left.astype(subtype),
index.right.astype(subtype),
closed=index.closed,
)
tm.assert_index_equal(result, expected)

def test_subtype_float(self, index):
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/interval/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,21 @@ def test_constructor(self, constructor, breaks, closed, name):
)
def test_constructor_dtype(self, constructor, breaks, subtype):
# GH 19262: conversion via dtype parameter
expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
warn = None
if subtype == "int64" and breaks.dtype.kind in ["M", "m"]:
# astype(int64) deprecated
warn = FutureWarning

with tm.assert_produces_warning(warn, check_stacklevel=False):
expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
expected = constructor(**expected_kwargs)

result_kwargs = self.get_kwargs_from_breaks(breaks)
iv_dtype = IntervalDtype(subtype)
for dtype in (iv_dtype, str(iv_dtype)):
result = constructor(dtype=dtype, **result_kwargs)
with tm.assert_produces_warning(warn, check_stacklevel=False):

result = constructor(dtype=dtype, **result_kwargs)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/period/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def test_astype_conversion(self):
)
tm.assert_index_equal(result, expected)

result = idx.astype(np.int64)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = idx.astype(np.int64)
expected = Int64Index(
[16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx"
)
Expand All @@ -48,15 +49,17 @@ def test_astype_conversion(self):
tm.assert_index_equal(result, expected)

idx = period_range("1990", "2009", freq="A", name="idx")
result = idx.astype("i8")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a lot of these, you can simply change the test to use ``view`` instead of ``astype`` if you prefer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed one of these; the rest specifically target ``astype``, so they are not worth changing.

result = idx.astype("i8")
tm.assert_index_equal(result, Index(idx.asi8, name="idx"))
tm.assert_numpy_array_equal(result.values, idx.asi8)

def test_astype_uint(self):
arr = period_range("2000", periods=2, name="idx")
expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx")
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

def test_astype_object(self):
idx = PeriodIndex([], freq="M")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def test_constructor_simple_new(self):
msg = "Should be numpy array of type i8"
with pytest.raises(AssertionError, match=msg):
# Need ndarray, not Int64Index
type(idx._data)._simple_new(idx.astype("i8"), freq=idx.freq)
type(idx._data)._simple_new(idx._int64index, freq=idx.freq)

arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq)
result = idx._simple_new(arr, name="p")
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,14 @@ def test_astype_preserves_name(self, index, dtype):
else:
index.name = "idx"

warn = None
if dtype in ["int64", "uint64"]:
if needs_i8_conversion(index.dtype):
warn = FutureWarning
try:
# Some of these conversions cannot succeed so we use a try / except
result = index.astype(dtype)
with tm.assert_produces_warning(warn, check_stacklevel=False):
result = index.astype(dtype)
except (ValueError, TypeError, NotImplementedError, SystemError):
return

Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/indexes/timedeltas/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def test_astype(self):
)
tm.assert_index_equal(result, expected)

result = idx.astype(int)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = idx.astype(int)
expected = Int64Index(
[100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx"
)
Expand All @@ -66,7 +67,8 @@ def test_astype(self):
tm.assert_index_equal(result, expected)

rng = timedelta_range("1 days", periods=10)
result = rng.astype("i8")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = rng.astype("i8")
tm.assert_index_equal(result, Index(rng.asi8))
tm.assert_numpy_array_equal(rng.asi8, result.values)

Expand All @@ -75,9 +77,9 @@ def test_astype_uint(self):
expected = pd.UInt64Index(
np.array([3600000000000, 90000000000000], dtype="uint64")
)

tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

def test_astype_timedelta64(self):
# GH 13149, GH 13209
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@ def test_astype(self, t):
# coerce all
mgr = create_mgr("c: f4; d: f2; e: f8")

warn = FutureWarning if t == "int64" else None
# datetimelike.astype(int64) deprecated

t = np.dtype(t)
tmgr = mgr.astype(t)
assert tmgr.iget(0).dtype.type == t
Expand All @@ -464,7 +467,8 @@ def test_astype(self, t):
mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")

t = np.dtype(t)
tmgr = mgr.astype(t, errors="ignore")
with tm.assert_produces_warning(warn):
tmgr = mgr.astype(t, errors="ignore")
assert tmgr.iget(2).dtype.type == t
assert tmgr.iget(4).dtype.type == t
assert tmgr.iget(5).dtype.type == t
Expand Down
Loading