Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: use .view(i8) instead of .astype(i8) for datetimelike values #38535

Merged
merged 2 commits into from
Dec 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.util._decorators import Appender, Substitution, doc

from pandas.core.dtypes.common import is_datetime64_ns_dtype
from pandas.core.dtypes.missing import isna

import pandas.core.common as common
from pandas.core.util.numba_ import maybe_use_numba
Expand Down Expand Up @@ -252,7 +253,9 @@ def __init__(
raise ValueError(
"halflife must be a string or datetime.timedelta object"
)
self.times = np.asarray(times.astype(np.int64))
if isna(times).any():
raise ValueError("Cannot convert NaT values to integer")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test for this case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double-checked and you're right we dont have any. suggestions for what this might look like?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something simple like

with pytest.raises(ValueError, match=...):
     Series(range(1)).ewm(com=0.1, times=to_datetime(['NaT'])))

works.

self.times = np.asarray(times.view(np.int64))
self.halflife = Timedelta(halflife).value
# Halflife is no longer applicable when calculating COM
# But allow COM to still be calculated if the user passes other decay args
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1739,7 +1739,7 @@ def get_format_timedelta64(

If box, then show the return in quotes
"""
values_int = values.astype(np.int64)
values_int = values.view(np.int64)

consider_values = values_int != iNaT

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,15 +371,15 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
if is_datetime64_dtype(dates.dtype):
if delta:
time_delta = dates - stata_epoch
d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds
d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds
if days or year:
date_index = DatetimeIndex(dates)
d["year"] = date_index._data.year
d["month"] = date_index._data.month
if days:
days_in_ns = dates.astype(np.int64) - to_datetime(
days_in_ns = dates.view(np.int64) - to_datetime(
d["year"], format="%Y"
).astype(np.int64)
).view(np.int64)
d["days"] = days_in_ns // NS_PER_DAY

elif infer_dtype(dates, skipna=False) == "datetime":
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def test_constructor_simple_new(self):
msg = "Should be numpy array of type i8"
with pytest.raises(AssertionError, match=msg):
# Need ndarray, not Int64Index
type(idx._data)._simple_new(idx.astype("i8"), freq=idx.freq)
type(idx._data)._simple_new(idx._int64index, freq=idx.freq)

arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq)
result = idx._simple_new(arr, name="p")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_frame_non_unique_columns(self, orient, data):
# in milliseconds; these are internally stored in nanosecond,
# so divide to get where we need
# TODO: a to_epoch method would also solve; see GH 14772
expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000
elif orient == "split":
expected = df

Expand Down Expand Up @@ -254,7 +254,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame):

if not convert_axes: # one off for ts handling
# DTI gets converted to epoch values
idx = expected.index.astype(np.int64) // 1000000
idx = expected.index.view(np.int64) // 1000000
if orient != "split": # TODO: handle consistently across orients
idx = idx.astype(str)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ def test_timedelta(self):
with tm.assert_produces_warning(UserWarning):
df.to_sql("test_timedelta", self.conn)
result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn)
tm.assert_series_equal(result["foo"], df["foo"].astype("int64"))
tm.assert_series_equal(result["foo"], df["foo"].view("int64"))

def test_complex_raises(self):
df = DataFrame({"a": [1 + 1j, 2j]})
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture):
result = df.ewm(halflife=halflife, times=times).mean()
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
tm.assert_frame_equal(result, expected)


def test_ewm_with_nat_raises(halflife_with_times):
# GH#38535
ser = Series(range(1))
times = DatetimeIndex(["NaT"])
with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
ser.ewm(com=0.1, halflife=halflife_with_times, times=times)