Skip to content

Commit

Permalink
BUG: incorrectly accepting datetime64(nat) for dt64tz (pandas-dev#39769)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and znicholls committed Feb 17, 2021
1 parent 8a726f0 commit 4ea0473
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 9 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,9 @@ Indexing
- Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, :issue:`39619`)
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`)
- Bug in setting ``np.datetime64("NaT")`` into a :class:`Series` with :class:`Datetime64TZDtype` incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`)
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`)
- Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`)
- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,12 @@ def _validate_scalar(
# GH#18295
value = NaT

elif isna(value):
# if we are dt64tz and value is dt64("NaT"), don't cast to NaT,
# or else we'll fail to raise in _unbox_scalar
msg = self._validation_error_message(value, allow_listlike)
raise TypeError(msg)

elif isinstance(value, self._recognized_scalars):
# error: Too many arguments for "object"
value = self._scalar_type(value) # type: ignore[call-arg]
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,10 +464,8 @@ def _generate_range(
def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64:
if not isinstance(value, self._scalar_type) and value is not NaT:
raise ValueError("'value' should be a Timestamp.")
if not isna(value):
self._check_compatible_with(value, setitem=setitem)
return value.asm8
return np.datetime64(value.value, "ns")
self._check_compatible_with(value, setitem=setitem)
return value.asm8

def _scalar_from_string(self, value):
# Build a Timestamp from ``value``, passing this array's ``tz`` to the constructor.
return Timestamp(value, tz=self.tz)
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas._config import get_option

from pandas._libs import NaT
from pandas._libs.interval import (
VALID_CLOSED,
Interval,
Expand All @@ -23,7 +24,8 @@
from pandas.core.dtypes.cast import maybe_convert_platform
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -999,9 +1001,12 @@ def _validate_setitem_value(self, value):
if is_integer_dtype(self.dtype.subtype):
# can't set NaN on a numpy integer array
needs_float_conversion = True
elif is_datetime64_any_dtype(self.dtype.subtype):
elif is_datetime64_dtype(self.dtype.subtype):
# need proper NaT to set directly on the numpy array
value = np.datetime64("NaT")
elif is_datetime64tz_dtype(self.dtype.subtype):
# need proper NaT to set directly on the DatetimeArray
value = NaT
elif is_timedelta64_dtype(self.dtype.subtype):
# need proper NaT to set directly on the numpy array
value = np.timedelta64("NaT")
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,11 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
if not lib.is_scalar(obj) or not isna(obj):
return False
if dtype.kind == "M":
return not isinstance(obj, np.timedelta64)
if isinstance(dtype, np.dtype):
# i.e. not tzaware
return not isinstance(obj, np.timedelta64)
# we have to rule out tznaive dt64("NaT")
return not isinstance(obj, (np.timedelta64, np.datetime64))
if dtype.kind == "m":
return not isinstance(obj, np.datetime64)
if dtype.kind in ["i", "u", "f", "c"]:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pandas._libs import (
Interval,
NaT,
Period,
Timestamp,
algos as libalgos,
Expand Down Expand Up @@ -2097,7 +2098,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
_can_hold_element = DatetimeBlock._can_hold_element
to_native_types = DatetimeBlock.to_native_types
diff = DatetimeBlock.diff
fill_value = np.datetime64("NaT", "ns")
fill_value = NaT
where = DatetimeBlock.where
putmask = DatetimeLikeBlockMixin.putmask

Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/datetimes/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ class TestInsert:
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_nat(self, tz, null):
# GH#16537, GH#18295 (test missing)

idx = DatetimeIndex(["2017-01-01"], tz=tz)
expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
if tz is not None and isinstance(null, np.datetime64):
expected = Index([null, idx[0]], dtype=object)

res = idx.insert(0, null)
tm.assert_index_equal(res, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def test_dt64_series_assign_nat(nat_val, tz, indexer_sli):
base = Series(dti)
expected = Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype)

should_cast = nat_val is pd.NaT or base.dtype.kind == nat_val.dtype.kind
should_cast = nat_val is pd.NaT or base.dtype == nat_val.dtype
if not should_cast:
expected = expected.astype(object)

Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,43 @@ def key(self):
return 0


class TestSetitemNADatetime64Dtype(SetitemCastingEquivalents):
# Setting a NaT-like scalar into a datetime64 Series (tz-naive or tz-aware).
# Some NaT-like values should be cast to datetime64 when inserting
# into a datetime64 Series. Others should coerce the Series to object
# dtype and retain their own dtypes.

@pytest.fixture(params=[None, "UTC", "US/Central"])
def obj(self, request):
# Series built from a 3-period date_range; parametrized over tz-naive
# (None) and two timezones.
tz = request.param
dti = date_range("2016-01-01", periods=3, tz=tz)
return Series(dti)

@pytest.fixture(
params=[NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]
)
def val(self, request):
# The NaT-like scalar being set: pd.NaT, td64("NaT") or dt64("NaT").
return request.param

@pytest.fixture
def is_inplace(self, val, obj):
# True when setting ``val`` is expected to keep ``obj``'s dtype.
if obj._values.tz is None:
# tz-naive: pd.NaT and dt64("NaT") keep the dtype;
# we cast to object iff val is timedelta64("NaT")
return val is NaT or val.dtype.kind == "M"

# tz-aware: only pd.NaT keeps the dtype; we have to exclude
# tznaive dt64("NaT") (and td64("NaT")), which cast to object
return val is NaT

@pytest.fixture
def expected(self, obj, val, is_inplace):
# ``val`` followed by the remaining elements of ``obj``; keeps obj's dtype
# when the set is in place, otherwise object dtype.
dtype = obj.dtype if is_inplace else object
expected = Series([val] + list(obj[1:]), dtype=dtype)
return expected

@pytest.fixture
def key(self):
# presumably the indexer the inherited tests use for the set; it matches
# position 0, where ``expected`` places ``val``
return 0


class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents):
# GH#24024
@pytest.fixture
Expand Down

0 comments on commit 4ea0473

Please sign in to comment.