Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: catch out-of-bounds datetime64 in Series/DataFrame constructor #26848

Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ Datetimelike
- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`)
- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'``
- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`)
- Bug in constructing a ``Series`` or ``DataFrame`` from a numpy ``datetime64`` array with a non-ns unit and out-of-bounds timestamps, which silently produced incorrect data; this now correctly raises an ``OutOfBoundsDatetime`` error (:issue:`26206`).
- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`)

Timedelta
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
.tz_convert(dtype.tz))
elif is_timedelta64:
value = to_timedelta(value, errors=errors)._values
except OutOfBoundsDatetime:
jreback marked this conversation as resolved.
Show resolved Hide resolved
raise
except (AttributeError, ValueError, TypeError):
pass

Expand All @@ -1063,7 +1065,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
dtype = value.dtype

if dtype.kind == 'M' and dtype != _NS_DTYPE:
value = value.astype(_NS_DTYPE)
value = tslibs.conversion.ensure_datetime64ns(value)

elif dtype.kind == 'm' and dtype != _TD_DTYPE:
value = to_timedelta(value)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy.ma as ma

from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
from pandas.compat import raise_with_traceback

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -700,6 +700,9 @@ def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):
elif not is_extension_type(subarr):
subarr = construct_1d_ndarray_preserving_na(subarr, dtype,
copy=copy)
except OutOfBoundsDatetime:
jreback marked this conversation as resolved.
Show resolved Hide resolved
# in case of out of bound datetime64 -> always raise
raise
except (ValueError, TypeError):
if is_categorical_dtype(dtype):
# We *do* allow casting to categorical, since we know
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,3 +1341,41 @@ def test_to_numpy_dtype(as_series):
expected = np.array(['2000-01-01T05', '2001-01-01T05'],
dtype='M8[ns]')
tm.assert_numpy_array_equal(result, expected)


class TestConstruction:
    # Constructor behaviour around dtype inference, exercised uniformly
    # through Series, Index and DataFrame.

    @pytest.mark.parametrize(
        "klass",
        [
            Series,
            # DataFrame built from a dict, then reduced to its only column
            lambda data, **kw: DataFrame({'a': data}, **kw)['a'],
            # DataFrame built directly from the array; marked as an
            # expected failure (no reason is recorded on the mark)
            pytest.param(
                lambda data, **kw: DataFrame(data, **kw)[0],
                marks=pytest.mark.xfail
            ),
            Index,
        ]
    )
    @pytest.mark.parametrize(
        "a",
        [
            np.array(['2263-01-01'], dtype='datetime64[D]'),
            np.array([datetime(2263, 1, 1)], dtype=object),
            np.array([np.datetime64('2263-01-01', 'D')], dtype=object),
            np.array(["2263-01-01"], dtype=object),
        ],
        ids=[
            'datetime64[D]',
            'object-datetime.datetime',
            'object-numpy-scalar',
            'object-string',
        ]
    )
    def test_constructor_datetime_outofbound(self, a, klass):
        # GH-26853 (+ bug GH-26206 out of bound non-ns unit)

        # Step 1: construct without a dtype, so inference decides.
        if a.dtype.kind != 'M':
            # Object-dtype input is kept as object and the values are
            # expected to come back untouched.
            result = klass(a)
            assert result.dtype == 'object'
            tm.assert_numpy_array_equal(result.to_numpy(), a)
        else:
            # A datetime64 array with a non-ns unit has to be converted,
            # and that conversion is expected to raise.
            with pytest.raises(pd.errors.OutOfBoundsDatetime):
                klass(a)

        # Step 2: request datetime64[ns] explicitly. The conversion is
        # forced for every input, so every case is expected to raise.
        with pytest.raises(pd.errors.OutOfBoundsDatetime):
            klass(a, dtype='datetime64[ns]')