Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing corner case (closes #19382, #19529)

Merged
merged 3 commits into from
Feb 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ Datetimelike
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
-

Timezones
Expand Down
30 changes: 24 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -609,20 +609,38 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to
# dateutil parser will return incorrect result because
# it will ignore nanoseconds
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
elif is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values
elif is_coerce:
iresult[i] = NPY_NAT
continue
raise
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
if is_coerce:
elif is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError(
"time data %r doesn't match format "
"specified" % (val,))
else:
return values
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values

try:
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
dt64_to_dtstruct, dtstruct_to_dt64,
get_datetime64_unit, get_datetime64_value,
pydatetime_to_dt64)
from np_datetime import OutOfBoundsDatetime

from util cimport (is_string_object,
is_datetime64_object,
Expand Down Expand Up @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise',
errors='raise')[0]

except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
# parser will return incorrect result because it will ignore
# nanoseconds
raise

except ValueError:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas._libs.tslibs import parsing
from pandas.core.tools import datetimes as tools

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import lmap
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.core.dtypes.common import is_datetime64_ns_dtype
Expand Down Expand Up @@ -783,7 +784,6 @@ def test_dataframe_dtypes(self, cache):


class TestToDatetimeMisc(object):

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_iso8601(self, cache):
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
Expand Down Expand Up @@ -1596,6 +1596,20 @@ def test_coerce_of_invalid_datetimes(self):
)
)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import `OutOfBoundsDatetime` from `pandas.errors`.

def test_to_datetime_barely_out_of_bounds(self):
# Regression test for GH#19529 and GH#19382: the string below is close
# enough to the bounds that dropping its nanoseconds would result in an
# in-bounds datetime, so parsing must raise rather than round down.
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

# First check the to_datetime entry point.
with pytest.raises(OutOfBoundsDatetime):
to_datetime(arr)

with pytest.raises(OutOfBoundsDatetime):
# Essentially the same as above, but more directly calling
# the relevant function
tslib.array_to_datetime(arr)


def test_normalize_date():
value = date(2012, 9, 7)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import long, PY3
from pandas.compat.numpy import np_datetime64_compat
from pandas import Timestamp, Period, Timedelta
Expand Down Expand Up @@ -410,6 +411,13 @@ def test_out_of_bounds_string(self):
with pytest.raises(ValueError):
Timestamp('2263-01-01')

def test_barely_out_of_bounds(self):
# Regression test for GH#19529 and GH#19382: this string is close enough
# to the bounds that dropping its nanoseconds would result in an
# in-bounds datetime, so constructing a Timestamp from it must raise
# OutOfBoundsDatetime.
with pytest.raises(OutOfBoundsDatetime):
Timestamp('2262-04-11 23:47:16.854775808')

def test_bounds_with_different_units(self):
out_of_bounds_dates = ('1677-09-21', '2262-04-12')

Expand Down