From 6e04264250f27787075dc0cb1685e20e7a6af071 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Wed, 26 Feb 2020 15:35:02 +0300 Subject: [PATCH] ENH: add fold support to Timestamp constructor (#31563) --- doc/source/user_guide/timeseries.rst | 29 ++++++ doc/source/whatsnew/v1.1.0.rst | 22 +++++ pandas/_libs/tslib.pyx | 32 ++++--- pandas/_libs/tslibs/conversion.pxd | 1 + pandas/_libs/tslibs/conversion.pyx | 69 +++++++++++++- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 54 +++++++++-- pandas/_libs/tslibs/tzconversion.pxd | 1 + pandas/_libs/tslibs/tzconversion.pyx | 85 +++++++++++++++-- .../indexes/datetimes/test_constructors.py | 94 ++++++++++++++++++- .../tests/scalar/timestamp/test_timezones.py | 2 +- 11 files changed, 354 insertions(+), 37 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 3fdab0fd26643..f208c8d576131 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2297,6 +2297,35 @@ To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None) # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None) didx.tz_convert('UTC').tz_localize(None) +.. _timeseries.fold: + +Fold +~~~~ + +.. versionadded:: 1.1.0 + +For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument. +Due to daylight saving time, one wall clock time can occur twice when shifting +from summer to winter time; fold describes whether the datetime-like corresponds +to the first (0) or the second time (1) the wall clock hits the ambiguous time. +Fold is supported only for constructing from naive ``datetime.datetime`` +(see `datetime documentation <https://docs.python.org/3/library/datetime.html>`__ for details) or from :class:`Timestamp` +or for constructing from components (see below).
Only ``dateutil`` timezones are supported +(see `dateutil documentation <https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.enfold>`__ +for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` +timezones do not support fold (see `pytz documentation <http://pytz.sourceforge.net/#problems-with-localtime>`__ +for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime +with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend relying +on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct +control over how they are handled. + +.. ipython:: python + + pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), + tz='dateutil/Europe/London', fold=0) + pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz='dateutil/Europe/London', fold=1) + .. _timeseries.timezone_ambiguous: Ambiguous times when localizing diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 888b7d23aeb35..2b64b85863def 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -36,6 +36,28 @@ For example: ser["2014"] ser.loc["May 2015"] +.. _whatsnew_110.timestamp_fold_support: + +Fold argument support in Timestamp constructor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Timestamp` now supports the keyword-only fold argument according to `PEP 495 <https://www.python.org/dev/peps/pep-0495/>`_, similar to the parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold. + +For example: + +.. ipython:: python + + ts = pd.Timestamp("2019-10-27 01:30:00+00:00") + ts.fold + +.. ipython:: python + + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz="dateutil/Europe/London", fold=1) + ts + +For more on working with fold, see :ref:`Fold subsection <timeseries.fold>` in the user guide. + ..
_whatsnew_110.enhancements.other: Other enhancements diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 53e3354ca8eb6..a176c4e41e834 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -49,30 +49,31 @@ from pandas._libs.tslibs.tzconversion cimport ( cdef inline object create_datetime_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.datetime from its parts """ return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) + dts.min, dts.sec, dts.us, tz, fold=fold) cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.date from its parts """ + # GH 25057 add fold argument to match other func_create signatures return date(dts.year, dts.month, dts.day) cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.time from its parts """ - return time(dts.hour, dts.min, dts.sec, dts.us, tz) + return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) @cython.wraparound(False) @cython.boundscheck(False) def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, - str box="datetime"): + bint fold=0, str box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -83,6 +84,13 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, convert to this timezone freq : str/Offset, default None freq to convert + fold : bint, default is 0 + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the 
ambiguous time + + .. versionadded:: 1.1.0 box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date @@ -104,7 +112,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, str typ int64_t value, delta, local_value ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, npy_datetimestruct, object, object) + object (*func_create)(int64_t, npy_datetimestruct, object, object, bint) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -129,7 +137,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, result[i] = NaT else: dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif is_tzlocal(tz): for i in range(n): value = arr[i] @@ -141,7 +149,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # using the i8 representation. 
local_value = tz_convert_utc_to_tzlocal(value, tz) dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: trans, deltas, typ = get_dst_info(tz) @@ -155,7 +163,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, else: # Adjust datetime64 timestamp, recompute datetimestruct dt64_to_dtstruct(value + delta, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif typ == 'dateutil': # no zone-name change for dateutil tzs - dst etc @@ -168,7 +176,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: # pytz for i in range(n): @@ -182,7 +190,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, new_tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, new_tz, freq) + result[i] = func_create(value, dts, new_tz, freq, fold) return result diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index c74307a3d2887..bb20296e24587 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -12,6 +12,7 @@ cdef class _TSObject: npy_datetimestruct dts # npy_datetimestruct int64_t value # numpy dt64 object tzinfo + bint fold cdef convert_to_tsobject(object ts, object tz, object unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6e978d495c325..57483783faf9f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport ( from 
pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc +from pandas._libs.tslibs.tzconversion cimport ( + _tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc) # ---------------------------------------------------------------------- # Constants @@ -215,6 +216,11 @@ cdef class _TSObject: # npy_datetimestruct dts # npy_datetimestruct # int64_t value # numpy dt64 # object tzinfo + # bint fold + + def __cinit__(self): + # GH 25057. As per PEP 495, set fold to 0 by default + self.fold = 0 @property def value(self): @@ -322,6 +328,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, cdef: _TSObject obj = _TSObject() + obj.fold = ts.fold if tz is not None: tz = maybe_get_tz(tz) @@ -380,6 +387,8 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt + ndarray[int64_t] trans + int64_t[:] deltas value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -389,10 +398,23 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, check_overflows(obj) return obj + # Infer fold from offset-adjusted obj.value + # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + if is_utc(tz): + pass + elif is_tzlocal(tz): + _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) + else: + trans, deltas, typ = get_dst_info(tz) + + if typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) + obj.dts.us, obj.tzinfo, fold=obj.fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) return obj @@ -543,7 +565,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif 
is_tzlocal(tz): - local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct @@ -562,6 +584,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # dateutil supports fold, so we infer fold from value + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, @@ -571,6 +595,45 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): obj.tzinfo = tz +cdef inline bint _infer_tsobject_fold(_TSObject obj, ndarray[int64_t] trans, + int64_t[:] deltas, int32_t pos): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + obj : _TSObject + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. + pos : int32_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + References + ---------- + .. 
[1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + """ + cdef: + bint fold = 0 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + fold = 1 + + return fold + cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index b7282e02ff117..5e55e6e8d5297 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -5,4 +5,4 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct cdef object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq) + object tz, object freq, bint fold) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9f3b4a8a554b5..5cd3467eed042 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -6,12 +6,12 @@ from numpy cimport int64_t cnp.import_array() from datetime import time as datetime_time, timedelta -from cpython.datetime cimport (datetime, +from cpython.datetime cimport (datetime, PyDateTime_Check, PyTZInfo_Check, PyDateTime_IMPORT) PyDateTime_IMPORT from pandas._libs.tslibs.util cimport ( - is_integer_object, is_offset_object) + is_datetime64_object, is_float_object, is_integer_object, is_offset_object) from pandas._libs.tslibs.c_timestamp cimport _Timestamp cimport pandas._libs.tslibs.ccalendar as ccalendar @@ -41,12 +41,12 @@ _no_input = object() cdef inline object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a Timestamp from its parts """ cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) + dts.sec, dts.us, tz, 
fold=fold) ts_base.value = value ts_base.freq = freq ts_base.nanosecond = dts.ps // 1000 @@ -195,6 +195,13 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None + fold : {0, 1}, default None, keyword-only + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + .. versionadded:: 1.1.0 Notes ----- @@ -350,7 +357,9 @@ class Timestamp(_Timestamp): second=None, microsecond=None, nanosecond=None, - tzinfo=None + tzinfo=None, + *, + fold=None ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. @@ -390,6 +399,32 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None + # Allow fold only for unambiguous input + if fold is not None: + if fold not in [0, 1]: + raise ValueError( + "Valid values for the fold argument are None, 0, or 1." + ) + + if (ts_input is not _no_input and not ( + PyDateTime_Check(ts_input) and + getattr(ts_input, 'tzinfo', None) is None)): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, " + "float, numpy.datetime64, str, or timezone-aware " + "datetime-like. Pass naive datetime-like or build " + "Timestamp from components." + ) + + if tz is not None and treat_tz_as_pytz(tz): + raise ValueError( + "pytz timezones do not support fold. Please use dateutil " + "timezones." 
+ ) + + if hasattr(ts_input, 'fold'): + ts_input = ts_input.replace(fold=fold) + # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed # checking verbosely, because cython doesn't optimize @@ -419,7 +454,8 @@ class Timestamp(_Timestamp): "hour": hour or 0, "minute": minute or 0, "second": second or 0, - "microsecond": microsecond or 0 + "microsecond": microsecond or 0, + "fold": fold or 0 } if year is not None: datetime_kwargs["year"] = year @@ -435,7 +471,7 @@ class Timestamp(_Timestamp): # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, nanosecond[, tzinfo]]]]]]) ts_input = datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0) + year or 0, month or 0, day or 0, fold=fold or 0) nanosecond = hour tz = minute freq = None @@ -455,7 +491,7 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: @@ -999,7 +1035,7 @@ default 'raise' if value != NPY_NAT: check_dts_bounds(&dts) - return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) + return create_timestamp_from_ts(value, dts, _tzinfo, self.freq, fold) def isoformat(self, sep='T'): base = super(_Timestamp, self).isoformat(sep=sep) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 9c86057b0a392..c1dd88e5b2313 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,4 +4,5 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold) cpdef int64_t tz_convert_single(int64_t val, object tz1, object 
tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index b368f0fde3edc..a9702f91107ec 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,12 +444,12 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): +cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, + bint *fold=NULL): """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. - - Private, not intended for use outside of tslibs.conversion + Calculate offset in nanoseconds needed to convert the i8 representation of + a datetime from a tzlocal timezone to UTC, or vice-versa. Parameters ---------- @@ -457,15 +457,22 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment Returns ------- - result : int64_t + delta : int64_t + + Notes + ----- + Sets fold by pointer """ cdef: npy_datetimestruct dts - int64_t delta datetime dt + int64_t delta dt64_to_dtstruct(val, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, @@ -475,11 +482,69 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): if not to_utc: dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if not to_utc: + if fold is not NULL: + fold[0] = dt.fold + + return int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. 
+ + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + delta = _tzlocal_get_offset_components(val, tz, to_utc, NULL) + + if to_utc: + return val - delta + else: return val + delta - return val - delta + + +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): + """ + Convert the i8 representation of a datetime from UTC to local timezone, + set fold by pointer + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + result : int64_t + + Notes + ----- + Sets fold by pointer + """ + cdef int64_t delta + + delta = _tzlocal_get_offset_components(val, tz, False, fold) + + return val + delta @cython.boundscheck(False) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 1d1d371fcec1e..b293c008d6683 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import partial from operator import attrgetter @@ -959,3 +959,95 @@ def test_pass_datetimeindex_to_index(self): expected = Index(rng.to_pydatetime(), dtype=object) tm.assert_numpy_array_equal(idx.values, expected.values) + + +def test_timestamp_constructor_invalid_fold_raise(): + # Test for #25057 + # Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." 
+ with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + +def test_timestamp_constructor_pytz_fold_raise(): + # Test for #25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil timezones." + tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) + + +@pytest.mark.parametrize("fold", [0, 1]) +@pytest.mark.parametrize( + "ts_input", + [ + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), + ], +) +def test_timestamp_constructor_fold_conflict(ts_input, fold): + # Test for #25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." 
+ ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input=ts_input, fold=fold) + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_retain_fold(tz, fold): + # Test for #25057 + # Check that we retain fold + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold_out", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), + ], +) +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): + # Test for #25057 + # Check that we infer fold correctly based on timestamps since utc + # or strings + ts = pd.Timestamp(ts_input, tz=tz) + result = ts.fold + expected = fold_out + assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold,value_out", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): + # Test for #25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = pd.Timestamp(ts_input, tz=tz, fold=fold) + result = ts.value + expected = value_out + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6537f6ccd8432..cfa7da810ada1 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -140,7 +140,7 @@ def 
test_tz_localize_ambiguous_compat(self): # see gh-14621 assert result_pytz.to_pydatetime().tzname() == "GMT" assert result_dateutil.to_pydatetime().tzname() == "BST" - assert str(result_pytz) != str(result_dateutil) + assert str(result_pytz) == str(result_dateutil) # 1 hour difference result_pytz = naive.tz_localize(pytz_zone, ambiguous=1)