From a3d538ab72380471f5de7b8e4a3f811aa4de84af Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Oct 2017 05:43:39 -0700 Subject: [PATCH] Separate out _convert_datetime_to_tsobject (#17715) --- pandas/_libs/tslib.pyx | 145 +++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 096ebe9a5627b..ff20ea287bd9d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -708,7 +708,7 @@ class Timestamp(_Timestamp): # reconstruct & check bounds ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tzinfo=_tzinfo) - ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: _check_dts_bounds(&dts) @@ -1455,52 +1455,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif PyDateTime_Check(ts): - if tz is not None: - # sort of a temporary hack - if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): - ts = tz.normalize(ts) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: #tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = get_utcoffset(tz, ts) - obj.value += _delta_to_nanoseconds(tz_offset) - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - elif not is_utc(tz): - ts = _localize_pydatetime(ts, tz) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # UTC - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = pytz.utc - else: - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - - if obj.tzinfo is not None and not is_utc(obj.tzinfo): - offset = get_utcoffset(obj.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) - - if is_timestamp(ts): - obj.value += ts.nanosecond - obj.dts.ps = ts.nanosecond * 1000 - _check_dts_bounds(&obj.dts) - return obj + return convert_datetime_to_tsobject(ts, tz) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz, None, 0, 0) + return convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError( "Cannot convert Period to Timestamp " @@ -1518,6 +1477,83 @@ cdef convert_to_tsobject(object ts, object tz, object unit, return obj +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=0): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. + + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + + if tz is not None: + tz = maybe_get_tz(tz) + + # sort of a temporary hack + if ts.tzinfo is not None: + if (hasattr(tz, 'normalize') and + hasattr(ts.tzinfo, '_utcoffset')): + ts = tz.normalize(ts) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # tzoffset + try: + tz = ts.astimezone(tz).tzinfo + except: + pass + obj.value = _pydatetime_to_dts(ts, &obj.dts) + ts_offset = get_utcoffset(ts.tzinfo, ts) + obj.value -= int(ts_offset.total_seconds() * 1e9) + tz_offset = get_utcoffset(tz, ts) + obj.value += int(tz_offset.total_seconds() * 1e9) + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) + obj.tzinfo = tz + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = pytz.utc + else: + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if is_timestamp(ts): + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + + _check_dts_bounds(&obj.dts) + return obj + + cpdef convert_str_to_tsobject(object ts, object tz, object unit, dayfirst=False, yearfirst=False): """ ts must be a string """ @@ -1538,11 +1574,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, elif ts == 'now': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc - ts = Timestamp.now(tz) + ts = datetime.now(tz) elif ts == 'today': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime - ts = Timestamp.today(tz) + ts = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) else: try: _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) @@ -1557,7 +1594,15 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return obj else: # Keep the converter same as PyDateTime's - ts = Timestamp(obj.value, tz=obj.tzinfo) + obj = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject(dtime, tz, + nanos=obj.dts.ps / 1000) + return obj + else: ts = obj.value if tz is not None: @@ -1706,7 +1751,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = get_timezone(val.tzinfo) - _ts = convert_to_tsobject(val, None, None, 0, 0) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value _check_dts_bounds(&_ts.dts) else: @@ -2026,7 +2071,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime=1 if val.tzinfo is not None: if utc_convert: - _ts = convert_to_tsobject(val, None, 'ns', 0, 0) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value try: _check_dts_bounds(&_ts.dts) @@ -2135,7 +2180,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: - _ts = convert_to_tsobject(py_dt, None, None, 0, 0) + _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except ValueError: if is_coerce: