-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Last of the timezones funcs #17669
Last of the timezones funcs #17669
Changes from 3 commits
1932996
6ccbfb2
adf9099
fcd26c1
355cbe8
e17089c
b82396d
e6096c5
926bb73
2afc7b2
8f4b368
7b4c9b2
0248f53
ce701e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,7 +105,7 @@ from tslibs.timezones cimport ( | |
is_utc, is_tzlocal, is_fixed_offset, | ||
treat_tz_as_dateutil, treat_tz_as_pytz, | ||
get_timezone, get_utcoffset, maybe_get_tz, | ||
get_dst_info | ||
get_dst_info, _infer_dst | ||
) | ||
|
||
|
||
|
@@ -4003,48 +4003,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
result_b[i] = v | ||
|
||
if infer_dst: | ||
dst_hours = np.empty(n, dtype=np.int64) | ||
dst_hours.fill(NPY_NAT) | ||
|
||
# Get the ambiguous hours (given the above, these are the hours | ||
# where result_a != result_b and neither of them are NAT) | ||
both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) | ||
both_eq = result_a == result_b | ||
trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) | ||
if trans_idx.size == 1: | ||
stamp = Timestamp(vals[trans_idx]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert this routine. |
||
raise pytz.AmbiguousTimeError( | ||
"Cannot infer dst time from %s as there " | ||
"are no repeated times" % stamp) | ||
# Split the array into contiguous chunks (where the difference between | ||
# indices is 1). These are effectively dst transitions in different | ||
# years which is useful for checking that there is not an ambiguous | ||
# transition in an individual year. | ||
if trans_idx.size > 0: | ||
one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 | ||
trans_grp = np.array_split(trans_idx, one_diff) | ||
|
||
# Iterate through each day, if there are no hours where the | ||
# delta is negative (indicates a repeat of hour) the switch | ||
# cannot be inferred | ||
for grp in trans_grp: | ||
|
||
delta = np.diff(result_a[grp]) | ||
if grp.size == 1 or np.all(delta > 0): | ||
stamp = Timestamp(vals[grp[0]]) | ||
raise pytz.AmbiguousTimeError(stamp) | ||
|
||
# Find the index for the switch and pull from a for dst and b | ||
# for standard | ||
switch_idx = (delta <= 0).nonzero()[0] | ||
if switch_idx.size > 1: | ||
raise pytz.AmbiguousTimeError( | ||
"There are %i dst switches when " | ||
"there should only be 1." % switch_idx.size) | ||
switch_idx = switch_idx[0] + 1 # Pull the only index and adjust | ||
a_idx = grp[:switch_idx] | ||
b_idx = grp[switch_idx:] | ||
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) | ||
dst_hours = _infer_dst(vals, result_a, result_b) | ||
|
||
for i in range(n): | ||
left = result_a[i] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -275,3 +275,84 @@ cdef object get_dst_info(object tz): | |
dst_cache[cache_key] = (trans, deltas, typ) | ||
|
||
return dst_cache[cache_key] | ||
|
||
|
||
def _infer_tzinfo(start, end): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. de-privatize these There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. k |
||
def _infer(a, b): | ||
tz = a.tzinfo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be an outside function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we're going to bother changing it, might as well get rid of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
if b and b.tzinfo: | ||
if not (get_timezone(tz) == get_timezone(b.tzinfo)): | ||
raise AssertionError('Inputs must both have the same timezone,' | ||
' {timezone1} != {timezone2}' | ||
.format(timezone1=tz, timezone2=b.tzinfo)) | ||
return tz | ||
|
||
tz = None | ||
if start is not None: | ||
tz = _infer(start, end) | ||
elif end is not None: | ||
tz = _infer(end, start) | ||
return tz | ||
|
||
|
||
cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are removing part of this function. I think it's better to move almost all of it to timezones or leave it (OK with the latter for now; maybe do the former at some point). Otherwise you end up splitting the logic / comments in 2 places. I think this requires a bit more thought here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the appropriate block to separate out. It is exclusively focused on getting DST info for use in the original function. The rest of the function is thematically more related to _TSObject conversion. In particular, |
||
ndarray[int64_t] result_a, | ||
ndarray[int64_t] result_b): | ||
cdef: | ||
Py_ssize_t n = len(vals) | ||
ndarray[int64_t] dst_hours | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make sure you are typing these the same as the original There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The typings should be identical. There are a couple of variables that do not have type declarations; those can be added. |
||
|
||
dst_hours = np.empty(n, dtype=np.int64) | ||
dst_hours.fill(NPY_NAT) | ||
|
||
# Get the ambiguous hours (given the above, these are the hours | ||
# where result_a != result_b and neither of them are NAT) | ||
both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) | ||
both_eq = result_a == result_b | ||
trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) | ||
if trans_idx.size == 1: | ||
stamp = np.int64(vals[trans_idx]).astype('datetime64[ns]') | ||
# Render `stamp` as e.g. '2017-08-30 07:59:23.123456' | ||
# as opposed to str(stamp) which would | ||
# be '2017-08-30T07:59:23.123456789' | ||
stamp = str(stamp).replace('T', ' ')[:-3] | ||
raise pytz.AmbiguousTimeError( | ||
"Cannot infer dst time from %s as there " | ||
"are no repeated times" % stamp) | ||
|
||
# Split the array into contiguous chunks (where the difference between | ||
# indices is 1). These are effectively dst transitions in different | ||
# years which is useful for checking that there is not an ambiguous | ||
# transition in an individual year. | ||
if trans_idx.size > 0: | ||
one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 | ||
trans_grp = np.array_split(trans_idx, one_diff) | ||
|
||
# Iterate through each day, if there are no hours where the | ||
# delta is negative (indicates a repeat of hour) the switch | ||
# cannot be inferred | ||
for grp in trans_grp: | ||
|
||
delta = np.diff(result_a[grp]) | ||
if grp.size == 1 or np.all(delta > 0): | ||
stamp = np.int64(vals[grp[0]]).astype('datetime64[ns]') | ||
# Render `stamp` as e.g. '2017-08-30 07:59:23.123456' | ||
# as opposed to str(stamp) which would | ||
# be '2017-08-30T07:59:23.123456789' | ||
stamp = str(stamp).replace('T', ' ')[:-3] | ||
raise pytz.AmbiguousTimeError(stamp) | ||
|
||
# Find the index for the switch and pull from a for dst and b | ||
# for standard | ||
switch_idx = (delta <= 0).nonzero()[0] | ||
if switch_idx.size > 1: | ||
raise pytz.AmbiguousTimeError( | ||
"There are %i dst switches when " | ||
"there should only be 1." % switch_idx.size) | ||
|
||
switch_idx = switch_idx[0] + 1 # Pull the only index and adjust | ||
a_idx = grp[:switch_idx] | ||
b_idx = grp[switch_idx:] | ||
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) | ||
|
||
return dst_hours |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
|
||
from pandas._libs import lib, tslib | ||
from pandas._libs.tslibs.strptime import array_strptime | ||
from pandas._libs.tslibs.timezones import get_timezone | ||
from pandas._libs.tslibs.timezones import get_timezone, _infer_tzinfo # noqa | ||
|
||
from pandas.core.dtypes.common import ( | ||
_ensure_object, | ||
|
@@ -42,22 +42,6 @@ def _lexer_split_from_str(dt_str): | |
pass | ||
|
||
|
||
def _infer_tzinfo(start, end): | ||
def _infer(a, b): | ||
tz = a.tzinfo | ||
if b and b.tzinfo: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is this actually used? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Outside of tests, it's used once in |
||
if not (get_timezone(tz) == get_timezone(b.tzinfo)): | ||
raise AssertionError('Inputs must both have the same timezone,' | ||
' {timezone1} != {timezone2}' | ||
.format(timezone1=tz, timezone2=b.tzinfo)) | ||
return tz | ||
|
||
tz = None | ||
if start is not None: | ||
tz = _infer(start, end) | ||
elif end is not None: | ||
tz = _infer(end, start) | ||
return tz | ||
|
||
|
||
def _guess_datetime_format(dt_str, dayfirst=False, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
let's give this a more descriptive name:
infer_dst_transitions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure