From 6fe2db5758b59800e41a5bf3d7f047ae36e6235f Mon Sep 17 00:00:00 2001 From: Adam Klein Date: Wed, 22 Feb 2012 18:48:43 -0500 Subject: [PATCH] ENH: added access to new offsets & deprecated old, unified offset/freq parameter --- pandas/core/daterange.py | 630 +------------------------------- pandas/core/datetools.py | 59 +-- pandas/core/index.py | 89 ++--- pandas/tests/test_daterange.py | 20 +- pandas/tests/test_datetime64.py | 22 +- pandas/util/testing.py | 1 - 6 files changed, 90 insertions(+), 731 deletions(-) diff --git a/pandas/core/daterange.py b/pandas/core/daterange.py index 201cde2372cb5..41651ba1065ee 100644 --- a/pandas/core/daterange.py +++ b/pandas/core/daterange.py @@ -1,638 +1,26 @@ # pylint: disable=E1101,E1103 -from datetime import datetime -import operator - -import numpy as np - from pandas.core.index import DatetimeIndex import pandas.core.datetools as datetools -from pandas.core.datetools import _dt_box, _dt_unbox_array -from pandas._tseries import Timestamp __all__ = ['DateRange'] -#------------------------------------------------------------------------------- +#----------------------------------------------------------------------------- # DateRange class -def _bin_op(op): - def f(self, other): - if isinstance(other, datetime): - other = np.datetime64(other) - return op(self.view(np.ndarray), other) - - return f - -_CACHE_START = Timestamp(datetime(1950, 1, 1)) -_CACHE_END = Timestamp(datetime(2030, 1, 1)) - -_daterange_cache = {} - class DateRange(DatetimeIndex): def __new__(cls, start=None, end=None, periods=None, offset=datetools.bday, time_rule=None, tzinfo=None, name=None, **kwds): - retval = super(DateRange, cls).__new__(cls, start=start, end=end, - periods=periods, offset=offset, freq=time_rule, tzinfo=tzinfo, - name=name, **kwds) - - return retval - -class DateRangeOld: - """ - Fixed frequency date range according to input parameters. - - Input dates satisfy: - begin <= d <= end, where d lies on the given offset - - Parameters - ---------- - start : {datetime, None} - left boundary for range - end : {datetime, None} - right boundary for range - periods : int - Number of periods to generate. - offset : DateOffset, default is 1 BusinessDay - Used to determine the dates returned - time_rule : time_rule to use - tzinfo : pytz.timezone - To endow DateRange with time zone information - """ - def __new__(cls, start=None, end=None, periods=None, - offset=datetools.bday, time_rule=None, - tzinfo=None, name=None, **kwds): - - time_rule = kwds.get('timeRule', time_rule) - if time_rule is not None: - offset = datetools.getOffset(time_rule) - - if time_rule is None: - if offset in datetools._offsetNames: - time_rule = datetools._offsetNames[offset] - - # Cachable - if not start: - start = kwds.get('begin') - if not periods: - periods = kwds.get('nPeriods') - - start = datetools.to_datetime(start) - end = datetools.to_datetime(end) - - if (start is not None - and not isinstance(start, (datetime, np.datetime64, Timestamp))): - raise ValueError('Failed to convert %s to datetime' % start) - - if (end is not None - and not isinstance(end, (datetime, np.datetime64, Timestamp))): - raise ValueError('Failed to convert %s to datetime' % end) - - # inside cache range. 
Handle UTC case - useCache = _will_use_cache(offset) - - start, end, tzinfo = _figure_out_timezone(start, end, tzinfo) - useCache = useCache and _naive_in_cache_range(start, end) - - if useCache: - index = cls._cached_range(start, end, periods=periods, - offset=offset, time_rule=time_rule, - name=name) - if tzinfo is None: - return index - else: - xdr = generate_range(start=start, end=end, periods=periods, - offset=offset, time_rule=time_rule) - index = list(xdr) - - if tzinfo is not None: - index = [d.replace(tzinfo=tzinfo) for d in index] - - index = np.array(_dt_unbox_array(index), dtype='M8[us]', copy=False) - index = index.view(cls) - index.name = name - index.offset = offset - index.tzinfo = tzinfo - - index.freq = time_rule - index.regular = True - - return index - - def __reduce__(self): - """Necessary for making this object picklable""" - a, b, state = DatetimeIndex.__reduce__(self) - aug_state = state, self.offset, self.tzinfo - - return a, b, aug_state - - def __setstate__(self, aug_state): - """Necessary for making this object picklable""" - index_state = aug_state[:1] - offset = aug_state[1] - - # for backwards compatibility - if len(aug_state) > 2: - tzinfo = aug_state[2] - else: # pragma: no cover - tzinfo = None - - self.offset = offset - self.tzinfo = tzinfo - DatetimeIndex.__setstate__(self, *index_state) + import warnings + warnings.warn("DateRange is deprecated, use DatetimeIndex instead", + FutureWarning) - def equals(self, other): - if self is other: - return True - - if not isinstance(other, DatetimeIndex): - return False - - return DatetimeIndex.equals(self.view(DatetimeIndex), other) - - @property - def is_all_dates(self): - return True - - @classmethod - def _cached_range(cls, start=None, end=None, periods=None, offset=None, - time_rule=None, name=None): - if start is not None: - start = Timestamp(start) - if end is not None: - end = Timestamp(end) - - # HACK: fix this dependency later + # use old mapping if time_rule is not None: - offset = datetools.getOffset(time_rule) - - if offset is None: - raise Exception('Must provide a DateOffset!') - - if time_rule is None: - time_rule = datetools._offsetMap[offset] - - if offset not in _daterange_cache: - xdr = generate_range(_CACHE_START, _CACHE_END, offset=offset) - arr = np.array(_dt_unbox_array(list(xdr)), - dtype='M8[us]', copy=False) - - cachedRange = arr.view(DateRange) - cachedRange.offset = offset - cachedRange.tzinfo = None - cachedRange.name = None - _daterange_cache[offset] = cachedRange - else: - cachedRange = _daterange_cache[offset] - - if start is None: - if end is None: - raise Exception('Must provide start or end date!') - if periods is None: - raise Exception('Must provide number of periods!') - - assert(isinstance(end, (datetime, Timestamp))) - - end = offset.rollback(end) - - endLoc = cachedRange.get_loc(end) + 1 - startLoc = endLoc - periods - elif end is None: - assert(isinstance(start, (datetime, Timestamp))) - start = offset.rollforward(start) - - startLoc = cachedRange.get_loc(start) - if periods is None: - raise Exception('Must provide number of periods!') - - endLoc = startLoc + periods - else: - start = offset.rollforward(start) - end = offset.rollback(end) - - startLoc = cachedRange.get_loc(start) - endLoc = cachedRange.get_loc(end) + 1 - - indexSlice = cachedRange[startLoc:endLoc] - indexSlice.name = name - indexSlice.freq = time_rule - indexSlice.regular = True - - return indexSlice - - def __array_finalize__(self, obj): - if self.ndim == 0: # pragma: no cover - return self.item() - - 
self.offset = getattr(obj, 'offset', None) - - __lt__ = _bin_op(operator.lt) - __le__ = _bin_op(operator.le) - __gt__ = _bin_op(operator.gt) - __ge__ = _bin_op(operator.ge) - __eq__ = _bin_op(operator.eq) - - def __getslice__(self, i, j): - return self.__getitem__(slice(i, j)) - - def __getitem__(self, key): - """Override numpy.ndarray's __getitem__ method to work as desired""" - result = self.view(np.ndarray)[key] - - if isinstance(result, np.datetime64): - result = _dt_box(result).replace(tzinfo=self.tzinfo) - - if isinstance(key, (int, np.integer)): - return result - elif isinstance(key, slice): - new_index = result.view(DateRange) - if key.step is not None: - new_index.offset = key.step * self.offset - else: - new_index.offset = self.offset - - new_index.tzinfo = self.tzinfo - new_index.name = self.name - return new_index - else: - return DatetimeIndex(result, name=self.name) - - def summary(self): - if len(self) > 0: - index_summary = ', %s to %s' % (self[0], self[-1]) - else: - index_summary = '' - sum_line = 'DateRange: %s entries%s' % (len(self), index_summary) - sum_line += '\noffset: %s' % self.offset - if self.tzinfo is not None: - sum_line += ', tzinfo: %s' % self.tzinfo - - return sum_line - - def __repr__(self): - output = str(self.__class__) + '\n' - output += 'offset: %s, tzinfo: %s\n' % (self.offset, self.tzinfo) - if len(self) > 0: - output += '[%s, ..., %s]\n' % (self[0], self[-1]) - output += 'length: %d' % len(self) - return output - - __str__ = __repr__ - - def shift(self, n, offset=None): - """ - Specialized shift which produces a DateRange - - Parameters - ---------- - n : int - Periods to shift by - offset : DateOffset or timedelta-like, optional - - Returns - ------- - shifted : DateRange - """ - if offset is not None and offset != self.offset: - return DatetimeIndex.shift(self, n, offset) - - if n == 0: - # immutable so OK - return self - - start = self[0] + n * self.offset - end = self[-1] + n * self.offset - return DateRange(start, end, offset=self.offset, name=self.name) - - def union(self, other): - """ - Specialized union for DateRange objects. If combine - overlapping ranges with the same DateOffset, will be much - faster than Index.union - - Parameters - ---------- - other : DateRange or array-like - - Returns - ------- - y : Index or DateRange - """ - if not isinstance(other, DateRange) or other.offset != self.offset: - return DatetimeIndex.union(self.view(DatetimeIndex), other) - - if self._can_fast_union(other): - return self._fast_union(other) - else: - return DatetimeIndex.union(self, other) - - def _wrap_union_result(self, other, result): - # If we are here, _can_fast_union is false or other is not a - # DateRange, so their union has to be an Index. 
- name = self.name if self.name == other.name else None - return DatetimeIndex(result, name=name) - - def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None - if (isinstance(other, DateRange) - and self.offset == other.offset - and self._can_fast_union(other)): - joined = self._view_like(joined) - joined.name = name - return joined - else: - return DatetimeIndex(joined, name=name) - - def _can_fast_union(self, other): - offset = self.offset - - # to make our life easier, "sort" the two ranges - if self[0] <= other[0]: - left, right = self, other - else: - left, right = other, self - - left_end = left[-1] - right_start = right[0] - - # Only need to "adjoin", not overlap - return (left_end + offset) >= right_start - - def _fast_union(self, other): - # to make our life easier, "sort" the two ranges - if self[0] <= other[0]: - left, right = self, other - else: - left, right = other, self - - left_start, left_end = left[0], left[-1] - right_end = right[-1] - - if not _will_use_cache(self.offset): - # concatenate dates - if left_end < right_end: - loc = right.searchsorted(left_end, side='right') - right_chunk = right.values[loc:] - dates = np.concatenate((left.values, right_chunk)) - return self._view_like(dates) - else: - return left - else: - return DateRange(left_start, max(left_end, right_end), - offset=left.offset) - - def intersection(self, other): - """ - Specialized intersection for DateRange objects. May be much faster than - Index.union - - Parameters - ---------- - other : DateRange or array-like - - Returns - ------- - y : Index or DateRange - """ - if not isinstance(other, DateRange) or other.offset != self.offset: - return DatetimeIndex.intersection(self.view(DatetimeIndex), other) - - # to make our life easier, "sort" the two ranges - if self[0] <= other[0]: - left, right = self, other - else: - left, right = other, self - - end = min(left[-1], right[-1]) - start = right[0] - - if end < start: - return DatetimeIndex([]) - else: - lslice = slice(*left.slice_locs(start, end)) - left_chunk = left.values[lslice] - return self._view_like(left_chunk) - - def _view_like(self, ndarray): - result = ndarray.view(DateRange) - result.offset = self.offset - result.tzinfo = self.tzinfo - result.name = self.name - return result - - def tz_normalize(self, tz): - """ - Convert DateRange from one time zone to another (using pytz) - - Returns - ------- - normalized : DateRange - """ - new_dates = np.array([tz.normalize(x.replace(tzinfo=self.tzinfo)) - for x in self]) - new_dates = new_dates.view(DateRange) - new_dates.offset = self.offset - new_dates.tzinfo = tz - new_dates.name = self.name - return new_dates - - def tz_localize(self, tz): - """ - Localize tzinfo-naive DateRange to given time zone (using pytz) - - Returns - ------- - localized : DateRange - """ - new_dates = np.array( - [np.datetime64(tz.localize(x.replace(tzinfo=self.tzinfo))) - for x in self]) - new_dates = new_dates.view(DateRange) - new_dates.offset = self.offset - new_dates.tzinfo = tz - new_dates.name = self.name - return new_dates - - def tz_validate(self): - """ - For a localized time zone, verify that there are no DST ambiguities - - Returns - ------- - result : boolean - True if there are no DST ambiguities - """ - import pytz - - tz = self.tzinfo - if tz is None or tz is pytz.utc: - return True - - # See if there are any DST resolution problems - for date in self: - try: - tz.utcoffset(date.replace(tzinfo=None)) - except pytz.InvalidTimeError: - return False - - return True - 
-def generate_range(start=None, end=None, periods=None, - offset=datetools.BDay(), time_rule=None): - """ - Generates a sequence of dates corresponding to the specified time - offset. Similar to dateutil.rrule except uses pandas DateOffset - objects to represent time increments - - Parameters - ---------- - start : datetime (default None) - end : datetime (default None) - periods : int, optional - - Note - ---- - * This method is faster for generating weekdays than dateutil.rrule - * At least two of (start, end, periods) must be specified. - * If both start and end are specified, the returned dates will - satisfy start <= date <= end. - - Returns - ------- - dates : generator object - - See also - -------- - DateRange, dateutil.rrule - """ - - if time_rule is not None: - offset = datetools.getOffset(time_rule) - - if time_rule is None: - if offset in datetools._offsetNames: - time_rule = datetools._offsetNames[offset] - - start = datetools.to_datetime(start) - end = datetools.to_datetime(end) - - if start and not offset.onOffset(start): - start = offset.rollforward(start) - - if end and not offset.onOffset(end): - end = offset.rollback(end) - - if periods is None and end < start: - end = None - periods = 0 - - if end is None: - end = start + (periods - 1) * offset - - if start is None: - start = end - (periods - 1) * offset - - cur = start - if offset._normalizeFirst: - cur = datetools.normalize_date(cur) - - next_date = cur - while cur <= end: - yield cur - - # faster than cur + offset - next_date = offset.apply(cur) - if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) - cur = next_date - -# Do I want to cache UTC dates? Can't decide... - -# def _utc_in_cache_range(start, end): -# import pytz -# if start is None or end is None: -# return False - -# _CACHE_START = datetime(1950, 1, 1, tzinfo=pytz.utc) -# _CACHE_END = datetime(2030, 1, 1, tzinfo=pytz.utc) - -# try: -# assert(_isutc(start)) -# assert(_isutc(end)) -# except AssertionError: -# raise Exception('To use localized time zone, create ' -# 'DateRange with pytz.UTC then call ' -# 'tz_normalize') -# return _in_range(start, end, _CACHE_START, _CACHE_END) - -# def _isutc(dt): -# import pytz -# return dt.tzinfo is pytz.utc - -# def _hastz(dt): -# return dt is not None and dt.tzinfo is not None - -# def _have_pytz(): -# try: -# import pytz -# return True -# except ImportError: -# return False - -def _in_range(start, end, rng_start, rng_end): - if isinstance(rng_start, datetime): - rng_start = Timestamp(rng_start) - if isinstance(rng_end, datetime): - rng_end = Timestamp(rng_end) - if isinstance(start, datetime): - start = Timestamp(start) - if isinstance(end, datetime): - end = Timestamp(end) - - return start > rng_start and end < rng_end - -def _naive_in_cache_range(start, end): - if start is None or end is None: - return False - else: - return _in_range(start, end, _CACHE_START, _CACHE_END) - -def _figure_out_timezone(start, end, tzinfo): - inferred_tz = _infer_tzinfo(start, end) - tz = inferred_tz - if inferred_tz is None and tzinfo is not None: - tz = tzinfo - elif tzinfo is not None: - assert(inferred_tz == tzinfo) - # make tz naive for now - - start = start if start is None else start.replace(tzinfo=None) - end = end if end is None else end.replace(tzinfo=None) - - return start, end, tz - -def _infer_tzinfo(start, end): - def _infer(a, b): - tz = a.tzinfo - if b and b.tzinfo: - assert(tz == b.tzinfo) - return tz - tz = None - if start is not None: - tz = _infer(start, end) - elif end is not None: - 
tz = _infer(end, start) - return tz - -def _will_use_cache(offset): - return (offset.isAnchored() and - isinstance(offset, datetools.CacheableOffset)) + offset = datetools._offsetMap[time_rule] -if __name__ == '__main__': - import pytz - # just want it to work - tz = pytz.timezone('US/Eastern') - dr = DateRange(datetime(2011, 3, 12, tzinfo=pytz.utc), - periods=50, offset=datetools.Hour()) - dr2 = dr.tz_normalize(tz) + return super(DateRange, cls).__new__(cls, start=start, end=end, + periods=periods, offset=offset, tzinfo=tzinfo, name=name, + _deprecated=True, **kwds) diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index 01b151aa31422..fdd16b960c277 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -538,8 +538,6 @@ def __init__(self, n=1, **kwds): def apply(self, other): offsetOfMonth = self.getOffsetOfMonth(other) - one_month = lib.Delta(months=1, day=1) - if offsetOfMonth > other: if self.n > 0: months = self.n - 1 @@ -948,7 +946,7 @@ class Second(Tick): "A@DEC" : BYearEnd() } -_newoffsetMap = { +_newOffsetMap = { # Annual - Calendar "A@JAN" : YearEnd(month=1), "A@FEB" : YearEnd(month=2), @@ -1090,35 +1088,38 @@ class Second(Tick): "S" : Second(), "U" : None, None : None, - } - +} for i, weekday in enumerate(['MON', 'TUE', 'WED', 'THU', 'FRI']): for iweek in xrange(4): _offsetMap['WOM@%d%s' % (iweek + 1, weekday)] = \ WeekOfMonth(week=iweek, weekday=i) #NOTE: don't delete. this is for new map - _newoffsetMap['WOM@%d%s' % (iweek + 1, weekday)] = \ + _newOffsetMap['WOM@%d%s' % (iweek + 1, weekday)] = \ WeekOfMonth(week=iweek, weekday=i) _offsetNames = dict([(v, k) for k, v in _offsetMap.iteritems()]) -#NOTE: the below doesn't make sense since the values aren't unique -# could have lists for non-unique keys, but then variable output... 
-_newoffsetNames = dict([(v,k) for k,v in _newoffsetMap.iteritems()]) +# NOTE: don't use the below for exact reverse-lookups b/c it's not 1-1 +_newOffsetNames = dict([(v,k) for k,v in _newOffsetMap.iteritems()]) -def inferTimeRule(index): +def inferTimeRule(index, _deprecated=True): if len(index) < 3: raise Exception('Need at least three dates to infer time rule!') first, second, third = index[:3] - for rule, offset in _offsetMap.iteritems(): + if _deprecated: + items = _offsetMap.iteritems() + else: + items = _newOffsetMap.iteritems() + + for rule, offset in items: if (first + offset) == second and (second + offset) == third: return rule raise Exception('Could not infer time rule from data!') -def getOffset(name): +def getOffset(name, _deprecated=True): """ Return DateOffset object associated with rule name @@ -1126,7 +1127,11 @@ def getOffset(name): ------- getOffset('EOM') --> BMonthEnd(1) """ - offset = _offsetMap.get(name) + if _deprecated: + offset = _offsetMap.get(name) + else: + offset = _newOffsetMap.get(name) + if offset is not None: return offset else: @@ -1135,7 +1140,7 @@ def getOffset(name): def hasOffsetName(offset): return offset in _offsetNames -def getOffsetName(offset): +def getOffsetName(offset, _deprecated=True): """ Return rule name associated with a DateOffset object @@ -1143,7 +1148,11 @@ def getOffsetName(offset): ------- getOffsetName(BMonthEnd(1)) --> 'EOM' """ - name = _offsetNames.get(offset) + if _deprecated: + name = _offsetNames.get(offset) + else: + name = _newOffsetNames.get(offset) + if name is not None: return name else: @@ -1184,8 +1193,8 @@ def _figure_out_timezone(start, end, tzinfo): _daterange_cache = {} -def generate_range(start=_CACHE_START, end=_CACHE_END, periods=None, - offset=BDay(), freq=None): +def generate_range(start=None, end=None, periods=None, + offset=BDay(), time_rule=None, _deprecated=True): """ Generates a sequence of dates corresponding to the specified time offset. 
Similar to dateutil.rrule except uses pandas DateOffset @@ -1193,8 +1202,8 @@ def generate_range(start=_CACHE_START, end=_CACHE_END, periods=None, Parameters ---------- - start : timestamp-like (default None) - end : timestamp-like (default None) + start : datetime (default None) + end : datetime (default None) periods : int, optional Note @@ -1213,15 +1222,11 @@ def generate_range(start=_CACHE_START, end=_CACHE_END, periods=None, DateRange, dateutil.rrule """ - if freq is not None: - offset = getOffset(freq) - - if freq is None: - if offset in _offsetNames: - freq = _offsetNames[offset] + if time_rule is not None: + offset = getOffset(time_rule) - start = to_timestamp(start) - end = to_timestamp(end) + start = to_datetime(start) + end = to_datetime(end) if start and not offset.onOffset(start): start = offset.rollforward(start) diff --git a/pandas/core/index.py b/pandas/core/index.py index a2038715c47fe..59b295fa3357e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -15,7 +15,7 @@ import pandas.core.datetools as datetools from pandas.core.datetools import (_dt_box, _dt_unbox, _dt_box_array, - _dt_unbox_array, _offsetMap) + _dt_unbox_array) __all__ = ['Index'] @@ -1067,11 +1067,11 @@ class DatetimeIndex(Int64Index): dtype : NumPy dtype (default: M8[us]) copy : bool Make a copy of input ndarray - freq : string, optional - One of pandas date offset strings + offset : string or offset object, optional + One of pandas date offset strings or corresponding objects start : starting value, datetime-like, optional If data is None, start is used as the start point in generating regular - timestamp data. must conform to freq argument + timestamp data. periods : int, optional, > 0 Number of periods to generate, if generating data. Takes precedence over end argument @@ -1105,36 +1105,27 @@ class DatetimeIndex(Int64Index): __sub__ = _dt_index_op('__sub__') def __new__(cls, data=None, - freq=None, offset=None, start=None, end=None, periods=None, + offset=None, start=None, end=None, periods=None, dtype=None, copy=False, name=None, tzinfo=None, - **kwds): + _deprecated=False): - if 'timeRule' in kwds or 'time_rule' in kwds: - import warnings - warnings.warn("timeRule/time_rule is deprecated, please use freq " - "argument", DeprecationWarning,) - freq = kwds.get('timeRule', kwds.get('time_rule', None)) - - if freq is not None: - offset = datetools.getOffset(freq) - elif offset is not None and offset in datetools._offsetNames: - freq = datetools.getOffsetName(offset) + if isinstance(offset, basestring): + offset = datetools.getOffset(offset, _deprecated=_deprecated) if data is None and offset is None: - raise ValueError("Must provide offset/freq argument " - "if no data is supplied") + raise ValueError("Must provide offset argument if no data is " + "supplied") if data is None: start = datetools.to_timestamp(start) end = datetools.to_timestamp(end) if (start is not None and not isinstance(start, Timestamp)): - raise ValueError('Failed to convert %s to datetime' % start) + raise ValueError('Failed to convert %s to timestamp' % start) if (end is not None and not isinstance(end, Timestamp)): - raise ValueError('Failed to convert %s to datetime' % end) + raise ValueError('Failed to convert %s to timestamp' % end) - # inside cache range. 
Handle UTC case useCache = datetools._will_use_cache(offset) start, end, tzinfo = datetools._figure_out_timezone(start, end, @@ -1144,10 +1135,12 @@ def __new__(cls, data=None, if useCache: index = cls._cached_range(start, end, periods=periods, - offset=offset, name=name) + offset=offset, name=name, + _deprecated=_deprecated) else: xdr = datetools.generate_range(start=start, end=end, - periods=periods, offset=offset) + periods=periods, offset=offset, + _deprecated=_deprecated) index = np.array(_dt_unbox_array(list(xdr)), dtype='M8[us]', copy=False) @@ -1155,7 +1148,6 @@ def __new__(cls, data=None, index = index.view(cls) index.name = name index.offset = offset - index.freq = freq index.tzinfo = tzinfo return index @@ -1200,14 +1192,13 @@ def __new__(cls, data=None, subarr = subarr.view(cls) subarr.name = name subarr.offset = offset - subarr.freq = freq subarr.tzinfo = tzinfo return subarr @classmethod def _cached_range(cls, start=None, end=None, periods=None, offset=None, - name=None): + name=None, _deprecated=False): if start is not None: start = Timestamp(start) if end is not None: @@ -1218,7 +1209,10 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, drc = datetools._daterange_cache if offset not in drc: - xdr = datetools.generate_range(offset=offset) + xdr = datetools.generate_range(offset=offset, + start=datetools._CACHE_START, end=datetools._CACHE_END, + _deprecated=_deprecated) + arr = np.array(_dt_unbox_array(list(xdr)), dtype='M8[us]', copy=False) @@ -1267,21 +1261,22 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, # TODO: fix repr def __repr__(self): - output = str(self.__class__) + '\n' - output += 'offset: %s, tzinfo: %s\n' % (self.offset, self.tzinfo) - if len(self) > 0: - output += '[%s, ..., %s]\n' % (self[0], self[-1]) - output += 'length: %d' % len(self) - return output + if self.offset is not None: + output = str(self.__class__) + '\n' + output += 'offset: %s, tzinfo: %s\n' % (self.offset, self.tzinfo) + if len(self) > 0: + output += '[%s, ..., %s]\n' % (self[0], self[-1]) + output += 'length: %d' % len(self) + return output + else: + return super(DatetimeIndex, self).__repr__() __str__ = __repr__ - # TODO: fix reduce, setstate - def __reduce__(self): """Necessary for making this object picklable""" object_state = list(np.ndarray.__reduce__(self)) - subclass_state = self.name, self.offset, self.freq, self.tzinfo + subclass_state = self.name, self.offset, self.tzinfo object_state[2] = (object_state[2], subclass_state) return tuple(object_state) @@ -1291,8 +1286,7 @@ def __setstate__(self, state): nd_state, own_state = state self.name = own_state[0] self.offset = own_state[1] - self.freq = own_state[2] - self.tzinfo = own_state[3] + self.tzinfo = own_state[2] np.ndarray.__setstate__(self, nd_state) else: # pragma: no cover np.ndarray.__setstate__(self, state) @@ -1332,14 +1326,10 @@ def shift(self, n, offset=None): if self.offset is None: raise ValueError("Cannot shift with no offset") - if self.freq: - start = self[0] + n * self.offset - end = self[-1] + n * self.offset - return DatetimeIndex(start=start, end=end, offset=self.offset, - freq=self.freq, name=self.name) - else: - return DatetimeIndex([d + n * self.offset for d in self], - offset=self.offset, name=self.name) + start = self[0] + n * self.offset + end = self[-1] + n * self.offset + return DatetimeIndex(start=start, end=end, offset=self.offset, + name=self.name) def union(self, other): """ @@ -1418,7 +1408,6 @@ def __array_finalize__(self, obj): return 
self.item() self.offset = getattr(obj, 'offset', None) - self.freq = getattr(obj, 'freq', None) self.tzinfo = getattr(obj, 'tzinfo', None) def intersection(self, other): @@ -1467,11 +1456,9 @@ def __getitem__(self, key): return _dt_box(val, tzinfo=self.tzinfo) else: new_offset = self.offset - new_freq = self.freq if (type(key) == slice and new_offset is not None and key.step is not None): new_offset = key.step * self.offset - new_freq = None if com._is_bool_indexer(key): key = np.asarray(key) @@ -1481,7 +1468,7 @@ def __getitem__(self, key): return result return DatetimeIndex(result, name=self.name, offset=new_offset, - freq=new_freq, tzinfo=self.tzinfo) + tzinfo=self.tzinfo) # Try to run function on index first, and then on elements of index # Especially important for group-by functionality @@ -1579,7 +1566,7 @@ def equals(self, other): if (not hasattr(other, 'inferred_type') or other.inferred_type != 'datetime64'): - if self.freq is not None or self.offset is not None: + if self.offset is not None: return False try: other = DatetimeIndex(other) diff --git a/pandas/tests/test_daterange.py b/pandas/tests/test_daterange.py index 40137d3c031e9..0edd07d41842d 100644 --- a/pandas/tests/test_daterange.py +++ b/pandas/tests/test_daterange.py @@ -5,9 +5,9 @@ import numpy as np import pandas.core.datetools as datetools +from pandas.core.datetools import generate_range from pandas.core.index import Index, DatetimeIndex -from pandas.core.daterange import DateRange, generate_range -import pandas.core.daterange as daterange +from pandas.core.daterange import DateRange import pandas.util.testing as tm import pandas._tseries as lib @@ -30,8 +30,7 @@ def test_generate(self): self.assert_(np.array_equal(rng1, rng2)) def test_1(self): - eqXDateRange(dict(start=datetime(2009, 3, 25), - periods=2), + eqXDateRange(dict(start=datetime(2009, 3, 25), periods=2), [datetime(2009, 3, 25), datetime(2009, 3, 26)]) def test_2(self): @@ -326,17 +325,17 @@ def test_infer_tzinfo(self): start = eastern.localize(_start) end = eastern.localize(_end) - assert(daterange._infer_tzinfo(start, end) is eastern) - assert(daterange._infer_tzinfo(start, None) is eastern) - assert(daterange._infer_tzinfo(None, end) is eastern) + assert(datetools._infer_tzinfo(start, end) is eastern) + assert(datetools._infer_tzinfo(start, None) is eastern) + assert(datetools._infer_tzinfo(None, end) is eastern) start = utc.localize(_start) end = utc.localize(_end) - assert(daterange._infer_tzinfo(start, end) is utc) + assert(datetools._infer_tzinfo(start, end) is utc) end = eastern.localize(_end) - self.assertRaises(Exception, daterange._infer_tzinfo, start, end) - self.assertRaises(Exception, daterange._infer_tzinfo, end, start) + self.assertRaises(Exception, datetools._infer_tzinfo, start, end) + self.assertRaises(Exception, datetools._infer_tzinfo, end, start) def test_date_parse_failure(self): badly_formed_date = '2007/100/1' @@ -542,7 +541,6 @@ def test_dayoffset(self): t0.value - t1.value == 3 * us_in_day) t0 = t1 - def test_dayofmonthoffset(self): for week in (-1, 0, 1): for day in (0, 2, 4): diff --git a/pandas/tests/test_datetime64.py b/pandas/tests/test_datetime64.py index 8be72193bdc34..98950dfba40e7 100644 --- a/pandas/tests/test_datetime64.py +++ b/pandas/tests/test_datetime64.py @@ -202,24 +202,12 @@ def test_dayofmonthoffset(self): self.assert_(t.weekday() == day) def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q@JAN', start=datetime(1997,12,31), + dti1 = DatetimeIndex(offset='Q@JAN', start=datetime(1997,12,31), 
periods=100) - dti2 = DatetimeIndex(freq='Q@JAN', start=datetime(1997,12,31), + dti2 = DatetimeIndex(offset='Q@JAN', start=datetime(1997,12,31), periods=98) self.assert_( len(dti1.diff(dti2)) == 2) - #def test_datetimeindex_shift(self): - # dti = DatetimeIndex(freq='W@TUE', start=datetime(2005,1,4), n=100) - - # # fast shift - # self.assert_(dti.fshift(1)[0] == datetime(2005,1,11)) - # self.assert_(dti.fshift(-1)[0] == datetime(2004,12,28)) - - # # slow shift - # dti.contiguous = False - # self.assert_(dti.fshift(1)[0] == datetime(2005,1,11)) - # self.assert_(dti.fshift(-1)[0] == datetime(2004,12,28)) - def test_datetimecache(self): lib.flush_tcache('W@TUE') @@ -242,12 +230,6 @@ def test_datetimecache(self): lib.flush_tcache('W@TUE') - #def test_groupby_quarterly(self): - # dti = DatetimeIndex(freq='W@TUE', start=datetime(2005,1,4), n=100) - # s = Series(rand(100), index = dti) - # # s.groupby('Q@FEB').mean() - # s.groupby(lambda x: x.month).mean() - if __name__ == '__main__': import nose nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], diff --git a/pandas/util/testing.py b/pandas/util/testing.py index bfd4cc2752b7e..c79ad688ea032 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -20,7 +20,6 @@ import pandas.core.panel as panel from pandas.core.index import DatetimeIndex -from pandas.core.datetools import BDay # to_reload = ['index', 'daterange', 'series', 'frame', 'matrix', 'panel'] # for mod in to_reload:
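
A minimal usage sketch of the API change this patch makes, written against the patched development tree as shown in the diff above (the `DateRange` wrapper, the unified `offset` argument on `DatetimeIndex`, and the private `_deprecated` flag are taken from this commit and are not part of modern pandas; treat this as an illustration of the patched behavior, not a supported API):

    # Assumes the 2012-era pandas source tree with this patch applied.
    from datetime import datetime
    import warnings

    import pandas.core.datetools as datetools
    from pandas.core.index import DatetimeIndex
    from pandas.core.daterange import DateRange

    # Unified parameter: `offset` now accepts either a DateOffset object or a
    # rule string; the separate `freq` keyword has been removed.
    dti_obj = DatetimeIndex(start=datetime(2012, 1, 2), periods=10,
                            offset=datetools.bday)
    dti_str = DatetimeIndex(start=datetime(2012, 1, 2), periods=10,
                            offset='Q@JAN')  # string resolved via getOffset()

    # DateRange still constructs, but is now a thin deprecated wrapper that
    # maps time_rule through the legacy _offsetMap and issues a FutureWarning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        dr = DateRange(datetime(2012, 1, 2), periods=10)
        assert any(issubclass(w.category, FutureWarning) for w in caught)

    # Rule lookups can target either mapping through the _deprecated flag:
    old_style = datetools.getOffset('EOM', _deprecated=True)     # legacy _offsetMap
    new_style = datetools.getOffset('A@JAN', _deprecated=False)  # new _newOffsetMap

The deprecation path keeps old DateRange call sites working while all new construction funnels through DatetimeIndex with a single offset parameter.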