diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 40ae38f12fccb..a6c3c0c5d7f79 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -194,7 +194,7 @@ Other enhancements pd.to_numeric(s, downcast='unsigned') pd.to_numeric(s, downcast='integer') -- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see :ref:`documentation here ` (:issue:`10008`, :issue:`13156`) +- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here ` (:issue:`10008`, :issue:`13156`) - ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) .. ipython:: python @@ -202,7 +202,7 @@ Other enhancements idx = pd.Index(["a1a2", "b1", "c1"]) idx.str.extractall("[ab](?P\d)") -- ``Timestamp`` s can now accept positional and keyword parameters like :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) +- ``Timestamp`` can now accept positional and keyword parameters similar to :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) .. ipython:: python @@ -227,8 +227,7 @@ Other enhancements - Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) - The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) -- A ``union_categorical`` function has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) -- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. 
(:issue:`12388`) +- A top-level function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) - ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) .. _whatsnew_0190.api: @@ -238,9 +237,16 @@ API changes - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`) +- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`) - An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`) - Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`) - ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. For ``axis=None``, the output must be a DataFrame with identical columns and index labels. (:issue:`13222`) +- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) +- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) +- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`) +- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`) + .. 
_whatsnew_0190.api.tolist: @@ -361,7 +367,7 @@ We are able to preserve the join keys pd.merge(df1, df2, how='outer').dtypes Of course if you have missing values that are introduced, then the -resulting dtype will be upcast (unchanged from previous). +resulting dtype will be upcast, which is unchanged from previous behavior. .. ipython:: python @@ -419,17 +425,6 @@ Furthermore: - Passing duplicated ``percentiles`` will now raise a ``ValueError``. - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) -.. _whatsnew_0190.api.other: - -Other API changes -^^^^^^^^^^^^^^^^^ - -- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) -- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) -- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`) -- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) -- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`) - .. _whatsnew_0190.deprecations: Deprecations @@ -439,6 +434,7 @@ Deprecations - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) - ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`) - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) +- ``Timestamp.offset`` property (and named arg in the constructor) has been deprecated in favor of ``freq`` (:issue:`12160`) .. 
_whatsnew_0190.performance: @@ -503,7 +499,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 (:issue:`13398`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a ``tempfile.TemporaryFile`` on Windows with Python 3 (:issue:`13398`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) - Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) @@ -516,7 +512,7 @@ Bug Fixes - Bug in ``pd.to_datetime()`` when passing invalid datatypes (e.g. 
bool); will now respect the ``errors`` keyword (:issue:`13176`) -- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, `int16`` dtypes (:issue:`13451`) +- Bug in ``pd.to_datetime()`` which overflowed on ``int8`` and ``int16`` dtypes (:issue:`13451`) - Bug in extension dtype creation where the created types were not is/identical (:issue:`13285`) - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 23aa133125213..ff06a5f212f8b 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -481,12 +481,12 @@ def encode(obj): tz = obj.tzinfo if tz is not None: tz = u(tz.zone) - offset = obj.offset - if offset is not None: - offset = u(offset.freqstr) + freq = obj.freq + if freq is not None: + freq = u(freq.freqstr) return {u'typ': u'timestamp', u'value': obj.value, - u'offset': offset, + u'freq': freq, u'tz': tz} if isinstance(obj, NaTType): return {u'typ': u'nat'} @@ -556,7 +556,8 @@ def decode(obj): if typ is None: return obj elif typ == u'timestamp': - return Timestamp(obj[u'value'], tz=obj[u'tz'], offset=obj[u'offset']) + freq = obj[u'freq'] if 'freq' in obj else obj[u'offset'] + return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq) elif typ == u'nat': return NaT elif typ == u'period': diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack new file mode 100644 index 0000000000000..978c2c5045314 Binary files /dev/null and b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack differ diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack new file mode 100644 index 0000000000000..ea8efdc86dd2d Binary files /dev/null and b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack differ diff --git 
a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle b/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle index 5ee1f88c93a34..bb237f53476b5 100644 Binary files a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle and b/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle differ diff --git a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle b/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle new file mode 100644 index 0000000000000..db1d17a8b67c3 Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle differ diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/io/tests/generate_legacy_storage_files.py index 25fd86d899c08..d0365cb2c30b3 100644 --- a/pandas/io/tests/generate_legacy_storage_files.py +++ b/pandas/io/tests/generate_legacy_storage_files.py @@ -5,7 +5,7 @@ SparseSeries, SparseDataFrame, Index, MultiIndex, bdate_range, to_msgpack, date_range, period_range, - Timestamp, Categorical, Period) + Timestamp, NaT, Categorical, Period) from pandas.compat import u import os import sys @@ -140,6 +140,13 @@ def create_data(): int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) + timestamp = dict(normal=Timestamp('2011-01-01'), + nat=NaT, + tz=Timestamp('2011-01-01', tz='US/Eastern'), + freq=Timestamp('2011-01-01', freq='D'), + both=Timestamp('2011-01-01', tz='Asia/Tokyo', + freq='M')) + return dict(series=series, frame=frame, panel=panel, @@ -149,7 +156,8 @@ def create_data(): sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), - cat=cat) + cat=cat, + timestamp=timestamp) def create_pickle_data(): diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index ad7d6c3c9f94f..0a491a69af8e2 100644 --- a/pandas/io/tests/test_packers.py +++ 
b/pandas/io/tests/test_packers.py @@ -8,7 +8,7 @@ from distutils.version import LooseVersion from pandas import compat -from pandas.compat import u +from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, period_range, Index, Categorical) from pandas.core.common import PerformanceWarning @@ -58,6 +58,19 @@ def check_arbitrary(a, b): assert_series_equal(a, b) elif isinstance(a, Index): assert_index_equal(a, b) + elif isinstance(a, Categorical): + # Temp, + # Categorical.categories is changed from str to bytes in PY3 + # maybe the same as GH 13591 + if PY3 and b.categories.inferred_type == 'string': + pass + else: + tm.assert_categorical_equal(a, b) + elif a is NaT: + assert b is NaT + elif isinstance(a, Timestamp): + assert a == b + assert a.freq == b.freq else: assert(a == b) @@ -815,8 +828,8 @@ def check_min_structure(self, data): for typ, v in self.minimum_structure.items(): assert typ in data, '"{0}" not found in unpacked data'.format(typ) for kind in v: - assert kind in data[ - typ], '"{0}" not found in data["{1}"]'.format(kind, typ) + msg = '"{0}" not found in data["{1}"]'.format(kind, typ) + assert kind in data[typ], msg def compare(self, vf, version): # GH12277 encoding default used to be latin-1, now utf-8 @@ -839,8 +852,8 @@ def compare(self, vf, version): # use a specific comparator # if available - comparator = getattr( - self, "compare_{typ}_{dt}".format(typ=typ, dt=dt), None) + comp_method = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + comparator = getattr(self, comp_method, None) if comparator is not None: comparator(result, expected, typ, version) else: @@ -872,9 +885,8 @@ def read_msgpacks(self, version): n = 0 for f in os.listdir(pth): # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and - version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): + if (compat.PY3 and version.startswith('0.17.') and + f.split('.')[-4][-1] == '2'): continue vf = os.path.join(pth, 
f) try: diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index e337ad4dcfed2..55c14fee9e3ed 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -46,6 +46,12 @@ def compare_element(self, result, expected, typ, version=None): if typ.startswith('sp_'): comparator = getattr(tm, "assert_%s_equal" % typ) comparator(result, expected, exact_indices=False) + elif typ == 'timestamp': + if expected is pd.NaT: + assert result is pd.NaT + else: + tm.assert_equal(result, expected) + tm.assert_equal(result.freq, expected.freq) else: comparator = getattr(tm, "assert_%s_equal" % typ, tm.assert_almost_equal) diff --git a/pandas/lib.pxd b/pandas/lib.pxd index 36c91faa00036..554b0248e97ea 100644 --- a/pandas/lib.pxd +++ b/pandas/lib.pxd @@ -1,3 +1,4 @@ # prototypes for sharing cdef bint is_null_datetimelike(v) +cpdef bint is_period(val) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 262e036ff44f1..234ac7ea2c60c 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -33,7 +33,7 @@ def is_bool(object obj): def is_complex(object obj): return util.is_complex_object(obj) -def is_period(object val): +cpdef bint is_period(object val): """ Return a boolean if this is a Period object """ return util.is_period_object(val) @@ -538,9 +538,6 @@ def is_time_array(ndarray[object] values): return False return True -def is_period(object o): - from pandas import Period - return isinstance(o,Period) def is_period_array(ndarray[object] values): cdef Py_ssize_t i, n = len(values) diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index aca0d0dbc107b..af2e295ae0cfc 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -24,7 +24,7 @@ cimport cython from datetime cimport * cimport util cimport lib -from lib cimport is_null_datetimelike +from lib cimport is_null_datetimelike, is_period import lib from pandas import tslib from tslib import Timedelta, Timestamp, iNaT, NaT @@ -484,8 +484,11 @@ 
def extract_freq(ndarray[object] values): for i in range(n): p = values[i] + try: - return p.freq + # now Timestamp / NaT has freq attr + if is_period(p): + return p.freq except AttributeError: pass diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b86b248ead290..a6246790f83cb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -965,7 +965,7 @@ def test_indexing_with_datetime_tz(self): # indexing - fast_xs df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) result = df.iloc[5] - expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', offset='D') + expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') self.assertEqual(result, expected) result = df.loc[5] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a80a3af56b18f..c632704b7c5eb 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -426,10 +426,10 @@ def test_constructor_with_datetime_tz(self): # indexing result = s.iloc[0] self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', offset='D')) + tz='US/Eastern', freq='D')) result = s[0] self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', offset='D')) + tz='US/Eastern', freq='D')) result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c4ccef13f2844..1b1db90ea713d 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2365,7 +2365,7 @@ def test_reset_index_datetime(self): 'a': np.arange(6, dtype='int64')}, columns=['level_0', 'level_1', 'a']) expected['level_1'] = expected['level_1'].apply( - lambda d: pd.Timestamp(d, offset='D', tz=tz)) + lambda d: pd.Timestamp(d, freq='D', tz=tz)) assert_frame_equal(df.reset_index(), expected) 
def test_reset_index_period(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 83cb768b37aaa..9b36bc5907066 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -558,7 +558,7 @@ def _generate(cls, start, end, periods, name, offset, @property def _box_func(self): - return lambda x: Timestamp(x, offset=self.offset, tz=self.tz) + return lambda x: Timestamp(x, freq=self.offset, tz=self.tz) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1199,8 +1199,9 @@ def __iter__(self): for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, l) - converted = tslib.ints_to_pydatetime( - data[start_i:end_i], tz=self.tz, offset=self.offset, box=True) + converted = tslib.ints_to_pydatetime(data[start_i:end_i], + tz=self.tz, freq=self.freq, + box=True) for v in converted: yield v diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 360944e355b4d..17b6dd12a5c02 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -124,10 +124,11 @@ def test_minmax(self): def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') - self.assertEqual(np.min(dr), Timestamp( - '2016-01-15 00:00:00', offset='D')) - self.assertEqual(np.max(dr), Timestamp( - '2016-01-20 00:00:00', offset='D')) + + self.assertEqual(np.min(dr), + Timestamp('2016-01-15 00:00:00', freq='D')) + self.assertEqual(np.max(dr), + Timestamp('2016-01-20 00:00:00', freq='D')) errmsg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) @@ -148,11 +149,11 @@ def test_round(self): elt = rng[1] expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, 
offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), ]) expected_elt = expected_rng[1] @@ -175,10 +176,10 @@ def test_repeat(self): freq='30Min', tz=tz) expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), ]) tm.assert_index_equal(rng.repeat(reps), expected_rng) @@ -192,10 +193,10 @@ def test_numpy_repeat(self): freq='30Min', tz=tz) expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), ]) tm.assert_index_equal(np.repeat(rng, reps), expected_rng) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b0caa1f6a77cb..e594d31e57296 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3884,36 +3884,36 @@ def test_datetimeindex_accessors(self): self.assertEqual(dti.is_month_start[0], 1) tests = [ - (Timestamp('2013-06-01', offset='M').is_month_start, 1), - (Timestamp('2013-06-01', offset='BM').is_month_start, 
0), - (Timestamp('2013-06-03', offset='M').is_month_start, 0), - (Timestamp('2013-06-03', offset='BM').is_month_start, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_month_end, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_quarter_end, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_year_end, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_month_start, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_quarter_start, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_year_start, 1), - (Timestamp('2013-03-31', offset='QS-FEB').is_month_end, 1), - (Timestamp('2013-03-31', offset='QS-FEB').is_quarter_end, 0), - (Timestamp('2013-03-31', offset='QS-FEB').is_year_end, 0), - (Timestamp('2013-02-01', offset='QS-FEB').is_month_start, 1), - (Timestamp('2013-02-01', offset='QS-FEB').is_quarter_start, 1), - (Timestamp('2013-02-01', offset='QS-FEB').is_year_start, 1), - (Timestamp('2013-06-30', offset='BQ').is_month_end, 0), - (Timestamp('2013-06-30', offset='BQ').is_quarter_end, 0), - (Timestamp('2013-06-30', offset='BQ').is_year_end, 0), - (Timestamp('2013-06-28', offset='BQ').is_month_end, 1), - (Timestamp('2013-06-28', offset='BQ').is_quarter_end, 1), - (Timestamp('2013-06-28', offset='BQ').is_year_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_month_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_quarter_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_year_end, 0), - (Timestamp('2013-06-28', offset='BQS-APR').is_month_end, 1), - (Timestamp('2013-06-28', offset='BQS-APR').is_quarter_end, 1), - (Timestamp('2013-03-29', offset='BQS-APR').is_year_end, 1), - (Timestamp('2013-11-01', offset='AS-NOV').is_year_start, 1), - (Timestamp('2013-10-31', offset='AS-NOV').is_year_end, 1), + (Timestamp('2013-06-01', freq='M').is_month_start, 1), + (Timestamp('2013-06-01', freq='BM').is_month_start, 0), + (Timestamp('2013-06-03', freq='M').is_month_start, 0), + (Timestamp('2013-06-03', freq='BM').is_month_start, 1), + (Timestamp('2013-02-28', 
freq='Q-FEB').is_month_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0), + (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), + (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), + (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), + (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), + (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), + (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), (Timestamp('2012-02-01').days_in_month, 29), (Timestamp('2013-02-01').days_in_month, 28)] diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index c6436163b9edb..ce88edcf4249b 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -255,6 +255,21 @@ def test_constructor_keyword(self): hour=1, minute=2, second=3, microsecond=999999)), repr(Timestamp('2015-11-12 01:02:03.999999'))) + def 
test_constructor_offset_depr(self): + # GH 12160 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp('2011-01-01', offset='D') + self.assertEqual(ts.freq, 'D') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.assertEqual(ts.offset, 'D') + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01', offset='D', freq='D') + def test_conversion(self): # GH 9255 ts = Timestamp('2000-01-01') @@ -312,13 +327,13 @@ def test_repr(self): self.assertNotIn(freq_repr, repr(date_tz)) self.assertEqual(date_tz, eval(repr(date_tz))) - date_freq = Timestamp(date, offset=freq) + date_freq = Timestamp(date, freq=freq) self.assertIn(date, repr(date_freq)) self.assertNotIn(tz_repr, repr(date_freq)) self.assertIn(freq_repr, repr(date_freq)) self.assertEqual(date_freq, eval(repr(date_freq))) - date_tz_freq = Timestamp(date, tz=tz, offset=freq) + date_tz_freq = Timestamp(date, tz=tz, freq=freq) self.assertIn(date, repr(date_tz_freq)) self.assertIn(tz_repr, repr(date_tz_freq)) self.assertIn(freq_repr, repr(date_tz_freq)) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 0db4282808a26..e45523be738df 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -63,6 +63,7 @@ from pandas.compat import parse_date, string_types, iteritems, StringIO, callabl import operator import collections +import warnings # initialize numpy import_array() @@ -86,23 +87,24 @@ try: except NameError: # py3 basestring = str -cdef inline object create_timestamp_from_ts(int64_t value, pandas_datetimestruct dts, object tz, object offset): +cdef inline object create_timestamp_from_ts(int64_t value, pandas_datetimestruct dts, + object tz, object freq): cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - ts_base.value = value - ts_base.offset = offset + ts_base.freq = freq ts_base.nanosecond = 
dts.ps / 1000 return ts_base -cdef inline object create_datetime_from_ts(int64_t value, pandas_datetimestruct dts, object tz, object offset): +cdef inline object create_datetime_from_ts(int64_t value, pandas_datetimestruct dts, + object tz, object freq): return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): +def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): # convert an i8 repr to an ndarray of datetimes or Timestamp (if box == True) cdef: @@ -113,9 +115,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): ndarray[object] result = np.empty(n, dtype=object) object (*func_create)(int64_t, pandas_datetimestruct, object, object) - if box and util.is_string_object(offset): + if box and util.is_string_object(freq): from pandas.tseries.frequencies import to_offset - offset = to_offset(offset) + freq = to_offset(freq) if box: func_create = create_timestamp_from_ts @@ -130,7 +132,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, tz, offset) + result[i] = func_create(value, dts, tz, freq) elif _is_tzlocal(tz) or _is_fixed_offset(tz): for i in range(n): value = arr[i] @@ -138,7 +140,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - dt = create_datetime_from_ts(value, dts, tz, offset) + dt = create_datetime_from_ts(value, dts, tz, freq) dt = dt + tz.utcoffset(dt) if box: dt = Timestamp(dt) @@ -163,7 +165,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): new_tz = tz pandas_datetime_to_datetimestruct(value + deltas[pos], PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, new_tz, offset) + result[i] = 
func_create(value, dts, new_tz, freq) else: for i in range(n): @@ -172,7 +174,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, None, offset) + result[i] = func_create(value, dts, None, freq) return result @@ -259,10 +261,10 @@ class Timestamp(_Timestamp): """ @classmethod - def fromordinal(cls, ordinal, offset=None, tz=None): + def fromordinal(cls, ordinal, freq=None, tz=None, offset=None): """ passed an ordinal, translate and convert to a ts note: by definition there cannot be any tz info on the ordinal itself """ - return cls(datetime.fromordinal(ordinal),offset=offset,tz=tz) + return cls(datetime.fromordinal(ordinal), freq=freq, tz=tz, offset=offset) @classmethod def now(cls, tz=None): @@ -309,11 +311,12 @@ class Timestamp(_Timestamp): def combine(cls, date, time): return cls(datetime.combine(date, time)) - def __new__(cls, - object ts_input=_no_input, object offset=None, tz=None, unit=None, - year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - tzinfo=None): + def __new__(cls, object ts_input=_no_input, + object freq=None, tz=None, unit=None, + year=None, month=None, day=None, + hour=None, minute=None, second=None, microsecond=None, + tzinfo=None, + object offset=None): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. # @@ -338,15 +341,24 @@ class Timestamp(_Timestamp): cdef _TSObject ts cdef _Timestamp ts_base + if offset is not None: + # deprecate offset kwd in 0.19.0, GH13593 + if freq is not None: + msg = "Can only specify freq or offset, not both" + raise TypeError(msg) + warnings.warn("offset is deprecated. Use freq instead", + FutureWarning) + freq = offset + if ts_input is _no_input: # User passed keyword arguments. 
return Timestamp(datetime(year, month, day, hour or 0, minute or 0, second or 0, microsecond or 0, tzinfo), tz=tzinfo) - elif is_integer_object(offset): + elif is_integer_object(freq): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, microsecond[, tzinfo]]]]]) - return Timestamp(datetime(ts_input, offset, tz, unit or 0, + return Timestamp(datetime(ts_input, freq, tz, unit or 0, year or 0, month or 0, day or 0, hour), tz=hour) ts = convert_to_tsobject(ts_input, tz, unit, 0, 0) @@ -354,9 +366,9 @@ class Timestamp(_Timestamp): if ts.value == NPY_NAT: return NaT - if util.is_string_object(offset): + if util.is_string_object(freq): from pandas.tseries.frequencies import to_offset - offset = to_offset(offset) + freq = to_offset(freq) # make datetime happy ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month, @@ -365,7 +377,7 @@ class Timestamp(_Timestamp): # fill out rest of data ts_base.value = ts.value - ts_base.offset = offset + ts_base.freq = freq ts_base.nanosecond = ts.dts.ps / 1000 return ts_base @@ -433,16 +445,18 @@ class Timestamp(_Timestamp): return self.tzinfo @property - def freq(self): - return self.offset + def offset(self): + warnings.warn(".offset is deprecated. 
Use .freq instead", + FutureWarning) + return self.freq def __setstate__(self, state): self.value = state[0] - self.offset = state[1] + self.freq = state[1] self.tzinfo = state[2] def __reduce__(self): - object_state = self.value, self.offset, self.tzinfo + object_state = self.value, self.freq, self.tzinfo return (Timestamp, object_state) def to_period(self, freq=None): @@ -491,7 +505,7 @@ class Timestamp(_Timestamp): @property def freqstr(self): - return getattr(self.offset, 'freqstr', self.offset) + return getattr(self.freq, 'freqstr', self.freq) @property def is_month_start(self): @@ -602,7 +616,7 @@ class Timestamp(_Timestamp): def replace(self, **kwds): return Timestamp(datetime.replace(self, **kwds), - offset=self.offset) + freq=self.freq) def to_pydatetime(self, warn=True): """ @@ -911,16 +925,6 @@ cdef inline bint _is_multiple(int64_t us, int64_t mult): return us % mult == 0 -def apply_offset(ndarray[object] values, object offset): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] new_values - object boxed - - result = np.empty(n, dtype='M8[ns]') - new_values = result.view('i8') - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs @@ -955,7 +959,7 @@ cdef str _NDIM_STRING = "ndim" cdef class _Timestamp(datetime): cdef readonly: int64_t value, nanosecond - object offset # frequency reference + object freq # frequency reference def __hash__(_Timestamp self): if self.nanosecond: @@ -1029,9 +1033,9 @@ cdef class _Timestamp(datetime): pass tz = ", tz='{0}'".format(zone) if zone is not None else "" - offset = ", offset='{0}'".format(self.offset.freqstr) if self.offset is not None else "" + freq = ", freq='{0}'".format(self.freq.freqstr) if self.freq is not None else "" - return "Timestamp('{stamp}'{tz}{offset})".format(stamp=stamp, tz=tz, offset=offset) + return "Timestamp('{stamp}'{tz}{freq})".format(stamp=stamp, tz=tz, freq=freq) cdef bint _compare_outside_nanorange(_Timestamp self, datetime 
other, int op) except -1: @@ -1083,17 +1087,17 @@ cdef class _Timestamp(datetime): if is_timedelta64_object(other): other_int = other.astype('timedelta64[ns]').view('i8') - return Timestamp(self.value + other_int, tz=self.tzinfo, offset=self.offset) + return Timestamp(self.value + other_int, tz=self.tzinfo, freq=self.freq) elif is_integer_object(other): - if self.offset is None: + if self.freq is None: raise ValueError("Cannot add integral value to Timestamp " - "without offset.") - return Timestamp((self.offset * other).apply(self), offset=self.offset) + "without freq.") + return Timestamp((self.freq * other).apply(self), freq=self.freq) elif isinstance(other, timedelta) or hasattr(other, 'delta'): nanos = _delta_to_nanoseconds(other) - result = Timestamp(self.value + nanos, tz=self.tzinfo, offset=self.offset) + result = Timestamp(self.value + nanos, tz=self.tzinfo, freq=self.freq) if getattr(other, 'normalize', False): result = Timestamp(normalize_date(result)) return result