diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 3cb902900a49e0..3dff5eed8a81a3 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -281,6 +281,43 @@ that the dates have been converted to UTC

 .. ipython:: python

     pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)

+.. _whatsnew_0240.api_breaking.period_end_time:
+
+Time values in ``dt.end_time`` and ``to_timestamp(how='end')``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The time values in :class:`Period` and :class:`PeriodIndex` objects are now set
+to '23:59:59.999999999' when calling :attr:`Series.dt.end_time`, :attr:`Period.end_time`,
+:attr:`PeriodIndex.end_time`, :func:`Period.to_timestamp()` with ``how='end'``,
+or :func:`PeriodIndex.to_timestamp()` with ``how='end'`` (:issue:`17157`).
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+    In [2]: p = pd.Period('2017-01-01', 'D')
+    In [3]: pi = pd.PeriodIndex([p])
+
+    In [4]: pd.Series(pi).dt.end_time[0]
+    Out[4]: Timestamp(2017-01-01 00:00:00)
+
+    In [5]: p.end_time
+    Out[5]: Timestamp(2017-01-01 23:59:59.999999999)
+
+Current Behavior:
+
+Calling :attr:`Series.dt.end_time` will now result in a time of '23:59:59.999999999',
+as is already the case with :attr:`Period.end_time`. For example:
+
+.. ipython:: python
+
+    p = pd.Period('2017-01-01', 'D')
+    pi = pd.PeriodIndex([p])
+
+    pd.Series(pi).dt.end_time[0]
+
+    p.end_time
+
 .. _whatsnew_0240.api.datetimelike.normalize:

 Tick DateOffset Normalize Restrictions
@@ -615,6 +652,7 @@ Reshaping
 - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`)
 - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`)
 - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`)
+- Bug in :meth:`DataFrame.replace` raising a ``RecursionError`` when converting an out-of-bounds ``datetime64[ns, tz]`` value (:issue:`20380`)
 - :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`)
 -
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index ecfc7355dddfcd..124792638e3df2 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -129,7 +129,7 @@ def is_lexsorted(list list_of_arrays):
     for i in range(nlevels):
         arr = list_of_arrays[i]
         assert arr.dtype.name == 'int64'
-        vecs[i] = arr.data
+        vecs[i] = cnp.PyArray_DATA(arr)

     # Assume uniqueness??
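# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: the hunk above replaces the
# deprecated ndarray ``.data`` buffer attribute with the NumPy C-API accessor
# ``cnp.PyArray_DATA``.  A minimal standalone Cython function using the same
# pattern (the name ``first_int64`` is hypothetical) could look like this:

cimport numpy as cnp
import numpy as np

cnp.import_array()


def first_int64(cnp.ndarray[cnp.int64_t] arr):
    # Read the first element directly from the raw buffer; for a C-contiguous
    # int64 array this is equivalent to ``arr[0]``.
    cdef cnp.int64_t* buf
    assert arr.flags['C_CONTIGUOUS']
    buf = <cnp.int64_t*>cnp.PyArray_DATA(arr)
    return buf[0]

# usage (from Python, after compiling): first_int64(np.arange(3, dtype=np.int64))
# ---------------------------------------------------------------------------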
with nogil: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5e4a431caca003..5681d01c6bb25c 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -7,10 +7,12 @@ from cython cimport Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() from util cimport numeric, get_nat @@ -118,7 +120,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, counts[:] = _counts[1:] data = np.empty((K, N), dtype=np.float64) - ptr = data.data + ptr = cnp.PyArray_DATA(data) take_2d_axis1_float64_float64(values.T, indexer, out=data) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 31ef4b7a3e807a..5918560cf14365 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -37,7 +37,7 @@ cdef inline bint is_definitely_invalid_key(object val): return True # we have a _data, means we are a NDFrame - return (PySlice_Check(val) or cnp.PyArray_Check(val) + return (PySlice_Check(val) or util.is_array(val) or PyList_Check(val) or hasattr(val, '_data')) @@ -104,7 +104,7 @@ cdef class IndexEngine: void* data_ptr loc = self.get_loc(key) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or util.is_array(loc): return arr[loc] else: return get_value_at(arr, loc, tz=tz) @@ -120,7 +120,7 @@ cdef class IndexEngine: loc = self.get_loc(key) value = convert_scalar(arr, value) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or util.is_array(loc): arr[loc] = value else: util.set_value_at(arr, loc, value) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 98eca92fd1ab2c..753cba6ce62aa0 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -16,8 +16,6 @@ The full license is in the LICENSE file, distributed with this software. 
#include "numpy/arrayscalars.h" -PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; } - PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i, PyObject* v) { npy_intp i = (npy_intp)_i; @@ -40,16 +38,10 @@ PANDAS_INLINE const char* get_c_string(PyObject* obj) { #endif } -PANDAS_INLINE PyObject* char_to_string(const char* data) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_FromString(data); -#else - return PyString_FromString(data); -#endif -} - void set_array_not_contiguous(PyArrayObject* ao) { - ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + // Numpy>=1.8-compliant equivalent to: + // ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + PyArray_CLEARFLAGS(ao, (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)); } #endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7621ac912d4d59..4335e7baeafe96 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -888,7 +888,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = trans.data + tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) result_a = np.empty(n, dtype=np.int64) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 65fb0f331d039f..96d7994bdc822d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -34,6 +34,7 @@ cdef extern from "../src/datetime/np_datetime.h": cimport util from util cimport is_period_object, is_string_object, INT32_MIN +from pandas._libs.tslibs.timedeltas import Timedelta from timestamps import Timestamp from timezones cimport is_utc, is_tzlocal, get_dst_info from timedeltas cimport delta_to_nanoseconds @@ -1221,6 +1222,10 @@ cdef class _Period(object): freq = self._maybe_convert_freq(freq) how = _validate_end_alias(how) + end = how == 'E' + if end: + return (self + 1).to_timestamp(how='start') - Timedelta(1, 'ns') + if freq is None: base, mult = get_freq_code(self.freq) freq = get_to_timestamp_base(base) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index efdb1570ed8786..624ed7ced26543 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,10 +1,18 @@ -from numpy cimport ndarray -cimport numpy as cnp -cnp.import_array() -cimport cpython from cpython cimport PyTypeObject +cdef extern from *: + """ + PyObject* char_to_string(const char* data) { + #if PY_VERSION_HEX >= 0x03000000 + return PyUnicode_FromString(data); + #else + return PyString_FromString(data); + #endif + } + """ + object char_to_string(const char* data) + cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil @@ -19,6 +27,8 @@ cdef extern from "Python.h": cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type + ctypedef signed long long int64_t + int _import_array() except -1 cdef extern from "numpy/ndarrayobject.h": PyTypeObject PyTimedeltaArrType_Type @@ -29,142 +39,177 @@ cdef extern from "numpy/ndarrayobject.h": bint PyArray_IsIntegerScalar(obj) nogil bint PyArray_Check(obj) nogil +cdef extern from "numpy/npy_common.h": + int64_t NPY_MIN_INT64 + + +cdef extern from "../src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + + +cdef 
inline int64_t get_nat(): + return NPY_MIN_INT64 + + +cdef inline int import_array() except -1: + _import_array() + + # -------------------------------------------------------------------- # Type Checking cdef inline bint is_string_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, compat.string_types)` + + Parameters + ---------- + val : object + + Returns + ------- + is_string : bool + """ return PyString_Check(obj) or PyUnicode_Check(obj) cdef inline bint is_integer_object(object obj) nogil: + """ + Cython equivalent of + + `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)` + + Parameters + ---------- + val : object + + Returns + ------- + is_integer : bool + + Notes + ----- + This counts np.timedelta64 objects as integers. + """ return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) cdef inline bint is_float_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (float, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_float : bool + """ return (PyFloat_Check(obj) or (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) cdef inline bint is_complex_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (complex, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_complex : bool + """ return (PyComplex_Check(obj) or PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) cdef inline bint is_bool_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (bool, np.bool_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_bool : bool + """ return (PyBool_Check(obj) or PyObject_TypeCheck(obj, &PyBoolArrType_Type)) cdef inline bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - -# -------------------------------------------------------------------- - -cdef extern from "../src/numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - const char *get_c_string(object) except NULL - object char_to_string(char*) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t - - cnp.float32_t - cnp.float64_t - -cdef extern from "../src/headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - - -cdef inline object get_value_at(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - int casted + """ + Cython equivalent of `isinstance(val, np.timedelta64)` - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) + Parameters + ---------- + val : object - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: - raise IndexError('index out of bounds') + Returns + ------- + is_timedelta64 : bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - return get_value_1d(arr, i) +cdef inline bint is_datetime64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.datetime64)` -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): - """Sets a value 
into the array without checking the writeable flag. + Parameters + ---------- + val : object - This should be used when setting values in a loop, check the writeable - flag above the loop and then eschew the check on each iteration. + Returns + ------- + is_datetime64 : bool """ - cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - assign_value_1d(arr, i, value) -cdef inline set_value_at(ndarray arr, object loc, object value): - """Sets a value into the array after checking that the array is mutable. +cdef inline bint is_array(object val): """ - if not cnp.PyArray_ISWRITEABLE(arr): - raise ValueError('assignment destination is read-only') - - set_value_at_unsafe(arr, loc, value) + Cython equivalent of `isinstance(val, np.ndarray)` + Parameters + ---------- + val : object -cdef inline is_array(object o): - return cnp.PyArray_Check(o) + Returns + ------- + is_ndarray : bool + """ + return PyArray_Check(val) -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False +cdef inline bint is_period_object(object val): + """ + Cython equivalent of `isinstance(val, pd.Period)` + Parameters + ---------- + val : object -cdef inline bint is_period_object(object val): + Returns + ------- + is_period : bool + """ return getattr(val, '_typ', '_typ') == 'period' @@ -181,3 +226,7 @@ cdef inline bint is_offset_object(object val): is_date_offset : bool """ return getattr(val, '_typ', None) == "dateoffset" + + +cdef inline bint _checknull(object val): + return val is None or (PyFloat_Check(val) and val != val) diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 0b7e66902cbb1c..134f34330d8aa6 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -1 +1,81 @@ from tslibs.util cimport * + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray + + +cdef extern from "src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + object get_value_1d(ndarray, Py_ssize_t) + const char *get_c_string(object) except NULL + + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + + +cdef inline object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i, sz + int casted + + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0 and sz > 0: + i += sz + elif i >= sz or sz == 0: + raise IndexError('index out of bounds') + + return get_value_1d(arr, i) + + +cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): + """Sets a value into the array without checking the writeable flag. + + This should be used when setting values in a loop, check the writeable + flag above the loop and then eschew the check on each iteration. 
+ """ + cdef: + Py_ssize_t i, sz + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0: + i += sz + elif i >= sz: + raise IndexError('index out of bounds') + + assign_value_1d(arr, i, value) + + +cdef inline set_value_at(ndarray arr, object loc, object value): + """Sets a value into the array after checking that the array is mutable. + """ + if not cnp.PyArray_ISWRITEABLE(arr): + raise ValueError('assignment destination is read-only') + + set_value_at_unsafe(arr, loc, value) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 00d53ad82b2dca..26aaab2b1b237c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1235,11 +1235,9 @@ def _generate_regular_range(cls, start, end, periods, freq): tz = None if isinstance(start, Timestamp): tz = start.tz - start = start.to_pydatetime() if isinstance(end, Timestamp): tz = end.tz - end = end.to_pydatetime() xdr = generate_range(start=start, end=end, periods=periods, offset=freq) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 60464bcfda1e7b..76614454e5a101 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -688,10 +688,6 @@ def size(self): def shape(self): return self.left.shape - @property - def itemsize(self): - return self.left.itemsize + self.right.itemsize - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e369679d2146f2..3971e90e64a14e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ import warnings from pandas._libs import tslib, lib, tslibs -from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime from pandas.compat import string_types, text_type, PY3 from .common import (ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -838,7 +838,13 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Soft conversions if datetime: - values = lib.maybe_convert_objects(values, convert_datetime=datetime) + # GH 20380, when datetime is beyond year 2262, hence outside + # bound of nanosecond-resolution 64-bit integers. + try: + values = lib.maybe_convert_objects(values, + convert_datetime=datetime) + except OutOfBoundsDatetime: + pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 905073645fcb39..b8cbb41501dd19 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -9,7 +9,8 @@ from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, - IntervalDtypeType, ExtensionDtype) + IntervalDtypeType, PandasExtensionDtype, ExtensionDtype, + _pandas_registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, @@ -1709,17 +1710,9 @@ def is_extension_array_dtype(arr_or_dtype): Third-party libraries may implement arrays or types satisfying this interface as well. 
""" - from pandas.core.arrays import ExtensionArray - - if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): - arr_or_dtype = arr_or_dtype._values - - try: - arr_or_dtype = pandas_dtype(arr_or_dtype) - except TypeError: - pass - - return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return (isinstance(dtype, ExtensionDtype) or + registry.find(dtype) is not None) def is_complex_dtype(arr_or_dtype): @@ -1999,12 +1992,12 @@ def pandas_dtype(dtype): return dtype # registered extension types - result = registry.find(dtype) + result = _pandas_registry.find(dtype) or registry.find(dtype) if result is not None: return result # un-registered extension types - elif isinstance(dtype, ExtensionDtype): + elif isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)): return dtype # try a numpy dtype @@ -2023,7 +2016,9 @@ def pandas_dtype(dtype): # also catch some valid dtypes such as object, np.object_ and 'object' # which we safeguard against by catching them earlier and returning # np.dtype(valid_dtype) before this condition is evaluated. - if dtype in [object, np.object_, 'object', 'O']: + if is_hashable(dtype) and dtype in [object, np.object_, 'object', 'O']: + # check hashability to avoid errors/DeprecationWarning when we get + # here and `dtype` is an array return npdtype elif npdtype.kind == 'O': raise TypeError("dtype '{}' not understood".format(dtype)) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index cf771a127a6966..f53ccc86fc4ff6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -22,9 +22,9 @@ class Registry(object): -------- registry.register(MyExtensionDtype) """ - dtypes = [] + def __init__(self): + self.dtypes = [] - @classmethod def register(self, dtype): """ Parameters @@ -50,7 +50,7 @@ def find(self, dtype): dtype_type = dtype if not isinstance(dtype, type): dtype_type = type(dtype) - if issubclass(dtype_type, (PandasExtensionDtype, ExtensionDtype)): + if issubclass(dtype_type, ExtensionDtype): return dtype return None @@ -65,6 +65,9 @@ def find(self, dtype): registry = Registry() +# TODO(Extension): remove the second registry once all internal extension +# dtypes are real extension dtypes. 
+_pandas_registry = Registry() class PandasExtensionDtype(_DtypeOpsMixin): @@ -822,7 +825,7 @@ def is_dtype(cls, dtype): # register the dtypes in search order -registry.register(DatetimeTZDtype) -registry.register(PeriodDtype) registry.register(IntervalDtype) registry.register(CategoricalDtype) +_pandas_registry.register(DatetimeTZDtype) +_pandas_registry.register(PeriodDtype) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 0b467760d82d92..838b12468e85e7 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -369,8 +369,14 @@ def shape(self): @property def itemsize(self): - # Avoid materializing ndarray[Interval] - return self._data.itemsize + msg = ('IntervalIndex.itemsize is deprecated and will be removed in ' + 'a future version') + warnings.warn(msg, FutureWarning, stacklevel=2) + + # supress the warning from the underlying left/right itemsize + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + return self.left.itemsize + self.right.itemsize def __len__(self): return len(self.left) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b315e3ec20830a..32aa89010b2060 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -25,7 +25,7 @@ from pandas.core.tools.datetimes import parse_time_string from pandas._libs.lib import infer_dtype -from pandas._libs import tslib, index as libindex +from pandas._libs import tslib, index as libindex, Timedelta from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, _validate_end_alias) @@ -501,6 +501,16 @@ def to_timestamp(self, freq=None, how='start'): """ how = _validate_end_alias(how) + end = how == 'E' + if end: + if freq == 'B': + # roll forward to ensure we land on B date + adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') + return self.to_timestamp(how='start') + adjust + else: + adjust = Timedelta(1, 'ns') + return (self + 1).to_timestamp(how='start') - adjust + if freq is None: base, mult = _gfc(self.freq) freq = frequencies.get_to_timestamp_base(base) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0f3ffb8055330b..8ee91ded4ab7a2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -802,12 +802,14 @@ def replace(self, to_replace, value, inplace=False, filter=None, copy=not inplace) for b in blocks] return blocks except (TypeError, ValueError): - # try again with a compatible block block = self.astype(object) - return block.replace( - to_replace=original_to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert) + return block.replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert) def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 02ac7fc7d5ed7c..55c841ba1fc46b 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -9,7 +9,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, registry) + IntervalDtype, CategoricalDtype, registry, _pandas_registry) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, @@ -775,21 +775,31 @@ def test_update_dtype_errors(self, bad_dtype): @pytest.mark.parametrize( 'dtype', - [DatetimeTZDtype, CategoricalDtype, 
- PeriodDtype, IntervalDtype]) + [CategoricalDtype, IntervalDtype]) def test_registry(dtype): assert dtype in registry.dtypes +@pytest.mark.parametrize('dtype', [DatetimeTZDtype, PeriodDtype]) +def test_pandas_registry(dtype): + assert dtype not in registry.dtypes + assert dtype in _pandas_registry.dtypes + + @pytest.mark.parametrize( 'dtype, expected', [('int64', None), ('interval', IntervalDtype()), ('interval[int64]', IntervalDtype()), ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), - ('category', CategoricalDtype()), - ('period[D]', PeriodDtype('D')), - ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) + ('category', CategoricalDtype())]) def test_registry_find(dtype, expected): - assert registry.find(dtype) == expected + + +@pytest.mark.parametrize( + 'dtype, expected', + [('period[D]', PeriodDtype('D')), + ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) +def test_pandas_registry_find(dtype, expected): + assert _pandas_registry.find(dtype) == expected diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b48395efaf5c8a..f72cf8cdaafe9d 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -74,29 +74,29 @@ def test_corr_non_numeric(self): tm.assert_frame_equal(result, expected) @td.skip_if_no_scipy - def test_corr_nooverlap(self): + @pytest.mark.parametrize('meth', ['pearson', 'kendall', 'spearman']) + def test_corr_nooverlap(self, meth): # nothing in common - for meth in ['pearson', 'kendall', 'spearman']: - df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, 1, 1.5, 1], - 'C': [np.nan, np.nan, np.nan, np.nan, - np.nan, np.nan]}) - rs = df.corr(meth) - assert isna(rs.loc['A', 'B']) - assert isna(rs.loc['B', 'A']) - assert rs.loc['A', 'A'] == 1 - assert rs.loc['B', 'B'] == 1 - assert isna(rs.loc['C', 'C']) + df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan], + 'B': [np.nan, np.nan, np.nan, 1, 1.5, 1], + 'C': [np.nan, np.nan, np.nan, np.nan, + np.nan, np.nan]}) + rs = df.corr(meth) + assert isna(rs.loc['A', 'B']) + assert isna(rs.loc['B', 'A']) + assert rs.loc['A', 'A'] == 1 + assert rs.loc['B', 'B'] == 1 + assert isna(rs.loc['C', 'C']) @td.skip_if_no_scipy - def test_corr_constant(self): + @pytest.mark.parametrize('meth', ['pearson', 'spearman']) + def test_corr_constant(self, meth): # constant --> all NA - for meth in ['pearson', 'spearman']: - df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, 1, 1, 1]}) - rs = df.corr(meth) - assert isna(rs.values).all() + df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], + 'B': [np.nan, np.nan, np.nan, 1, 1, 1]}) + rs = df.corr(meth) + assert isna(rs.values).all() def test_corr_int(self): # dtypes other than float64 #1761 @@ -658,21 +658,21 @@ def test_numeric_only_flag(self, meth): pytest.raises(TypeError, lambda: getattr(df2, meth)( axis=1, numeric_only=False)) - def test_mixed_ops(self): + @pytest.mark.parametrize('op', ['mean', 'std', 'var', + 'skew', 'kurt', 'sem']) + def test_mixed_ops(self, op): # GH 16116 df = DataFrame({'int': [1, 2, 3, 4], 'float': [1., 2., 3., 4.], 'str': ['a', 'b', 'c', 'd']}) - for op in ['mean', 'std', 'var', 'skew', - 'kurt', 'sem']: + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context('use_bottleneck', False): result = getattr(df, op)() assert len(result) == 2 - with pd.option_context('use_bottleneck', False): - result = getattr(df, op)() - assert len(result) == 2 
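# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: most of the test changes in this
# diff follow the same mechanical refactor -- a for-loop over cases inside one
# test becomes a ``@pytest.mark.parametrize`` decorator, so each case runs and
# reports as its own test.  A minimal, hypothetical example of the pattern:

import numpy as np
import pytest

# before:
#
#   def test_reductions(self):
#       for op in ['mean', 'std', 'var']:
#           assert np.isfinite(getattr(np.arange(10.0), op)())

# after:
@pytest.mark.parametrize('op', ['mean', 'std', 'var'])
def test_reductions(op):
    result = getattr(np.arange(10.0), op)()
    assert np.isfinite(result)
# ---------------------------------------------------------------------------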
- def test_cumsum(self): self.tsframe.loc[5:10, 0] = nan self.tsframe.loc[10:15, 1] = nan diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 344838493f0b1c..f18163d51c7210 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -120,16 +120,15 @@ def test_apply_standard_nonunique(self): rs = df.T.apply(lambda s: s[0], axis=0) assert_series_equal(rs, xp) - def test_with_string_args(self): - - for arg in ['sum', 'mean', 'min', 'max', 'std']: - result = self.frame.apply(arg) - expected = getattr(self.frame, arg)() - tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('arg', ['sum', 'mean', 'min', 'max', 'std']) + def test_with_string_args(self, arg): + result = self.frame.apply(arg) + expected = getattr(self.frame, arg)() + tm.assert_series_equal(result, expected) - result = self.frame.apply(arg, axis=1) - expected = getattr(self.frame, arg)(axis=1) - tm.assert_series_equal(result, expected) + result = self.frame.apply(arg, axis=1) + expected = getattr(self.frame, arg)(axis=1) + tm.assert_series_equal(result, expected) def test_apply_broadcast_deprecated(self): with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 004fb4eb0c128a..0bc74c6890ee9e 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -674,29 +674,12 @@ def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): assert_frame_equal(aa, ea) assert_frame_equal(ab, eb) - def test_align_fill_method_inner(self): - for meth in ['pad', 'bfill']: - for ax in [0, 1, None]: - for fax in [0, 1]: - self._check_align_fill('inner', meth, ax, fax) - - def test_align_fill_method_outer(self): - for meth in ['pad', 'bfill']: - for ax in [0, 1, None]: - for fax in [0, 1]: - self._check_align_fill('outer', meth, ax, fax) - - def test_align_fill_method_left(self): - for meth in ['pad', 'bfill']: - for ax in [0, 1, None]: - for fax in [0, 1]: - self._check_align_fill('left', meth, ax, fax) - - def test_align_fill_method_right(self): - for meth in ['pad', 'bfill']: - for ax in [0, 1, None]: - for fax in [0, 1]: - self._check_align_fill('right', meth, ax, fax) + @pytest.mark.parametrize('meth', ['pad', 'bfill']) + @pytest.mark.parametrize('ax', [0, 1, None]) + @pytest.mark.parametrize('fax', [0, 1]) + @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_align_fill_method(self, how, meth, ax, fax): + self._check_align_fill(how, meth, ax, fax) def _check_align_fill(self, kind, meth, ax, fax): left = self.frame.iloc[0:4, :10] diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fdf50805ad8184..1702b2e7d29a44 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -72,18 +72,18 @@ def test_operators(self): assert (df + df).equals(df) assert_frame_equal(df + df, df) - def test_ops_np_scalar(self): - vals, xs = np.random.rand(5, 3), [nan, 7, -23, 2.718, -3.14, np.inf] + @pytest.mark.parametrize('other', [nan, 7, -23, 2.718, -3.14, np.inf]) + def test_ops_np_scalar(self, other): + vals = np.random.randn(5, 3) f = lambda x: DataFrame(x, index=list('ABCDE'), columns=['jim', 'joe', 'jolie']) df = f(vals) - for x in xs: - assert_frame_equal(df / np.array(x), f(vals / x)) - assert_frame_equal(np.array(x) * df, f(vals * x)) - assert_frame_equal(df + np.array(x), f(vals + x)) - 
assert_frame_equal(np.array(x) - df, f(x - vals)) + assert_frame_equal(df / np.array(other), f(vals / other)) + assert_frame_equal(np.array(other) * df, f(vals * other)) + assert_frame_equal(df + np.array(other), f(vals + other)) + assert_frame_equal(np.array(other) - df, f(other - vals)) def test_operators_boolean(self): @@ -116,41 +116,40 @@ def test_operators_boolean(self): True, index=[1], columns=['A']) assert_frame_equal(result, DataFrame(1, index=[1], columns=['A'])) - def f(): - DataFrame(1.0, index=[1], columns=['A']) | DataFrame( - True, index=[1], columns=['A']) - pytest.raises(TypeError, f) + df1 = DataFrame(1.0, index=[1], columns=['A']) + df2 = DataFrame(True, index=[1], columns=['A']) + with pytest.raises(TypeError): + df1 | df2 - def f(): - DataFrame('foo', index=[1], columns=['A']) | DataFrame( - True, index=[1], columns=['A']) - pytest.raises(TypeError, f) + df1 = DataFrame('foo', index=[1], columns=['A']) + df2 = DataFrame(True, index=[1], columns=['A']) + with pytest.raises(TypeError): + df1 | df2 - def test_operators_none_as_na(self): + @pytest.mark.parametrize('op', [operator.add, operator.sub, + operator.mul, operator.truediv]) + def test_operators_none_as_na(self, op): df = DataFrame({"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object) - ops = [operator.add, operator.sub, operator.mul, operator.truediv] - # since filling converts dtypes from object, changed expected to be # object - for op in ops: - filled = df.fillna(np.nan) - result = op(df, 3) - expected = op(filled, 3).astype(object) - expected[com.isna(expected)] = None - assert_frame_equal(result, expected) + filled = df.fillna(np.nan) + result = op(df, 3) + expected = op(filled, 3).astype(object) + expected[com.isna(expected)] = None + assert_frame_equal(result, expected) - result = op(df, df) - expected = op(filled, filled).astype(object) - expected[com.isna(expected)] = None - assert_frame_equal(result, expected) + result = op(df, df) + expected = op(filled, filled).astype(object) + expected[com.isna(expected)] = None + assert_frame_equal(result, expected) - result = op(df, df.fillna(7)) - assert_frame_equal(result, expected) + result = op(df, df.fillna(7)) + assert_frame_equal(result, expected) - result = op(df.fillna(7), df) - assert_frame_equal(result, expected, check_dtype=False) + result = op(df.fillna(7), df) + assert_frame_equal(result, expected, check_dtype=False) def test_comparison_invalid(self): @@ -978,8 +977,11 @@ def test_boolean_comparison(self): result = df.values > b_r assert_numpy_array_equal(result, expected.values) - pytest.raises(ValueError, df.__gt__, b_c) - pytest.raises(ValueError, df.values.__gt__, b_c) + with pytest.raises(ValueError): + df > b_c + + with pytest.raises(ValueError): + df.values > b_c # == expected = DataFrame([[False, False], [True, False], [False, False]]) @@ -998,7 +1000,9 @@ def test_boolean_comparison(self): result = df.values == b_r assert_numpy_array_equal(result, expected.values) - pytest.raises(ValueError, lambda: df == b_c) + with pytest.raises(ValueError): + df == b_c + assert df.values.shape != b_c.shape # with alignment diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 482210966fe6ba..d56df2371b2e3a 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -5,7 +5,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (PeriodIndex, period_range, DataFrame, date_range, - Index, to_datetime, DatetimeIndex) + Index, to_datetime, DatetimeIndex, 
Timedelta) def _permute(obj): @@ -51,6 +51,7 @@ def test_frame_to_time_stamp(self): df['mix'] = 'a' exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + exp_index = exp_index + Timedelta(1, 'D') - Timedelta(1, 'ns') result = df.to_timestamp('D', 'end') tm.assert_index_equal(result.index, exp_index) tm.assert_numpy_array_equal(result.values, df.values) @@ -66,22 +67,26 @@ def _get_with_delta(delta, freq='A-DEC'): delta = timedelta(hours=23) result = df.to_timestamp('H', 'end') exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'h') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23, minutes=59) result = df.to_timestamp('T', 'end') exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'm') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) result = df.to_timestamp('S', 'end') delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) # columns df = df.T exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + exp_index = exp_index + Timedelta(1, 'D') - Timedelta(1, 'ns') result = df.to_timestamp('D', 'end', axis=1) tm.assert_index_equal(result.columns, exp_index) tm.assert_numpy_array_equal(result.values, df.values) @@ -93,16 +98,19 @@ def _get_with_delta(delta, freq='A-DEC'): delta = timedelta(hours=23) result = df.to_timestamp('H', 'end', axis=1) exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'h') - Timedelta(1, 'ns') tm.assert_index_equal(result.columns, exp_index) delta = timedelta(hours=23, minutes=59) result = df.to_timestamp('T', 'end', axis=1) exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'm') - Timedelta(1, 'ns') tm.assert_index_equal(result.columns, exp_index) result = df.to_timestamp('S', 'end', axis=1) delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result.columns, exp_index) # invalid axis diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a226f8de3c8bd9..a02f78bfaf8a5c 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1029,11 +1029,10 @@ def test_bool_arith_expr(self, parser, engine): expect = self.frame.a[self.frame.a < 1] + self.frame.b assert_series_equal(res, expect) - def test_invalid_type_for_operator_raises(self, parser, engine): + @pytest.mark.parametrize('op', ['+', '-', '*', '/']) + def test_invalid_type_for_operator_raises(self, parser, engine, op): df = DataFrame({'a': [1, 2], 'b': ['c', 'd']}) - ops = '+', '-', '*', '/' - for op in ops: - with tm.assert_raises_regex(TypeError, - r"unsupported operand type\(s\) " - "for .+: '.+' and '.+'"): - df.eval('a {0} b'.format(op), engine=engine, parser=parser) + with tm.assert_raises_regex(TypeError, + r"unsupported operand type\(s\) " + "for .+: '.+' and '.+'"): + df.eval('a {0} b'.format(op), engine=engine, parser=parser) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index dd83a94b7062a4..227484abb82c13 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -547,14 +547,12 @@ def test_regex_replace_numeric_to_object_conversion(self): assert_frame_equal(res, expec) assert res.a.dtype == np.object_ - def 
test_replace_regex_metachar(self): - metachars = '[]', '()', r'\d', r'\w', r'\s' - - for metachar in metachars: - df = DataFrame({'a': [metachar, 'else']}) - result = df.replace({'a': {metachar: 'paren'}}) - expected = DataFrame({'a': ['paren', 'else']}) - assert_frame_equal(result, expected) + @pytest.mark.parametrize('metachar', ['[]', '()', r'\d', r'\w', r'\s']) + def test_replace_regex_metachar(self, metachar): + df = DataFrame({'a': [metachar, 'else']}) + result = df.replace({'a': {metachar: 'paren'}}) + expected = DataFrame({'a': ['paren', 'else']}) + assert_frame_equal(result, expected) def test_replace(self): self.tsframe['A'][:5] = nan @@ -757,40 +755,37 @@ def test_replace_for_new_dtypes(self): result = tsframe.fillna(method='bfill') assert_frame_equal(result, tsframe.fillna(method='bfill')) - def test_replace_dtypes(self): - # int - df = DataFrame({'ints': [1, 2, 3]}) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16) - assert_frame_equal(result, expected) - - # bools - df = DataFrame({'bools': [True, False, True]}) - result = df.replace(False, True) - assert result.values.all() - - # complex blocks - df = DataFrame({'complex': [1j, 2j, 3j]}) - result = df.replace(1j, 0j) - expected = DataFrame({'complex': [0j, 2j, 3j]}) - assert_frame_equal(result, expected) - - # datetime blocks - prev = datetime.today() - now = datetime.today() - df = DataFrame({'datetime64': Index([prev, now, prev])}) - result = df.replace(prev, now) - expected = DataFrame({'datetime64': Index([now] * 3)}) + @pytest.mark.parametrize('frame, to_replace, value, expected', [ + (DataFrame({'ints': [1, 2, 3]}), 1, 0, + DataFrame({'ints': [0, 2, 3]})), + (DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)), + (DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)), + (DataFrame({'bools': [True, False, True]}), False, True, + DataFrame({'bools': [True, True, True]})), + (DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0, + DataFrame({'complex': [0j, 2j, 3j]})), + (DataFrame({'datetime64': Index([datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28)])}), + datetime(2018, 5, 28), datetime(2018, 7, 28), + DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})), + # GH 20380 + (DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}), + 'foo', 'bar', + DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})), + (DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), + 'B': [0, np.nan, 2]}), + Timestamp('20130102', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + Timestamp('20130103', tz='US/Eastern')], + 'B': [0, np.nan, 2]})) + ]) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, 'replace')(to_replace, value) assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index ebf6c5e37b9162..2f90d24f652cad 100644 --- 
a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -855,21 +855,21 @@ def _test_stack_with_multiindex(multiindex): dtype=df.dtypes[0]) assert_frame_equal(result, expected) - def test_stack_preserve_categorical_dtype(self): + @pytest.mark.parametrize('ordered', [False, True]) + @pytest.mark.parametrize('labels', [list("yxz"), list("yxy")]) + def test_stack_preserve_categorical_dtype(self, ordered, labels): # GH13854 - for ordered in [False, True]: - for labels in [list("yxz"), list("yxy")]: - cidx = pd.CategoricalIndex(labels, categories=list("xyz"), - ordered=ordered) - df = DataFrame([[10, 11, 12]], columns=cidx) - result = df.stack() - - # `MutliIndex.from_product` preserves categorical dtype - - # it's tested elsewhere. - midx = pd.MultiIndex.from_product([df.index, cidx]) - expected = Series([10, 11, 12], index=midx) - - tm.assert_series_equal(result, expected) + cidx = pd.CategoricalIndex(labels, categories=list("xyz"), + ordered=ordered) + df = DataFrame([[10, 11, 12]], columns=cidx) + result = df.stack() + + # `MutliIndex.from_product` preserves categorical dtype - + # it's tested elsewhere. + midx = pd.MultiIndex.from_product([df.index, cidx]) + expected = Series([10, 11, 12], index=midx) + + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("level", [0, 'baz']) def test_unstack_swaplevel_sortlevel(self, level): diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 47d4d15420f1df..22fb8b2942bea6 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -301,172 +301,6 @@ def test_construct_with_different_start_end_string_format(self): Timestamp('2013-01-01 02:00:00+09:00')]) tm.assert_index_equal(result, expected) - -class TestGenRangeGeneration(object): - - def test_generate(self): - rng1 = list(generate_range(START, END, offset=BDay())) - rng2 = list(generate_range(START, END, time_rule='B')) - assert rng1 == rng2 - - def test_generate_cday(self): - rng1 = list(generate_range(START, END, offset=CDay())) - rng2 = list(generate_range(START, END, time_rule='C')) - assert rng1 == rng2 - - def test_1(self): - rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) - expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] - assert rng == expected - - def test_2(self): - rng = list(generate_range(start=datetime(2008, 1, 1), - end=datetime(2008, 1, 3))) - expected = [datetime(2008, 1, 1), - datetime(2008, 1, 2), - datetime(2008, 1, 3)] - assert rng == expected - - def test_3(self): - rng = list(generate_range(start=datetime(2008, 1, 5), - end=datetime(2008, 1, 6))) - expected = [] - assert rng == expected - - def test_precision_finer_than_offset(self): - # GH 9907 - result1 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2016-04-22 00:00:00', freq='Q') - result2 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2015-06-22 00:00:04', freq='W') - expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', - '2015-12-31 00:00:03', '2016-03-31 00:00:03'] - expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', - '2015-05-03 00:00:03', '2015-05-10 00:00:03', - '2015-05-17 00:00:03', '2015-05-24 00:00:03', - '2015-05-31 00:00:03', '2015-06-07 00:00:03', - '2015-06-14 00:00:03', '2015-06-21 00:00:03'] - expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', - freq='Q-DEC', tz=None) - expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', - freq='W-SUN', tz=None) - 
tm.assert_index_equal(result1, expected1) - tm.assert_index_equal(result2, expected2) - - dt1, dt2 = '2017-01-01', '2017-01-01' - tz1, tz2 = 'US/Eastern', 'Europe/London' - - @pytest.mark.parametrize("start,end", [ - (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)), - (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)), - (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)), - (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)) - ]) - def test_mismatching_tz_raises_err(self, start, end): - # issue 18488 - with pytest.raises(TypeError): - pd.date_range(start, end) - with pytest.raises(TypeError): - pd.DatetimeIndex(start, end, freq=BDay()) - - -class TestBusinessDateRange(object): - - def test_constructor(self): - bdate_range(START, END, freq=BDay()) - bdate_range(START, periods=20, freq=BDay()) - bdate_range(end=START, periods=20, freq=BDay()) - - msg = 'periods must be a number, got B' - with tm.assert_raises_regex(TypeError, msg): - date_range('2011-1-1', '2012-1-1', 'B') - - with tm.assert_raises_regex(TypeError, msg): - bdate_range('2011-1-1', '2012-1-1', 'B') - - msg = 'freq must be specified for bdate_range; use date_range instead' - with tm.assert_raises_regex(TypeError, msg): - bdate_range(START, END, periods=10, freq=None) - - def test_naive_aware_conflicts(self): - naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - - msg = 'tz-naive.*tz-aware' - with tm.assert_raises_regex(TypeError, msg): - naive.join(aware) - - with tm.assert_raises_regex(TypeError, msg): - aware.join(naive) - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, freq=BDay()) - DatetimeIndex._cached_range(START, periods=20, freq=BDay()) - DatetimeIndex._cached_range(end=START, periods=20, freq=BDay()) - - with tm.assert_raises_regex(TypeError, "freq"): - DatetimeIndex._cached_range(START, END) - - with tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(START, freq=BDay()) - - with tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(end=END, freq=BDay()) - - with tm.assert_raises_regex(TypeError, "start or end"): - DatetimeIndex._cached_range(periods=20, freq=BDay()) - - def test_cached_range_bug(self): - rng = date_range('2010-09-01 05:00:00', periods=50, - freq=DateOffset(hours=6)) - assert len(rng) == 50 - assert rng[0] == datetime(2010, 9, 1, 5) - - def test_timezone_comparaison_bug(self): - # smoke test - start = Timestamp('20130220 10:00', tz='US/Eastern') - result = date_range(start, periods=2, tz='US/Eastern') - assert len(result) == 2 - - def test_timezone_comparaison_assert(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - msg = 'Inferred time zone not equal to passed time zone' - with tm.assert_raises_regex(AssertionError, msg): - date_range(start, periods=2, tz='Europe/Berlin') - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = bdate_range(end=end, periods=20) - firstDate = end - 19 * BDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - with pytest.raises(ValueError): - Timestamp(badly_formed_date) - - with pytest.raises(ValueError): - bdate_range(start=badly_formed_date, periods=10) - - with pytest.raises(ValueError): - bdate_range(end=badly_formed_date, periods=10) - - with pytest.raises(ValueError): - bdate_range(badly_formed_date, badly_formed_date) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = 
bdate_range('12/5/2011', '12/5/2011') - rng2 = bdate_range('12/2/2011', '12/5/2011') - rng2.freq = BDay() - - result = rng1.union(rng2) - assert isinstance(result, DatetimeIndex) - def test_error_with_zero_monthends(self): msg = r'Offset <0 \* MonthEnds> did not increment date' with tm.assert_raises_regex(ValueError, msg): @@ -658,6 +492,184 @@ def test_freq_divides_end_in_nanos(self): tm.assert_index_equal(result_1, expected_1) tm.assert_index_equal(result_2, expected_2) + def test_cached_range_bug(self): + rng = date_range('2010-09-01 05:00:00', periods=50, + freq=DateOffset(hours=6)) + assert len(rng) == 50 + assert rng[0] == datetime(2010, 9, 1, 5) + + def test_timezone_comparaison_bug(self): + # smoke test + start = Timestamp('20130220 10:00', tz='US/Eastern') + result = date_range(start, periods=2, tz='US/Eastern') + assert len(result) == 2 + + def test_timezone_comparaison_assert(self): + start = Timestamp('20130220 10:00', tz='US/Eastern') + msg = 'Inferred time zone not equal to passed time zone' + with tm.assert_raises_regex(AssertionError, msg): + date_range(start, periods=2, tz='Europe/Berlin') + + +class TestGenRangeGeneration(object): + + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, time_rule='B')) + assert rng1 == rng2 + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, time_rule='C')) + assert rng1 == rng2 + + def test_1(self): + rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) + expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] + assert rng == expected + + def test_2(self): + rng = list(generate_range(start=datetime(2008, 1, 1), + end=datetime(2008, 1, 3))) + expected = [datetime(2008, 1, 1), + datetime(2008, 1, 2), + datetime(2008, 1, 3)] + assert rng == expected + + def test_3(self): + rng = list(generate_range(start=datetime(2008, 1, 5), + end=datetime(2008, 1, 6))) + expected = [] + assert rng == expected + + def test_precision_finer_than_offset(self): + # GH 9907 + result1 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2016-04-22 00:00:00', freq='Q') + result2 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2015-06-22 00:00:04', freq='W') + expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', + '2015-12-31 00:00:03', '2016-03-31 00:00:03'] + expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', + '2015-05-03 00:00:03', '2015-05-10 00:00:03', + '2015-05-17 00:00:03', '2015-05-24 00:00:03', + '2015-05-31 00:00:03', '2015-06-07 00:00:03', + '2015-06-14 00:00:03', '2015-06-21 00:00:03'] + expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', + freq='Q-DEC', tz=None) + expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', + freq='W-SUN', tz=None) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + dt1, dt2 = '2017-01-01', '2017-01-01' + tz1, tz2 = 'US/Eastern', 'Europe/London' + + @pytest.mark.parametrize("start,end", [ + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)), + (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)) + ]) + def test_mismatching_tz_raises_err(self, start, end): + # issue 18488 + with pytest.raises(TypeError): + pd.date_range(start, end) + with pytest.raises(TypeError): + pd.DatetimeIndex(start, end, freq=BDay()) + + +class TestBusinessDateRange(object): + + def 
test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + + msg = 'periods must be a number, got B' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') + + msg = 'freq must be specified for bdate_range; use date_range instead' + with tm.assert_raises_regex(TypeError, msg): + bdate_range(START, END, periods=10, freq=None) + + def test_naive_aware_conflicts(self): + naive = bdate_range(START, END, freq=BDay(), tz=None) + aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") + + msg = 'tz-naive.*tz-aware' + with tm.assert_raises_regex(TypeError, msg): + naive.join(aware) + + with tm.assert_raises_regex(TypeError, msg): + aware.join(naive) + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, freq=BDay()) + DatetimeIndex._cached_range(START, periods=20, freq=BDay()) + DatetimeIndex._cached_range(end=START, periods=20, freq=BDay()) + + with tm.assert_raises_regex(TypeError, "freq"): + DatetimeIndex._cached_range(START, END) + + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, freq=BDay()) + + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, freq=BDay()) + + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, freq=BDay()) + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + with pytest.raises(ValueError): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError): + bdate_range(start=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(badly_formed_date, badly_formed_date) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range('12/5/2011', '12/5/2011') + rng2 = bdate_range('12/2/2011', '12/5/2011') + rng2.freq = BDay() + + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + @pytest.mark.parametrize('closed', ['left', 'right']) + def test_bdays_and_open_boundaries(self, closed): + # GH 6673 + start = '2018-07-21' # Saturday + end = '2018-07-29' # Sunday + result = pd.date_range(start, end, freq='B', closed=closed) + + bday_start = '2018-07-23' # Monday + bday_end = '2018-07-27' # Friday + expected = pd.date_range(bday_start, bday_end, freq='D') + tm.assert_index_equal(result, expected) + class TestCustomDateRange(object): diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index e179286e839db2..71f56c5bc11645 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -989,9 +989,11 @@ def test_itemsize(self): # GH 19209 left = np.arange(0, 4, dtype='i8') right = np.arange(1, 5, dtype='i8') - - result = IntervalIndex.from_arrays(left, right).itemsize expected = 16 # 8 * 2 + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = IntervalIndex.from_arrays(left, right).itemsize + assert result == expected @pytest.mark.parametrize('new_closed', [ diff --git 
a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 923d826fe1a5e8..405edba83dc7a6 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -366,6 +366,19 @@ def test_periods_number_check(self): with pytest.raises(ValueError): period_range('2011-1-1', '2012-1-1', 'B') + def test_start_time(self): + # GH 17157 + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + # GH 17157 + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + expected_index = expected_index.shift(1, freq='D').shift(-1, freq='ns') + tm.assert_index_equal(index.end_time, expected_index) + def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index 56bd2adf587195..a66a81fe99cd46 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Tests for PeriodIndex behaving like a vectorized Period scalar""" -from pandas import PeriodIndex, date_range +from pandas import PeriodIndex, date_range, Timedelta import pandas.util.testing as tm @@ -14,4 +14,5 @@ def test_start_time(self): def test_end_time(self): index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + expected_index += Timedelta(1, 'D') - Timedelta(1, 'ns') tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 16b558916df2df..c4ed07d98413f0 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -3,6 +3,7 @@ import pytest import pandas as pd +from pandas import Timedelta import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import lrange @@ -60,6 +61,7 @@ def test_to_timestamp(self): exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') result = series.to_timestamp(how='end') + exp_index = exp_index + Timedelta(1, 'D') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) assert result.name == 'foo' @@ -74,16 +76,19 @@ def _get_with_delta(delta, freq='A-DEC'): delta = timedelta(hours=23) result = series.to_timestamp('H', 'end') exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'h') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23, minutes=59) result = series.to_timestamp('T', 'end') exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 'm') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) result = series.to_timestamp('S', 'end') delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') @@ -92,6 +97,7 @@ def _get_with_delta(delta, freq='A-DEC'): exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', freq='H') result = 
series.to_timestamp(how='end') + exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) assert result.name == 'foo' @@ -284,6 +290,7 @@ def test_to_timestamp_pi_mult(self): result = idx.to_timestamp(how='E') expected = DatetimeIndex(['2011-02-28', 'NaT', '2011-03-31'], name='idx') + expected = expected + Timedelta(1, 'D') - Timedelta(1, 'ns') tm.assert_index_equal(result, expected) def test_to_timestamp_pi_combined(self): @@ -298,11 +305,13 @@ def test_to_timestamp_pi_combined(self): expected = DatetimeIndex(['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') + expected = expected + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result, expected) result = idx.to_timestamp(how='E', freq='H') expected = DatetimeIndex(['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') + expected = expected + Timedelta(1, 'h') - Timedelta(1, 'ns') tm.assert_index_equal(result, expected) def test_period_astype_to_timestamp(self): @@ -312,6 +321,7 @@ def test_period_astype_to_timestamp(self): tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns') tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], @@ -321,6 +331,7 @@ def test_period_astype_to_timestamp(self): exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], tz='US/Eastern') + exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns') res = pi.astype('datetime64[ns, US/Eastern]', how='end') tm.assert_index_equal(res, exp) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index d47d75d2f3485d..a5e75de2a267ec 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import operator import pytest import numpy as np @@ -13,7 +12,6 @@ Series, Timestamp, Timedelta) from pandas.errors import PerformanceWarning, NullFrequencyError -from pandas.core import ops @pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), @@ -270,53 +268,6 @@ def test_tdi_floordiv_timedelta_scalar(self, scalar_td): class TestTimedeltaIndexArithmetic(object): # Addition and Subtraction Operations - # ------------------------------------------------------------- - # Invalid Operations - - @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) - @pytest.mark.parametrize('op', [operator.add, ops.radd, - operator.sub, ops.rsub]) - def test_tdi_add_sub_float(self, op, other): - dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - tdi = dti - dti.shift(1) - with pytest.raises(TypeError): - op(tdi, other) - - def test_tdi_add_str_invalid(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - with pytest.raises(TypeError): - tdi + 'a' - with pytest.raises(TypeError): - 'a' + tdi - - @pytest.mark.parametrize('freq', [None, 'H']) - def test_tdi_sub_period(self, freq): - # GH#13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with pytest.raises(TypeError): - idx - p - - with pytest.raises(TypeError): - p - idx - - @pytest.mark.parametrize('op', [operator.add, ops.radd, - operator.sub, ops.rsub]) - @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H']) - @pytest.mark.parametrize('tdi_freq', [None, 'H']) - def 
test_dti_sub_pi(self, tdi_freq, pi_freq, op): - # GH#20049 subtracting PeriodIndex should raise TypeError - tdi = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=tdi_freq) - dti = pd.Timestamp('2018-03-07 17:16:40') + tdi - pi = dti.to_period(pi_freq) - with pytest.raises(TypeError): - op(dti, pi) - # ------------------------------------------------------------- # TimedeltaIndex.shift is used by __add__/__sub__ @@ -626,29 +577,6 @@ def test_tdi_isub_timedeltalike(self, delta): rng -= delta tm.assert_index_equal(rng, expected) - # ------------------------------------------------------------- - # Binary operations TimedeltaIndex and datetime-like - - def test_tdi_sub_timestamp_raises(self): - idx = TimedeltaIndex(['1 day', '2 day']) - msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assert_raises_regex(TypeError, msg): - idx - Timestamp('2011-01-01') - - def test_tdi_add_timestamp(self): - idx = TimedeltaIndex(['1 day', '2 day']) - - result = idx + Timestamp('2011-01-01') - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - - def test_tdi_radd_timestamp(self): - idx = TimedeltaIndex(['1 day', '2 day']) - - result = Timestamp('2011-01-01') + idx - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - # ------------------------------------------------------------- # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index eccd86a888fb98..7a97d4ecaa8d57 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -5,6 +5,7 @@ from datetime import datetime, date, timedelta import pandas as pd +from pandas import Timedelta import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import text_type, iteritems @@ -274,12 +275,14 @@ def test_timestamp_tz_arg_dateutil_from_string(self): def test_timestamp_mult(self): p = pd.Period('2011-01', freq='M') - assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') - assert p.to_timestamp(how='E') == pd.Timestamp('2011-01-31') + assert p.to_timestamp(how='S') == Timestamp('2011-01-01') + expected = Timestamp('2011-02-01') - Timedelta(1, 'ns') + assert p.to_timestamp(how='E') == expected p = pd.Period('2011-01', freq='3M') - assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') - assert p.to_timestamp(how='E') == pd.Timestamp('2011-03-31') + assert p.to_timestamp(how='S') == Timestamp('2011-01-01') + expected = Timestamp('2011-04-01') - Timedelta(1, 'ns') + assert p.to_timestamp(how='E') == expected def test_construction(self): i1 = Period('1/1/2005', freq='M') @@ -611,19 +614,19 @@ def _ex(p): p = Period('1985', freq='A') result = p.to_timestamp('H', how='end') - expected = datetime(1985, 12, 31, 23) + expected = Timestamp(1986, 1, 1) - Timedelta(1, 'ns') assert result == expected result = p.to_timestamp('3H', how='end') assert result == expected result = p.to_timestamp('T', how='end') - expected = datetime(1985, 12, 31, 23, 59) + expected = Timestamp(1986, 1, 1) - Timedelta(1, 'ns') assert result == expected result = p.to_timestamp('2T', how='end') assert result == expected result = p.to_timestamp(how='end') - expected = datetime(1985, 12, 31) + expected = Timestamp(1986, 1, 1) - Timedelta(1, 'ns') assert result == expected expected = datetime(1985, 1, 1) @@ -1038,9 +1041,10 @@ def test_add_raises(self): dt1 + dt2 boxes = [lambda x: x, lambda 
x: pd.Series([x]), lambda x: pd.Index([x])] + ids = ['identity', 'Series', 'Index'] - @pytest.mark.parametrize('lbox', boxes) - @pytest.mark.parametrize('rbox', boxes) + @pytest.mark.parametrize('lbox', boxes, ids=ids) + @pytest.mark.parametrize('rbox', boxes, ids=ids) def test_add_timestamp_raises(self, rbox, lbox): # GH # 17983 ts = pd.Timestamp('2017') diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 840c80d6775a55..ed3191cf849c0d 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -237,6 +237,23 @@ def test_rename_axis_inplace(self): assert no_return is None assert_series_equal(result, expected) + def test_set_axis_inplace_axes(self, axis_series): + # GH14636 + ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64') + + expected = ser.copy() + expected.index = list('abcd') + + # inplace=True + # The FutureWarning comes from the fact that we would like to have + # inplace default to False some day + for inplace, warn in [(None, FutureWarning), (True, None)]: + result = ser.copy() + kwargs = {'inplace': inplace} + with tm.assert_produces_warning(warn): + result.set_axis(list('abcd'), axis=axis_series, **kwargs) + tm.assert_series_equal(result, expected) + def test_set_axis_inplace(self): # GH14636 @@ -245,17 +262,6 @@ def test_set_axis_inplace(self): expected = s.copy() expected.index = list('abcd') - for axis in 0, 'index': - # inplace=True - # The FutureWarning comes from the fact that we would like to have - # inplace default to False some day - for inplace, warn in (None, FutureWarning), (True, None): - result = s.copy() - kwargs = {'inplace': inplace} - with tm.assert_produces_warning(warn): - result.set_axis(list('abcd'), axis=axis, **kwargs) - tm.assert_series_equal(result, expected) - # inplace=False result = s.set_axis(list('abcd'), axis=0, inplace=False) tm.assert_series_equal(expected, result) @@ -266,7 +272,7 @@ def test_set_axis_inplace(self): tm.assert_series_equal(result, expected) # wrong values for the "axis" parameter - for axis in 2, 'foo': + for axis in [2, 'foo']: with tm.assert_raises_regex(ValueError, 'No axis named'): s.set_axis(list('abcd'), axis=axis, inplace=False) @@ -276,7 +282,7 @@ def test_set_axis_prior_to_deprecation_signature(self): expected = s.copy() expected.index = list('abcd') - for axis in 0, 'index': + for axis in [0, 'index']: with tm.assert_produces_warning(FutureWarning): result = s.set_axis(0, list('abcd'), inplace=False) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 2571498ca802ce..c091df63fcfc7c 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -6,21 +6,13 @@ import numpy as np import pytest -from pandas import Series, Timestamp, Timedelta, Period, NaT +from pandas import Series, Timestamp, Period from pandas._libs.tslibs.period import IncompatibleFrequency import pandas as pd import pandas.util.testing as tm -@pytest.fixture -def tdser(): - """ - Return a Series with dtype='timedelta64[ns]', including a NaT. 
- """ - return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') - - # ------------------------------------------------------------------ # Comparisons @@ -552,342 +544,3 @@ def test_dt64ser_sub_datetime_dtype(self): ser = Series([ts]) result = pd.to_timedelta(np.abs(ser - dt)) assert result.dtype == 'timedelta64[ns]' - - -class TestTimedeltaSeriesAdditionSubtraction(object): - # Tests for Series[timedelta64[ns]] __add__, __sub__, __radd__, __rsub__ - - # ------------------------------------------------------------------ - # Operations with int-like others - - def test_td64series_add_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - tdser + Series([2, 3, 4]) - - @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') - def test_td64series_radd_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - Series([2, 3, 4]) + tdser - - def test_td64series_sub_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - tdser - Series([2, 3, 4]) - - @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') - def test_td64series_rsub_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - Series([2, 3, 4]) - tdser - - def test_td64_series_add_intlike(self): - # GH#19123 - tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) - ser = Series(tdi) - - other = Series([20, 30, 40], dtype='uint8') - - pytest.raises(TypeError, ser.__add__, 1) - pytest.raises(TypeError, ser.__sub__, 1) - - pytest.raises(TypeError, ser.__add__, other) - pytest.raises(TypeError, ser.__sub__, other) - - pytest.raises(TypeError, ser.__add__, other.values) - pytest.raises(TypeError, ser.__sub__, other.values) - - pytest.raises(TypeError, ser.__add__, pd.Index(other)) - pytest.raises(TypeError, ser.__sub__, pd.Index(other)) - - @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) - def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser): - with pytest.raises(TypeError): - tdser + scalar - with pytest.raises(TypeError): - scalar + tdser - with pytest.raises(TypeError): - tdser - scalar - with pytest.raises(TypeError): - scalar - tdser - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [ - np.array([1, 2, 3]), - pd.Index([1, 2, 3]), - pytest.param(Series([1, 2, 3]), - marks=pytest.mark.xfail(reason='GH#19123 integer ' - 'interpreted as nanos')) - ]) - def test_td64series_add_sub_numeric_array_invalid(self, vector, - dtype, tdser): - vector = vector.astype(dtype) - with pytest.raises(TypeError): - tdser + vector - with pytest.raises(TypeError): - vector + tdser - with pytest.raises(TypeError): - tdser - vector - with pytest.raises(TypeError): - vector - tdser - - # ------------------------------------------------------------------ - # Operations with datetime-like others - - def test_td64series_add_sub_timestamp(self): - # GH#11925 - tdser = Series(pd.timedelta_range('1 day', periods=3)) - ts = Timestamp('2012-01-01') - expected = Series(pd.date_range('2012-01-02', periods=3)) - tm.assert_series_equal(ts + tdser, expected) - tm.assert_series_equal(tdser + ts, expected) - - expected2 = Series(pd.date_range('2011-12-31', periods=3, freq='-1D')) - tm.assert_series_equal(ts - tdser, expected2) - tm.assert_series_equal(ts + (-tdser), expected2) - - with pytest.raises(TypeError): - tdser - ts - - # ------------------------------------------------------------------ - # Operations with timedelta-like 
others (including DateOffsets) - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_td64_series_with_tdi(self, names): - # GH#17250 make sure result dtype is correct - # GH#19043 make sure names are propagated correctly - tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) - ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) - expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], - name=names[2]) - - result = tdi + ser - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - result = ser + tdi - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], - name=names[2]) - - result = tdi - ser - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - result = ser - tdi - tm.assert_series_equal(result, -expected) - assert result.dtype == 'timedelta64[ns]' - - def test_td64_sub_NaT(self): - # GH#18808 - ser = Series([NaT, Timedelta('1s')]) - res = ser - NaT - expected = Series([NaT, NaT], dtype='timedelta64[ns]') - tm.assert_series_equal(res, expected) - - -class TestTimedeltaSeriesMultiplicationDivision(object): - # Tests for Series[timedelta64[ns]] - # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ - - # ------------------------------------------------------------------ - # __floordiv__, __rfloordiv__ - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_floordiv(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - result = td1 // scalar_td - expected = Series([0, 0, np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_rfloordiv(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - result = scalar_td // td1 - expected = Series([1, 1, np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_rfloordiv_explicit(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # We can test __rfloordiv__ using this syntax, - # see `test_timedelta_rfloordiv` - result = td1.__rfloordiv__(scalar_td) - expected = Series([1, 1, np.nan]) - tm.assert_series_equal(result, expected) - - # ------------------------------------------------------------------ - # Operations with int-like others - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), - pd.Index([20, 30, 40]), - Series([20, 30, 40])]) - def test_td64series_div_numeric_array(self, vector, dtype, tdser): - # GH#4521 - # divide/multiply by integers - vector = vector.astype(dtype) - expected = Series(['2.95D', '1D 23H 12m', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser / vector - tm.assert_series_equal(result, expected) - - with pytest.raises(TypeError): - vector / tdser - - @pytest.mark.parametrize('dtype', ['int64', 
'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), - pd.Index([20, 30, 40]), - Series([20, 30, 40])]) - def test_td64series_mul_numeric_array(self, vector, dtype, tdser): - # GH#4521 - # divide/multiply by integers - vector = vector.astype(dtype) - - expected = Series(['1180 Days', '1770 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * vector - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [ - np.array([20, 30, 40]), - pytest.param(pd.Index([20, 30, 40]), - marks=pytest.mark.xfail(reason='__mul__ raises ' - 'instead of returning ' - 'NotImplemented')), - Series([20, 30, 40]) - ]) - def test_td64series_rmul_numeric_array(self, vector, dtype, tdser): - # GH#4521 - # divide/multiply by integers - vector = vector.astype(dtype) - - expected = Series(['1180 Days', '1770 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = vector * tdser - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) - def test_td64series_mul_numeric_scalar(self, one, tdser): - # GH#4521 - # divide/multiply by integers - expected = Series(['-59 Days', '-59 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * (-one) - tm.assert_series_equal(result, expected) - result = (-one) * tdser - tm.assert_series_equal(result, expected) - - expected = Series(['118 Days', '118 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * (2 * one) - tm.assert_series_equal(result, expected) - result = (2 * one) * tdser - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('two', [ - 2, 2.0, - pytest.param(np.array(2), - marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' - 'incorrectly True.')), - pytest.param(np.array(2.0), - marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' - 'incorrectly True.')), - ]) - def test_td64series_div_numeric_scalar(self, two, tdser): - # GH#4521 - # divide/multiply by integers - expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]') - - result = tdser / two - tm.assert_series_equal(result, expected) - - # ------------------------------------------------------------------ - # Operations with timedelta-like others - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_tdi_mul_int_series(self, names): - # GH#19042 - tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], - name=names[0]) - ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) - - expected = Series(['0days', '1day', '4days', '9days', '16days'], - dtype='timedelta64[ns]', - name=names[2]) - - result = ser * tdi - tm.assert_series_equal(result, expected) - - # The direct operation tdi * ser still needs to be fixed. - result = ser.__rmul__(tdi) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_float_series_rdiv_tdi(self, names): - # GH#19042 - # TODO: the direct operation TimedeltaIndex / Series still - # needs to be fixed. 
- tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], - name=names[0]) - ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) - - expected = Series([tdi[n] / ser[n] for n in range(len(ser))], - dtype='timedelta64[ns]', - name=names[2]) - - result = ser.__rdiv__(tdi) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_td64series_mul_timedeltalike_invalid(self, scalar_td): - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # check that we are getting a TypeError - # with 'operate' (from core/ops.py) for the ops that are not - # defined - pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): - td1 * scalar_td - with tm.assert_raises_regex(TypeError, pattern): - scalar_td * td1 diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 47798d0ddd7f5e..7a02ce3a1fb2e7 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -255,12 +255,9 @@ def get_dir(s): # trying to set a copy with pd.option_context('chained_assignment', 'raise'): - - def f(): + with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 - pytest.raises(com.SettingWithCopyError, f) - def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2) @@ -420,12 +417,14 @@ def test_dt_accessor_api(self): s = Series(date_range('2000-01-01', periods=3)) assert isinstance(s.dt, DatetimeProperties) - for s in [Series(np.arange(5)), Series(list('abcde')), - Series(np.random.randn(5))]: - with tm.assert_raises_regex(AttributeError, - "only use .dt accessor"): - s.dt - assert not hasattr(s, 'dt') + @pytest.mark.parametrize('ser', [Series(np.arange(5)), + Series(list('abcde')), + Series(np.random.randn(5))]) + def test_dt_accessor_invalid(self, ser): + # GH#9322 check that series with incorrect dtypes don't have attr + with tm.assert_raises_regex(AttributeError, "only use .dt accessor"): + ser.dt + assert not hasattr(ser, 'dt') def test_between(self): s = Series(bdate_range('1/1/2000', periods=20).astype(object)) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index ecb74622edf10d..fad2b025dd3e42 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -136,10 +136,14 @@ def test_categorical_comparisons(self): assert ((~(f == a) == (f != a)).all()) # non-equality is not comparable - pytest.raises(TypeError, lambda: a < b) - pytest.raises(TypeError, lambda: b < a) - pytest.raises(TypeError, lambda: a > b) - pytest.raises(TypeError, lambda: b > a) + with pytest.raises(TypeError): + a < b + with pytest.raises(TypeError): + b < a + with pytest.raises(TypeError): + a > b + with pytest.raises(TypeError): + b > a def test_comparison_tuples(self): # GH11339 @@ -204,20 +208,21 @@ def test_comparison_operators_with_nas(self): # expected = f(val, s.dropna()).reindex(s.index) # assert_series_equal(result, expected) - # boolean &, |, ^ should work with object arrays and propagate NAs + @pytest.mark.parametrize('bool_op', [operator.and_, + operator.or_, operator.xor]) + def test_bool_operators_with_nas(self, bool_op): + # boolean &, |, ^ should work with object arrays and propagate NAs + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = 
np.nan - ops = ['and_', 'or_', 'xor'] mask = ser.isna() - for bool_op in ops: - func = getattr(operator, bool_op) - - filled = ser.fillna(ser[0]) + filled = ser.fillna(ser[0]) - result = func(ser < ser[9], ser > ser[3]) + result = bool_op(ser < ser[9], ser > ser[3]) - expected = func(filled < filled[9], filled > filled[3]) - expected[mask] = False - assert_series_equal(result, expected) + expected = bool_op(filled < filled[9], filled > filled[3]) + expected[mask] = False + assert_series_equal(result, expected) def test_comparison_object_numeric_nas(self): ser = Series(np.random.randn(10), dtype=object) @@ -248,27 +253,26 @@ def test_comparison_invalid(self): def test_unequal_categorical_comparison_raises_type_error(self): # unequal comparison should raise for unordered cats cat = Series(Categorical(list("abc"))) - - def f(): + with pytest.raises(TypeError): cat > "b" - pytest.raises(TypeError, f) cat = Series(Categorical(list("abc"), ordered=False)) - - def f(): + with pytest.raises(TypeError): cat > "b" - pytest.raises(TypeError, f) - # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 # and following comparisons with scalars not in categories should raise # for unequal comps, but not for equal/not equal cat = Series(Categorical(list("abc"), ordered=True)) - pytest.raises(TypeError, lambda: cat < "d") - pytest.raises(TypeError, lambda: cat > "d") - pytest.raises(TypeError, lambda: "d" < cat) - pytest.raises(TypeError, lambda: "d" > cat) + with pytest.raises(TypeError): + cat < "d" + with pytest.raises(TypeError): + cat > "d" + with pytest.raises(TypeError): + "d" < cat + with pytest.raises(TypeError): + "d" > cat tm.assert_series_equal(cat == "d", Series([False, False, False])) tm.assert_series_equal(cat != "d", Series([True, True, True])) @@ -365,11 +369,13 @@ def test_nat_comparisons_scalar(self, dtype, data): def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) b = Series(['b', 'a']) - pytest.raises(ValueError, a.__lt__, b) + with pytest.raises(ValueError): + a < b a = Series([1, 2]) b = Series([2, 3, 4]) - pytest.raises(ValueError, a.__eq__, b) + with pytest.raises(ValueError): + a == b def test_comparison_label_based(self): @@ -448,7 +454,8 @@ def test_comparison_label_based(self): assert_series_equal(result, expected) for v in [np.nan, 'foo']: - pytest.raises(TypeError, lambda: t | v) + with pytest.raises(TypeError): + t | v for v in [False, 0]: result = Series([True, False, True], index=index) | v @@ -465,7 +472,8 @@ def test_comparison_label_based(self): expected = Series([False, False, False], index=index) assert_series_equal(result, expected) for v in [np.nan]: - pytest.raises(TypeError, lambda: t & v) + with pytest.raises(TypeError): + t & v def test_comparison_flex_basic(self): left = pd.Series(np.random.randn(10)) @@ -930,12 +938,14 @@ def test_operators_datetimelike_with_timezones(self): result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - pytest.raises(TypeError, lambda: td1[0] - dt1) + with pytest.raises(TypeError): + td1[0] - dt1 result = dt2 - td2[0] exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - pytest.raises(TypeError, lambda: td2[0] - dt2) + with pytest.raises(TypeError): + td2[0] - dt2 result = dt1 + td1 exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) @@ -953,8 +963,10 @@ def test_operators_datetimelike_with_timezones(self): exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) 
assert_series_equal(result, exp) - pytest.raises(TypeError, lambda: td1 - dt1) - pytest.raises(TypeError, lambda: td2 - dt2) + with pytest.raises(TypeError): + td1 - dt1 + with pytest.raises(TypeError): + td2 - dt2 def test_sub_single_tz(self): # GH12290 @@ -1483,11 +1495,16 @@ def test_operators_bitwise(self): expected = Series([1, 1, 3, 3], dtype='int32') assert_series_equal(res, expected) - pytest.raises(TypeError, lambda: s_1111 & 'a') - pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) - pytest.raises(TypeError, lambda: s_0123 & np.NaN) - pytest.raises(TypeError, lambda: s_0123 & 3.14) - pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) + with pytest.raises(TypeError): + s_1111 & 'a' + with pytest.raises(TypeError): + s_1111 & ['a', 'b', 'c', 'd'] + with pytest.raises(TypeError): + s_0123 & np.NaN + with pytest.raises(TypeError): + s_0123 & 3.14 + with pytest.raises(TypeError): + s_0123 & [0.1, 4, 3.14, 2] # s_0123 will be all false now because of reindexing like s_tft if compat.PY3: @@ -1530,14 +1547,16 @@ def test_scalar_na_cmp_corners(self): def tester(a, b): return a & b - pytest.raises(TypeError, tester, s, datetime(2005, 1, 1)) + with pytest.raises(TypeError): + s & datetime(2005, 1, 1) s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan expected = Series(True, index=s.index) expected[::2] = False - assert_series_equal(tester(s, list(s)), expected) + result = s & list(s) + assert_series_equal(result, expected) d = DataFrame({'A': s}) # TODO: Fix this exception - needs to be fixed! (see GH5035) @@ -1587,7 +1606,25 @@ def test_operators_reverse_object(self, op): expected = op(1., arr.astype(float)) assert_series_equal(result.astype(float), expected) - def test_operators_combine(self): + pairings = [] + for op in ['add', 'sub', 'mul', 'pow', 'truediv', 'floordiv']: + fv = 0 + lop = getattr(Series, op) + lequiv = getattr(operator, op) + rop = getattr(Series, 'r' + op) + # bind op at definition time... + requiv = lambda x, y, op=op: getattr(operator, op)(y, x) + pairings.append((lop, lequiv, fv)) + pairings.append((rop, requiv, fv)) + if compat.PY3: + pairings.append((Series.div, operator.truediv, 1)) + pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x), 1)) + else: + pairings.append((Series.div, operator.div, 1)) + pairings.append((Series.rdiv, lambda x, y: operator.div(y, x), 1)) + + @pytest.mark.parametrize('op, equiv_op, fv', pairings) + def test_operators_combine(self, op, equiv_op, fv): def _check_fill(meth, op, a, b, fill_value=0): exp_index = a.index.union(b.index) a = a.reindex(exp_index) @@ -1619,32 +1656,12 @@ def _check_fill(meth, op, a, b, fill_value=0): a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) - pairings = [] - for op in ['add', 'sub', 'mul', 'pow', 'truediv', 'floordiv']: - fv = 0 - lop = getattr(Series, op) - lequiv = getattr(operator, op) - rop = getattr(Series, 'r' + op) - # bind op at definition time... 
- requiv = lambda x, y, op=op: getattr(operator, op)(y, x) - pairings.append((lop, lequiv, fv)) - pairings.append((rop, requiv, fv)) - - if compat.PY3: - pairings.append((Series.div, operator.truediv, 1)) - pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x), - 1)) - else: - pairings.append((Series.div, operator.div, 1)) - pairings.append((Series.rdiv, lambda x, y: operator.div(y, x), 1)) - - for op, equiv_op, fv in pairings: - result = op(a, b) - exp = equiv_op(a, b) - assert_series_equal(result, exp) - _check_fill(op, equiv_op, a, b, fill_value=fv) - # should accept axis=0 or axis='rows' - op(a, b, axis=0) + result = op(a, b) + exp = equiv_op(a, b) + assert_series_equal(result, exp) + _check_fill(op, equiv_op, a, b, fill_value=fv) + # should accept axis=0 or axis='rows' + op(a, b, axis=0) def test_operators_na_handling(self): from decimal import Decimal diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 63726f27914f3d..24c2f30bef5692 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -1,9 +1,10 @@ import numpy as np +import pytest import pandas as pd import pandas.util.testing as tm import pandas.core.indexes.period as period -from pandas import Series, period_range, DataFrame +from pandas import Series, period_range, DataFrame, Period def _permute(obj): @@ -72,22 +73,23 @@ def test_between(self): # --------------------------------------------------------------------- # NaT support - """ - # ToDo: Enable when support period dtype + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet", + strict=True) def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='period[D]') + series = Series([0, 1000, 2000, pd._libs.iNaT], dtype='period[D]') val = series[3] - assert isna(val) + assert pd.isna(val) series[2] = val - assert isna(series[2]) + assert pd.isna(series[2]) + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet", + strict=True) def test_NaT_cast(self): result = Series([np.nan]).astype('period[D]') - expected = Series([NaT]) + expected = Series([pd.NaT]) tm.assert_series_equal(result, expected) - """ def test_set_none_nan(self): # currently Period is stored as object dtype, not as NaT @@ -167,3 +169,23 @@ def test_truncate(self): pd.Period('2017-09-02') ]) tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) + + @pytest.mark.parametrize('input_vals', [ + [Period('2016-01', freq='M'), Period('2016-02', freq='M')], + [Period('2016-01-01', freq='D'), Period('2016-01-02', freq='D')], + [Period('2016-01-01 00:00:00', freq='H'), + Period('2016-01-01 01:00:00', freq='H')], + [Period('2016-01-01 00:00:00', freq='M'), + Period('2016-01-01 00:01:00', freq='M')], + [Period('2016-01-01 00:00:00', freq='S'), + Period('2016-01-01 00:00:01', freq='S')] + ]) + def test_end_time_timevalues(self, input_vals): + # GH 17157 + # Check that the time part of the Period is adjusted by end_time + # when using the dt accessor on a Series + + s = Series(input_vals) + result = s.dt.end_time + expected = s.apply(lambda x: x.end_time) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 3c93ff1d3f31eb..df8799cf5c9007 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np import pandas as pd @@ -113,31 +115,30 @@ def test_quantile_nan(self): 
tm.assert_series_equal(res, pd.Series([np.nan, np.nan], index=[0.2, 0.3])) - def test_quantile_box(self): - cases = [[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.Timestamp('2011-01-03')], - [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern'), - pd.Timestamp('2011-01-03', tz='US/Eastern')], - [pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], - # NaT - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.Timestamp('2011-01-03'), pd.NaT], - [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern'), - pd.Timestamp('2011-01-03', tz='US/Eastern'), pd.NaT], - [pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days'), pd.NaT]] - - for case in cases: - s = pd.Series(case, name='XXX') - res = s.quantile(0.5) - assert res == case[1] + @pytest.mark.parametrize('case', [ + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), + pd.Timestamp('2011-01-03')], + [pd.Timestamp('2011-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-02', tz='US/Eastern'), + pd.Timestamp('2011-01-03', tz='US/Eastern')], + [pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], + # NaT + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), + pd.Timestamp('2011-01-03'), pd.NaT], + [pd.Timestamp('2011-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-02', tz='US/Eastern'), + pd.Timestamp('2011-01-03', tz='US/Eastern'), pd.NaT], + [pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days'), pd.NaT]]) + def test_quantile_box(self, case): + s = pd.Series(case, name='XXX') + res = s.quantile(0.5) + assert res == case[1] - res = s.quantile([0.5]) - exp = pd.Series([case[1]], index=[0.5], name='XXX') - tm.assert_series_equal(res, exp) + res = s.quantile([0.5]) + exp = pd.Series([case[1]], index=[0.5], name='XXX') + tm.assert_series_equal(res, exp) def test_datetime_timedelta_quantiles(self): # covers #9694 diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 376b4d71f81e8a..72492de4b12473 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -78,7 +78,8 @@ def test_shift(self): assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) - pytest.raises(ValueError, ps.shift, freq='D') + with pytest.raises(ValueError): + ps.shift(freq='D') # legacy support shifted4 = ps.shift(1, freq='B') @@ -109,7 +110,8 @@ def test_shift(self): # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') - pytest.raises(TypeError, lambda: s - s2) + with pytest.raises(TypeError): + s - s2 def test_shift2(self): ts = Series(np.random.randn(5), @@ -168,7 +170,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_series_equal(shifted, shifted3) - pytest.raises(ValueError, ps.tshift, freq='M') + with pytest.raises(ValueError): + ps.tshift(freq='M') # DatetimeIndex shifted = self.ts.tshift(1) @@ -187,7 +190,8 @@ def test_tshift(self): assert_series_equal(unshifted, inferred_ts) no_freq = self.ts[[0, 5, 7]] - pytest.raises(ValueError, no_freq.tshift) + with pytest.raises(ValueError): + no_freq.tshift() def test_truncate(self): offset = BDay() @@ -459,7 +463,8 @@ def test_empty_series_ops(self): assert_series_equal(a, a + b) assert_series_equal(a, a - b) assert_series_equal(a, b + a) - pytest.raises(TypeError, lambda x, y: x - y, b, a) + with pytest.raises(TypeError): + b - a def 
test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -791,16 +796,19 @@ def test_between_time_raises(self): def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') - pytest.raises(ValueError, rng.indexer_between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + with pytest.raises(ValueError): + rng.indexer_between_time(datetime(2010, 1, 2, 1), + datetime(2010, 1, 2, 5)) frame = DataFrame({'A': 0}, index=rng) - pytest.raises(ValueError, frame.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + with pytest.raises(ValueError): + frame.between_time(datetime(2010, 1, 2, 1), + datetime(2010, 1, 2, 5)) series = Series(0, index=rng) - pytest.raises(ValueError, series.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + with pytest.raises(ValueError): + series.between_time(datetime(2010, 1, 2, 1), + datetime(2010, 1, 2, 5)) @td.skip_if_has_locale def test_between_time_formats(self): @@ -921,40 +929,40 @@ def test_pickle(self): idx_p = tm.round_trip_pickle(idx) tm.assert_index_equal(idx, idx_p) - def test_setops_preserve_freq(self): - for tz in [None, 'Asia/Tokyo', 'US/Eastern']: - rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) - - result = rng[:50].union(rng[50:100]) - assert result.name == rng.name - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].union(rng[30:100]) - assert result.name == rng.name - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].union(rng[60:100]) - assert result.name == rng.name - assert result.freq is None - assert result.tz == rng.tz - - result = rng[:50].intersection(rng[25:75]) - assert result.name == rng.name - assert result.freqstr == 'D' - assert result.tz == rng.tz - - nofreq = DatetimeIndex(list(rng[25:75]), name='other') - result = rng[:50].union(nofreq) - assert result.name is None - assert result.freq == rng.freq - assert result.tz == rng.tz - - result = rng[:50].intersection(nofreq) - assert result.name is None - assert result.freq == rng.freq - assert result.tz == rng.tz + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo', 'US/Eastern']) + def test_setops_preserve_freq(self, tz): + rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) + + result = rng[:50].union(rng[50:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[30:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[60:100]) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz + + result = rng[:50].intersection(rng[25:75]) + assert result.name == rng.name + assert result.freqstr == 'D' + assert result.tz == rng.tz + + nofreq = DatetimeIndex(list(rng[25:75]), name='other') + result = rng[:50].union(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].intersection(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz def test_min_max(self): rng = date_range('1/1/2000', '12/31/2000') diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index f15b629f15ae38..8ee0bf9ec874ad 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -2,6 +2,7 @@ # Arithmetc tests for DataFrame/Series/Index/Array classes that should # behave identically. 
from datetime import timedelta +import operator import pytest import numpy as np @@ -9,7 +10,22 @@ import pandas as pd import pandas.util.testing as tm -from pandas import Timedelta +from pandas.core import ops +from pandas.errors import NullFrequencyError +from pandas._libs.tslibs import IncompatibleFrequency +from pandas import ( + Timedelta, Timestamp, NaT, Series, TimedeltaIndex, DatetimeIndex) + + +# ------------------------------------------------------------------ +# Fixtures + +@pytest.fixture +def tdser(): + """ + Return a Series with dtype='timedelta64[ns]', including a NaT. + """ + return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') # ------------------------------------------------------------------ @@ -19,7 +35,7 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object): @pytest.mark.parametrize('box', [ pd.Index, - pd.Series, + Series, pytest.param(pd.DataFrame, marks=pytest.mark.xfail(reason="block.eval incorrect", strict=True)) @@ -35,10 +51,10 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object): Timedelta(days=1).to_timedelta64(), Timedelta(days=1).to_pytimedelta()], ids=lambda x: type(x).__name__) - def test_index_mul_timedelta(self, scalar_td, index, box): + def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): # GH#19333 - if (box is pd.Series and + if (box is Series and type(scalar_td) is timedelta and index.dtype == 'f8'): raise pytest.xfail(reason="Cannot multiply timedelta by float") @@ -53,7 +69,7 @@ def test_index_mul_timedelta(self, scalar_td, index, box): commute = scalar_td * index tm.assert_equal(commute, expected) - @pytest.mark.parametrize('box', [pd.Index, pd.Series, pd.DataFrame]) + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame]) @pytest.mark.parametrize('index', [ pd.Int64Index(range(1, 3)), pd.UInt64Index(range(1, 3)), @@ -65,14 +81,14 @@ def test_index_mul_timedelta(self, scalar_td, index, box): Timedelta(days=1).to_timedelta64(), Timedelta(days=1).to_pytimedelta()], ids=lambda x: type(x).__name__) - def test_index_rdiv_timedelta(self, scalar_td, index, box): + def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box): - if box is pd.Series and type(scalar_td) is timedelta: + if box is Series and type(scalar_td) is timedelta: raise pytest.xfail(reason="TODO: Figure out why this case fails") if box is pd.DataFrame and isinstance(scalar_td, timedelta): raise pytest.xfail(reason="TODO: Figure out why this case fails") - expected = pd.TimedeltaIndex(['1 Day', '12 Hours']) + expected = TimedeltaIndex(['1 Day', '12 Hours']) index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) @@ -87,12 +103,727 @@ def test_index_rdiv_timedelta(self, scalar_td, index, box): # ------------------------------------------------------------------ # Timedelta64[ns] dtype Arithmetic Operations +class TestTimedeltaArraylikeAddSubOps(object): + # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + def test_td64arr_add_str_invalid(self, box): + # GH#13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + tdi = tm.box_expected(tdi, box) + + with pytest.raises(TypeError): + tdi + 'a' + with pytest.raises(TypeError): + 'a' + tdi + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) + 
@pytest.mark.parametrize('op', [operator.add, ops.radd, + operator.sub, ops.rsub], + ids=lambda x: x.__name__) + def test_td64arr_add_sub_float(self, box, op, other): + tdi = TimedeltaIndex(['-1 days', '-1 days']) + tdi = tm.box_expected(tdi, box) + + if box is pd.DataFrame and op in [operator.add, operator.sub]: + pytest.xfail(reason="Tries to align incorrectly, " + "raises ValueError") + + with pytest.raises(TypeError): + op(tdi, other) + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Tries to cast df to " + "Period", + strict=True, + raises=IncompatibleFrequency)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('freq', [None, 'H']) + def test_td64arr_sub_period(self, box, freq): + # GH#13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + idx = TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + idx = tm.box_expected(idx, box) + + with pytest.raises(TypeError): + idx - p + + with pytest.raises(TypeError): + p - idx + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="broadcasts along " + "wrong axis", + raises=ValueError, + strict=True)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H']) + @pytest.mark.parametrize('tdi_freq', [None, 'H']) + def test_td64arr_sub_pi(self, box, tdi_freq, pi_freq): + # GH#20049 subtracting PeriodIndex should raise TypeError + tdi = TimedeltaIndex(['1 hours', '2 hours'], freq=tdi_freq) + dti = Timestamp('2018-03-07 17:16:40') + tdi + pi = dti.to_period(pi_freq) + + # TODO: parametrize over box for pi? + tdi = tm.box_expected(tdi, box) + with pytest.raises(TypeError): + tdi - pi + + # ------------------------------------------------------------- + # Binary operations td64 arraylike and datetime-like + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + def test_td64arr_sub_timestamp_raises(self, box): + idx = TimedeltaIndex(['1 day', '2 day']) + idx = tm.box_expected(idx, box) + + msg = "cannot subtract a datelike from|Could not operate" + with tm.assert_raises_regex(TypeError, msg): + idx - Timestamp('2011-01-01') + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Returns object dtype", + strict=True)) + ], ids=lambda x: x.__name__) + def test_td64arr_add_timestamp(self, box): + idx = TimedeltaIndex(['1 day', '2 day']) + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = idx + Timestamp('2011-01-01') + tm.assert_equal(result, expected) + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Returns object dtype", + strict=True)) + ], ids=lambda x: x.__name__) + def test_td64_radd_timestamp(self, box): + idx = TimedeltaIndex(['1 day', '2 day']) + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + # TODO: parametrize over scalar datetime types? 
+ result = Timestamp('2011-01-01') + idx + tm.assert_equal(result, expected) + + # ------------------------------------------------------------------ + # Operations with int-like others + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Attempts to broadcast " + "incorrectly", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + def test_td64arr_add_int_series_invalid(self, box, tdser): + tdser = tm.box_expected(tdser, box) + err = TypeError if box is not pd.Index else NullFrequencyError + with pytest.raises(err): + tdser + Series([2, 3, 4]) + + @pytest.mark.parametrize('box', [ + pd.Index, + pytest.param(Series, + marks=pytest.mark.xfail(reason="GH#19123 integer " + "interpreted as " + "nanoseconds", + strict=True)), + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Attempts to broadcast " + "incorrectly", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + def test_td64arr_radd_int_series_invalid(self, box, tdser): + tdser = tm.box_expected(tdser, box) + err = TypeError if box is not pd.Index else NullFrequencyError + with pytest.raises(err): + Series([2, 3, 4]) + tdser + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Attempts to broadcast " + "incorrectly", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + def test_td64arr_sub_int_series_invalid(self, box, tdser): + tdser = tm.box_expected(tdser, box) + err = TypeError if box is not pd.Index else NullFrequencyError + with pytest.raises(err): + tdser - Series([2, 3, 4]) + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds', + strict=True) + def test_td64arr_rsub_int_series_invalid(self, box, tdser): + tdser = tm.box_expected(tdser, box) + with pytest.raises(TypeError): + Series([2, 3, 4]) - tdser + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Tries to broadcast " + "incorrectly", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + def test_td64arr_add_intlike(self, box): + # GH#19123 + tdi = TimedeltaIndex(['59 days', '59 days', 'NaT']) + ser = tm.box_expected(tdi, box) + err = TypeError if box is not pd.Index else NullFrequencyError + + other = Series([20, 30, 40], dtype='uint8') + + # TODO: separate/parametrize + with pytest.raises(err): + ser + 1 + with pytest.raises(err): + ser - 1 + + with pytest.raises(err): + ser + other + with pytest.raises(err): + ser - other + + with pytest.raises(err): + ser + np.array(other) + with pytest.raises(err): + ser - np.array(other) + + with pytest.raises(err): + ser + pd.Index(other) + with pytest.raises(err): + ser - pd.Index(other) + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) + def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): + + if box is pd.DataFrame and isinstance(scalar, np.ndarray): + # raises ValueError + pytest.xfail(reason="DataFrame to broadcast incorrectly") + + tdser = tm.box_expected(tdser, box) + err = TypeError + if box is pd.Index and not isinstance(scalar, float): + err = NullFrequencyError + + with pytest.raises(err): + tdser + scalar + with pytest.raises(err): + scalar + tdser + with pytest.raises(err): + tdser - scalar + with 
pytest.raises(err): + scalar - tdser + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Tries to broadcast " + "incorrectly", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vec', [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + Series([1, 2, 3]) + # TODO: Add DataFrame in here? + ], ids=lambda x: type(x).__name__) + def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser): + if type(vec) is Series and not dtype.startswith('float'): + pytest.xfail(reason='GH#19123 integer interpreted as nanos') + + tdser = tm.box_expected(tdser, box) + err = TypeError + if box is pd.Index and not dtype.startswith('float'): + err = NullFrequencyError + + vector = vec.astype(dtype) + # TODO: parametrize over these four ops? + with pytest.raises(err): + tdser + vector + with pytest.raises(err): + vector + tdser + with pytest.raises(err): + tdser - vector + with pytest.raises(err): + vector - tdser + + # ------------------------------------------------------------------ + # Operations with datetime-like others + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Returns object dtype " + "instead of " + "datetime64[ns]", + strict=True)) + ], ids=lambda x: x.__name__) + def test_td64arr_add_sub_timestamp(self, box): + # GH#11925 + ts = Timestamp('2012-01-01') + # TODO: parametrize over types of datetime scalar? + + tdser = Series(pd.timedelta_range('1 day', periods=3)) + expected = Series(pd.date_range('2012-01-02', periods=3)) + + tdser = tm.box_expected(tdser, box) + expected = tm.box_expected(expected, box) + + tm.assert_equal(ts + tdser, expected) + tm.assert_equal(tdser + ts, expected) + + expected2 = Series(pd.date_range('2011-12-31', + periods=3, freq='-1D')) + expected2 = tm.box_expected(expected2, box) + + tm.assert_equal(ts - tdser, expected2) + tm.assert_equal(ts + (-tdser), expected2) + + with pytest.raises(TypeError): + tdser - ts + + # ------------------------------------------------------------------ + # Operations with timedelta-like others (including DateOffsets) + + # TODO: parametrize over [add, sub, radd, rsub]? 
+ @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Tries to broadcast " + "incorrectly leading " + "to alignment error", + strict=True, raises=ValueError)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_td64arr_add_sub_tdi(self, box, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + tdi = TimedeltaIndex(['0 days', '1 day'], name=names[0]) + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], + name=names[2]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = tdi + ser + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == 'timedelta64[ns]' + else: + assert result.dtypes[0] == 'timedelta64[ns]' + + result = ser + tdi + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == 'timedelta64[ns]' + else: + assert result.dtypes[0] == 'timedelta64[ns]' + + expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], + name=names[2]) + expected = tm.box_expected(expected, box) + + result = tdi - ser + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == 'timedelta64[ns]' + else: + assert result.dtypes[0] == 'timedelta64[ns]' + + result = ser - tdi + tm.assert_equal(result, -expected) + if box is not pd.DataFrame: + assert result.dtype == 'timedelta64[ns]' + else: + assert result.dtypes[0] == 'timedelta64[ns]' + + @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame], + ids=lambda x: x.__name__) + def test_td64arr_sub_NaT(self, box): + # GH#18808 + ser = Series([NaT, Timedelta('1s')]) + expected = Series([NaT, NaT], dtype='timedelta64[ns]') + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + res = ser - NaT + tm.assert_equal(res, expected) + + +class TestTimedeltaArraylikeMulDivOps(object): + # Tests for timedelta64[ns] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Incorrectly returns " + "m8[ns] instead of f8", + strict=True)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64arr_floordiv_tdscalar(self, box, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + expected = Series([0, 0, np.nan]) + + td1 = tm.box_expected(td1, box) + expected = tm.box_expected(expected, box) + + result = td1 // scalar_td + tm.assert_equal(result, expected) + + @pytest.mark.parametrize('box', [ + pd.Index, + Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(reason="Incorrectly casts to f8", + strict=True)) + ], ids=lambda x: x.__name__) + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + expected = Series([1, 1, 
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Incorrectly casts to f8",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('scalar_td', [
+        timedelta(minutes=5, seconds=4),
+        Timedelta('5m4s'),
+        Timedelta('5m4s').to_timedelta64()])
+    def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
+        # GH#18831
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
+        td1.iloc[2] = np.nan
+
+        expected = Series([1, 1, np.nan])
+
+        td1 = tm.box_expected(td1, box)
+        expected = tm.box_expected(expected, box)
+
+        result = scalar_td // td1
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Returns m8[ns] dtype "
+                                                    "instead of f8",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('scalar_td', [
+        timedelta(minutes=5, seconds=4),
+        Timedelta('5m4s'),
+        Timedelta('5m4s').to_timedelta64()])
+    def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td):
+        # GH#18831
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
+        td1.iloc[2] = np.nan
+
+        expected = Series([1, 1, np.nan])
+
+        td1 = tm.box_expected(td1, box)
+        expected = tm.box_expected(expected, box)
+
+        # We can test __rfloordiv__ using this syntax,
+        # see `test_timedelta_rfloordiv`
+        result = td1.__rfloordiv__(scalar_td)
+        tm.assert_equal(result, expected)
+
+    # ------------------------------------------------------------------
+    # Operations with timedelta-like others
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="__mul__ op treats "
+                                                    "timedelta other as i8; "
+                                                    "rmul OK",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('scalar_td', [
+        timedelta(minutes=5, seconds=4),
+        Timedelta('5m4s'),
+        Timedelta('5m4s').to_timedelta64()])
+    def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td):
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
+        td1.iloc[2] = np.nan
+
+        td1 = tm.box_expected(td1, box)
+
+        # check that we are getting a TypeError
+        # with 'operate' (from core/ops.py) for the ops that are not
+        # defined
+        pattern = 'operate|unsupported|cannot|not supported'
+        with tm.assert_raises_regex(TypeError, pattern):
+            td1 * scalar_td
+        with tm.assert_raises_regex(TypeError, pattern):
+            scalar_td * td1
+
+    # ------------------------------------------------------------------
+    # Operations with numeric others
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Returns object-dtype",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)])
+    def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
+        # GH#4521
+        # divide/multiply by integers
+        expected = Series(['-59 Days', '-59 Days', 'NaT'],
+                          dtype='timedelta64[ns]')
+
+        tdser = tm.box_expected(tdser, box)
+        expected = tm.box_expected(expected, box)
+
+        result = tdser * (-one)
+        tm.assert_equal(result, expected)
+        result = (-one) * tdser
+        tm.assert_equal(result, expected)
+
+        expected = Series(['118 Days', '118 Days', 'NaT'],
+                          dtype='timedelta64[ns]')
+        expected = tm.box_expected(expected, box)
+
+        result = tdser * (2 * one)
+        tm.assert_equal(result, expected)
+        result = (2 * one) * tdser
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Returns object-dtype",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('two', [2, 2.0, np.array(2), np.array(2.0)])
+    def test_td64arr_div_numeric_scalar(self, box, two, tdser):
+        # GH#4521
+        # divide/multiply by integers
+        expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]')
+
+        tdser = tm.box_expected(tdser, box)
+        expected = tm.box_expected(expected, box)
+
+        result = tdser / two
+        tm.assert_equal(result, expected)
+
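+    # In the array-valued tests below, boxing an Index against a Series
+    # operand yields a Series result, so ``box`` is re-assigned before
+    # boxing ``expected`` (see the up-casting TODO in the first test).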
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="broadcasts along "
+                                                    "wrong axis",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
+                                       'uint64', 'uint32', 'uint16', 'uint8',
+                                       'float64', 'float32', 'float16'])
+    @pytest.mark.parametrize('vector', [np.array([20, 30, 40]),
+                                        pd.Index([20, 30, 40]),
+                                        Series([20, 30, 40])])
+    def test_td64arr_mul_numeric_array(self, box, vector, dtype, tdser):
+        # GH#4521
+        # divide/multiply by integers
+        vector = vector.astype(dtype)
+
+        expected = Series(['1180 Days', '1770 Days', 'NaT'],
+                          dtype='timedelta64[ns]')
+
+        tdser = tm.box_expected(tdser, box)
+        # TODO: Make this up-casting more systematic?
+        box = Series if (box is pd.Index and type(vector) is Series) else box
+        expected = tm.box_expected(expected, box)
+
+        result = tdser * vector
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="broadcasts along "
+                                                    "wrong axis",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
+                                       'uint64', 'uint32', 'uint16', 'uint8',
+                                       'float64', 'float32', 'float16'])
+    @pytest.mark.parametrize('vector', [np.array([20, 30, 40]),
+                                        pd.Index([20, 30, 40]),
+                                        Series([20, 30, 40])],
+                             ids=lambda x: type(x).__name__)
+    def test_td64arr_rmul_numeric_array(self, box, vector, dtype, tdser):
+        # GH#4521
+        # divide/multiply by integers
+        vector = vector.astype(dtype)
+
+        expected = Series(['1180 Days', '1770 Days', 'NaT'],
+                          dtype='timedelta64[ns]')
+
+        tdser = tm.box_expected(tdser, box)
+        box = Series if (box is pd.Index and type(vector) is Series) else box
+        expected = tm.box_expected(expected, box)
+
+        result = vector * tdser
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="broadcasts along "
+                                                    "wrong axis",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
+                                       'uint64', 'uint32', 'uint16', 'uint8',
+                                       'float64', 'float32', 'float16'])
+    @pytest.mark.parametrize('vector', [np.array([20, 30, 40]),
+                                        pd.Index([20, 30, 40]),
+                                        Series([20, 30, 40])])
+    def test_td64arr_div_numeric_array(self, box, vector, dtype, tdser):
+        # GH#4521
+        # divide/multiply by integers
+        vector = vector.astype(dtype)
+        expected = Series(['2.95D', '1D 23H 12m', 'NaT'],
+                          dtype='timedelta64[ns]')
+
+        tdser = tm.box_expected(tdser, box)
+        box = Series if (box is pd.Index and type(vector) is Series) else box
+        expected = tm.box_expected(expected, box)
+
+        result = tdser / vector
+        tm.assert_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            vector / tdser
+
+    # TODO: Should we be parametrizing over types for `ser` too?
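+    # The ``names`` triples give (left name, right name, expected result
+    # name): a name is propagated only when both operands share it,
+    # otherwise the result is left unnamed.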
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="broadcasts along "
+                                                    "wrong axis",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('names', [(None, None, None),
+                                       ('Egon', 'Venkman', None),
+                                       ('NCC1701D', 'NCC1701D', 'NCC1701D')])
+    def test_td64arr_mul_int_series(self, box, names):
+        # GH#19042 test for correct name attachment
+        tdi = TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'],
+                             name=names[0])
+        ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1])
+
+        expected = Series(['0days', '1day', '4days', '9days', '16days'],
+                          dtype='timedelta64[ns]',
+                          name=names[2])
+
+        tdi = tm.box_expected(tdi, box)
+        box = Series if (box is pd.Index and type(ser) is Series) else box
+        expected = tm.box_expected(expected, box)
+
+        result = ser * tdi
+        tm.assert_equal(result, expected)
+
+        # The direct operation tdi * ser still needs to be fixed.
+        result = ser.__rmul__(tdi)
+        tm.assert_equal(result, expected)
+
+    # TODO: Should we be parametrizing over types for `ser` too?
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pd.DataFrame
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('names', [(None, None, None),
+                                       ('Egon', 'Venkman', None),
+                                       ('NCC1701D', 'NCC1701D', 'NCC1701D')])
+    def test_float_series_rdiv_td64arr(self, box, names):
+        # GH#19042 test for correct name attachment
+        # TODO: the direct operation TimedeltaIndex / Series still
+        # needs to be fixed.
+        tdi = TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'],
+                             name=names[0])
+        ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1])
+
+        expected = Series([tdi[n] / ser[n] for n in range(len(ser))],
+                          dtype='timedelta64[ns]',
+                          name=names[2])
+
+        tdi = tm.box_expected(tdi, box)
+        box = Series if (box is pd.Index and type(ser) is Series) else box
+        expected = tm.box_expected(expected, box)
+
+        result = ser.__rdiv__(tdi)
+        if box is pd.DataFrame:
+            # TODO: Should we skip this case sooner or test something else?
+            assert result is NotImplemented
+        else:
+            tm.assert_equal(result, expected)
+
 
 class TestTimedeltaArraylikeInvalidArithmeticOps(object):
 
     @pytest.mark.parametrize('box', [
         pd.Index,
-        pd.Series,
+        Series,
         pytest.param(pd.DataFrame,
                      marks=pytest.mark.xfail(reason="raises ValueError "
                                                     "instead of TypeError",
@@ -102,8 +833,8 @@ class TestTimedeltaArraylikeInvalidArithmeticOps(object):
         timedelta(minutes=5, seconds=4),
         Timedelta('5m4s'),
         Timedelta('5m4s').to_timedelta64()])
-    def test_td64series_pow_invalid(self, scalar_td, box):
-        td1 = pd.Series([timedelta(minutes=5, seconds=3)] * 3)
+    def test_td64arr_pow_invalid(self, scalar_td, box):
+        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
         td1.iloc[2] = np.nan
 
         td1 = tm.box_expected(td1, box)
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 1f70d09e43b378..de4dc2bcf25a47 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -21,7 +21,7 @@
 
 import pandas as pd
 from pandas import (Series, DataFrame, Panel, Index, isna,
-                    notna, Timestamp)
+                    notna, Timestamp, Timedelta)
 
 from pandas.compat import range, lrange, zip, OrderedDict
 from pandas.errors import UnsupportedFunctionCall
@@ -1702,12 +1702,14 @@ def test_resample_anchored_intraday(self):
         result = df.resample('M').mean()
         expected = df.resample(
             'M', kind='period').mean().to_timestamp(how='end')
+        expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
         tm.assert_frame_equal(result, expected)
 
         result = df.resample('M', closed='left').mean()
         exp = df.tshift(1, freq='D').resample('M', kind='period').mean()
         exp = exp.to_timestamp(how='end')
+        exp.index = exp.index + Timedelta(1, 'ns') - Timedelta(1, 'D')
         tm.assert_frame_equal(result, exp)
 
         rng = date_range('1/1/2012', '4/1/2012', freq='100min')
@@ -1716,12 +1718,14 @@ def test_resample_anchored_intraday(self):
         result = df.resample('Q').mean()
         expected = df.resample(
             'Q', kind='period').mean().to_timestamp(how='end')
+        expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
         tm.assert_frame_equal(result, expected)
 
         result = df.resample('Q', closed='left').mean()
         expected = df.tshift(1, freq='D').resample('Q', kind='period',
                                                    closed='left').mean()
         expected = expected.to_timestamp(how='end')
+        expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
         tm.assert_frame_equal(result, expected)
 
         ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h')
@@ -2473,7 +2477,7 @@ def test_resample_to_timestamps(self):
         ts = _simple_pts('1/1/1990', '12/31/1995', freq='M')
 
         result = ts.resample('A-DEC', kind='timestamp').mean()
-        expected = ts.to_timestamp(how='end').resample('A-DEC').mean()
+        expected = ts.to_timestamp(how='start').resample('A-DEC').mean()
         assert_series_equal(result, expected)
 
     def test_resample_to_quarterly(self):
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 60981f41ec716f..9d41401a7eefc4 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -1321,7 +1321,7 @@ def _end_apply_index(self, dtindex):
             roll = self.n
 
         base = (base_period + roll).to_timestamp(how='end')
-        return base + off
+        return base + off + Timedelta(1, 'ns') - Timedelta(1, 'D')
 
     def onOffset(self, dt):
         if self.normalize and not _is_normalized(dt):
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 6dffbcb0b4f010..2225daf10d90fb 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1348,7 +1348,9 @@ def assert_frame_equal(left, right, check_dtype=True,
     check_categorical : bool, default True
         Whether to compare internal Categorical exactly.
     check_like : bool, default False
-        If true, ignore the order of rows & columns
+        If True, ignore the order of index & columns.
+        Note: index labels must match their respective rows
+        (same as in columns) - same labels must be with the same data
     obj : str, default 'DataFrame'
         Specify object name being compared, internally used to show
         appropriate assertion message