diff --git a/.travis.yml b/.travis.yml index 40baee2c03ea0..c9bdb91283d42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,11 +53,7 @@ matrix: - dist: trusty env: - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true - # In allow_failures - - dist: trusty - env: - - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true - # In allow_failures + - dist: trusty env: - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" @@ -65,6 +61,12 @@ matrix: apt: packages: - xsel + + # In allow_failures + - dist: trusty + env: + - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true + # In allow_failures - dist: trusty env: @@ -73,13 +75,6 @@ matrix: - dist: trusty env: - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true - - dist: trusty - env: - - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" - addons: - apt: - packages: - - xsel - dist: trusty env: - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index c5b147b152aa6..2850fa249725c 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -2,10 +2,10 @@ import numpy as np import pandas.util.testing as tm -from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index, - IntervalIndex, CategoricalIndex, - IndexSlice, concat, date_range) -from .pandas_vb_common import setup, Panel # noqa +from pandas import (Series, DataFrame, MultiIndex, Panel, + Int64Index, Float64Index, IntervalIndex, + CategoricalIndex, IndexSlice, concat, date_range) +from .pandas_vb_common import setup # noqa class NumericSeriesIndexing(object): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 7487a0d8489b7..6624c3d0aaf49 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -3,14 +3,15 @@ import numpy as np import pandas.util.testing as tm -from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge, - merge_asof) +from pandas import (DataFrame, Series, Panel, MultiIndex, + date_range, concat, merge, merge_asof) + try: from pandas import merge_ordered except ImportError: from pandas import ordered_merge as merge_ordered -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class Append(object): diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index ce946c76ed199..4614bbd198afa 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,9 +1,9 @@ import warnings from datetime import datetime, timedelta -from pandas import DataFrame, DatetimeIndex, date_range +from pandas import DataFrame, Panel, DatetimeIndex, date_range -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class DifferentIndexes(object): diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index a5b1a92e9cf67..4d19e9a87c507 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -1,8 +1,9 @@ import warnings import numpy as np +from pandas import Panel -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class 
PanelMethods(object): diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f246ebad3aa2c..c9874b4dd03d6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -505,6 +505,7 @@ ExtensionType Changes - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`). - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) +- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) .. _whatsnew_0240.api.incompatibilities: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5a18a1049ee9c..29d01d6702796 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -739,14 +739,22 @@ def _create_method(cls, op, coerce_to_dtype=True): ---------- op : function An operator that takes arguments op(a, b) - coerce_to_dtype : bool + coerce_to_dtype : bool, default True boolean indicating whether to attempt to convert - the result to the underlying ExtensionArray dtype - (default True) + the result to the underlying ExtensionArray dtype. + If it's not possible to create a new ExtensionArray with the + values, an ndarray is returned instead. Returns ------- - A method that can be bound to a method of a class + Callable[[Any, Any], Union[ndarray, ExtensionArray]] + A method that can be bound to a class. When used, the method + receives the two arguments, one of which is the instance of + this class, and should return an ExtensionArray or an ndarray. + + Returning an ndarray may be necessary when the result of the + `op` cannot be stored in the ExtensionArray. The dtype of the + ndarray uses NumPy's normal inference rules. Example ------- @@ -757,7 +765,6 @@ def _create_method(cls, op, coerce_to_dtype=True): in the class definition of MyExtensionArray to create the operator for addition, that will be based on the operator implementation of the underlying elements of the ExtensionArray - """ def _binop(self, other): @@ -774,20 +781,24 @@ def convert_values(param): # a TypeError should be raised res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] - if coerce_to_dtype: - if op.__name__ in {'divmod', 'rdivmod'}: + def _maybe_convert(arr): + if coerce_to_dtype: + # https://github.com/pandas-dev/pandas/issues/22850 + # We catch all regular exceptions here, and fall back + # to an ndarray. 
try: - a, b = zip(*res) - res = (self._from_sequence(a), - self._from_sequence(b)) - except TypeError: - pass + res = self._from_sequence(arr) + except Exception: + res = np.asarray(arr) else: - try: - res = self._from_sequence(res) - except TypeError: - pass + res = np.asarray(arr) + return res + if op.__name__ in {'divmod', 'rdivmod'}: + a, b = zip(*res) + res = _maybe_convert(a), _maybe_convert(b) + else: + res = _maybe_convert(res) return res op_name = ops._get_op_name(op, True) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index a552251ebbafa..b0fa55e346613 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -2,6 +2,7 @@ import numpy as np from pandas import compat +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCDataFrame from pandas.errors import AbstractMethodError @@ -83,7 +84,12 @@ def is_dtype(cls, dtype): """ dtype = getattr(dtype, 'dtype', dtype) - if isinstance(dtype, np.dtype): + if isinstance(dtype, (ABCSeries, ABCIndexClass, + ABCDataFrame, np.dtype)): + # https://github.com/pandas-dev/pandas/issues/22960 + # avoid passing data to `construct_from_string`. This could + # cause a FutureWarning from numpy about failing elementwise + # comparison from, e.g., comparing DataFrame == 'category'. return False elif dtype is None: return False @@ -175,7 +181,9 @@ def type(self): """The scalar type for the array, e.g. ``int`` It's expected ``ExtensionArray[item]`` returns an instance - of ``ExtensionDtype.type`` for scalar ``item``. + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. """ raise AbstractMethodError(self) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5f0b71d4505c2..a9fc9d13d4ab3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -4,12 +4,13 @@ from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) from pandas._libs import algos, lib -from pandas._libs.tslibs import conversion +from pandas._libs.tslibs import conversion, Period, Timestamp +from pandas._libs.interval import Interval from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, - DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, - IntervalDtypeType, PandasExtensionDtype, ExtensionDtype, + PeriodDtype, IntervalDtype, + PandasExtensionDtype, ExtensionDtype, _pandas_registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, @@ -1905,20 +1906,20 @@ def _get_dtype_type(arr_or_dtype): elif isinstance(arr_or_dtype, CategoricalDtype): return CategoricalDtypeType elif isinstance(arr_or_dtype, DatetimeTZDtype): - return DatetimeTZDtypeType + return Timestamp elif isinstance(arr_or_dtype, IntervalDtype): - return IntervalDtypeType + return Interval elif isinstance(arr_or_dtype, PeriodDtype): - return PeriodDtypeType + return Period elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtypeType elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtypeType + return Timestamp elif is_period_dtype(arr_or_dtype): - return PeriodDtypeType + return Period elif is_interval_dtype(arr_or_dtype): - return IntervalDtypeType + return Interval return _get_dtype_type(np.dtype(arr_or_dtype)) try: return arr_or_dtype.dtype.type diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index
fe5cc9389a8ba..beda9bc02f4d5 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -4,6 +4,8 @@ import numpy as np from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex +from pandas._libs.tslibs import Period, NaT, Timestamp +from pandas._libs.interval import Interval from .base import ExtensionDtype, _DtypeOpsMixin @@ -469,13 +471,6 @@ def _is_boolean(self): return is_bool_dtype(self.categories) -class DatetimeTZDtypeType(type): - """ - the type of DatetimeTZDtype, this metaclass determines subclass ability - """ - pass - - class DatetimeTZDtype(PandasExtensionDtype): """ @@ -485,7 +480,7 @@ class DatetimeTZDtype(PandasExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.datetime64[ns] """ - type = DatetimeTZDtypeType + type = Timestamp kind = 'M' str = '|M8[ns]' num = 101 @@ -583,20 +578,13 @@ def __eq__(self, other): str(self.tz) == str(other.tz)) -class PeriodDtypeType(type): - """ - the type of PeriodDtype, this metaclass determines subclass ability - """ - pass - - class PeriodDtype(PandasExtensionDtype): """ A Period duck-typed class, suitable for holding a period with freq dtype. THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. """ - type = PeriodDtypeType + type = Period kind = 'O' str = '|O08' base = np.dtype('O') @@ -666,11 +654,15 @@ def construct_from_string(cls, string): raise TypeError("could not construct PeriodDtype") def __unicode__(self): - return "period[{freq}]".format(freq=self.freq.freqstr) + return compat.text_type(self.name) @property def name(self): - return str(self) + return str("period[{freq}]".format(freq=self.freq.freqstr)) + + @property + def na_value(self): + return NaT def __hash__(self): # make myself hashable @@ -705,13 +697,6 @@ def is_dtype(cls, dtype): return super(PeriodDtype, cls).is_dtype(dtype) -class IntervalDtypeType(type): - """ - the type of IntervalDtype, this metaclass determines subclass ability - """ - pass - - @register_extension_dtype class IntervalDtype(PandasExtensionDtype, ExtensionDtype): """ @@ -800,7 +785,6 @@ def construct_from_string(cls, string): @property def type(self): - from pandas import Interval return Interval def __unicode__(self): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff7590f6d5358..f4b7ccb0fdf5b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4908,7 +4908,8 @@ def _combine_match_index(self, other, func, level=None): return ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values - new_data = func(left.values.T, right.values).T + with np.errstate(all="ignore"): + new_data = func(left.values.T, right.values).T return self._constructor(new_data, index=left.index, columns=self.columns, copy=False) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index af04a846ed787..51c84d6e28cb4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -530,7 +530,7 @@ def _shallow_copy(self, values=None, **kwargs): return self._simple_new(values, **attributes) - def _shallow_copy_with_infer(self, values=None, **kwargs): + def _shallow_copy_with_infer(self, values, **kwargs): """ create a new Index inferring the class with passed value, don't copy the data, use the same object attributes with passed in attributes @@ -543,8 +543,6 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): values : the values to create the new Index, optional kwargs : updates the default attributes for this Index 
""" - if values is None: - values = self.values attributes = self._get_attributes_dict() attributes.update(kwargs) attributes['copy'] = False diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6091df776a01b..3cccb65503378 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -556,7 +556,7 @@ def view(self, cls=None): result._id = self._id return result - def _shallow_copy_with_infer(self, values=None, **kwargs): + def _shallow_copy_with_infer(self, values, **kwargs): # On equal MultiIndexes the difference is empty. # Therefore, an empty MultiIndex is returned GH13490 if len(values) == 0: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 969391569ce50..cc008694a8b84 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -287,7 +287,7 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): result._reset_identity() return result - def _shallow_copy_with_infer(self, values=None, **kwargs): + def _shallow_copy_with_infer(self, values, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) diff --git a/pandas/core/series.py b/pandas/core/series.py index 82198c2b3edd5..a613b22ea9046 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2323,10 +2323,14 @@ def combine(self, other, func, fill_value=None): pass elif is_extension_array_dtype(self.values): # The function can return something of any type, so check - # if the type is compatible with the calling EA + # if the type is compatible with the calling EA. try: new_values = self._values._from_sequence(new_values) - except TypeError: + except Exception: + # https://github.com/pandas-dev/pandas/issues/22850 + # pandas has no control over what 3rd-party ExtensionArrays + # do in _values_from_sequence. We still want ops to work + # though, so we catch any regular Exception. pass return self._constructor(new_values, index=new_index, name=new_name) @@ -4224,7 +4228,7 @@ def _try_cast(arr, take_fast_path): try: # gh-15832: Check if we are requesting a numeric dype and # that we can convert the data to the requested dtype. 
- if is_float_dtype(dtype) or is_integer_dtype(dtype): + if is_integer_dtype(dtype): subarr = maybe_cast_to_integer_array(arr, dtype) subarr = maybe_cast_to_datetime(arr, dtype) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a7a9faa9e77eb..f87c51a4ee16b 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -605,15 +605,15 @@ def test__get_dtype_fails(input_param): (pd.DatetimeIndex([1, 2]), np.datetime64), (pd.DatetimeIndex([1, 2]).dtype, np.datetime64), ('= @@ -52,7 +50,7 @@ def _simple_ts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) -class TestSeriesMissingData(TestData): +class TestSeriesMissingData(): def test_remove_na_deprecation(self): # see gh-16971 @@ -489,7 +487,7 @@ def test_isnull_for_inf_deprecated(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) - def test_fillna(self): + def test_fillna(self, datetime_series): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) tm.assert_series_equal(ts, ts.fillna(method='ffill')) @@ -506,7 +504,8 @@ def test_fillna(self): tm.assert_series_equal(ts.fillna(value=5), exp) pytest.raises(ValueError, ts.fillna) - pytest.raises(ValueError, self.ts.fillna, value=0, method='ffill') + pytest.raises(ValueError, datetime_series.fillna, value=0, + method='ffill') # GH 5703 s1 = Series([np.nan]) @@ -576,9 +575,9 @@ def test_fillna_inplace(self): expected = x.fillna(value=0) assert_series_equal(y, expected) - def test_fillna_invalid_method(self): + def test_fillna_invalid_method(self, datetime_series): try: - self.ts.fillna(method='ffil') + datetime_series.fillna(method='ffil') except ValueError as inst: assert 'ffil' in str(inst) @@ -632,8 +631,8 @@ def test_timedelta64_nan(self): # def test_logical_range_select(self): # np.random.seed(12345) - # selector = -0.5 <= self.ts <= 0.5 - # expected = (self.ts >= -0.5) & (self.ts <= 0.5) + # selector = -0.5 <= datetime_series <= 0.5 + # expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) # assert_series_equal(selector, expected) def test_dropna_empty(self): @@ -688,8 +687,8 @@ def test_dropna_intervals(self): expected = s.iloc[1:] assert_series_equal(result, expected) - def test_valid(self): - ts = self.ts.copy() + def test_valid(self, datetime_series): + ts = datetime_series.copy() ts[::2] = np.NaN result = ts.dropna() @@ -734,12 +733,12 @@ def test_pad_require_monotonicity(self): pytest.raises(ValueError, rng2.get_indexer, rng, method='pad') - def test_dropna_preserve_name(self): - self.ts[:5] = np.nan - result = self.ts.dropna() - assert result.name == self.ts.name - name = self.ts.name - ts = self.ts.copy() + def test_dropna_preserve_name(self, datetime_series): + datetime_series[:5] = np.nan + result = datetime_series.dropna() + assert result.name == datetime_series.name + name = datetime_series.name + ts = datetime_series.copy() ts.dropna(inplace=True) assert ts.name == name @@ -825,10 +824,11 @@ def test_series_pad_backfill_limit(self): assert_series_equal(result, expected) -class TestSeriesInterpolateData(TestData): +class TestSeriesInterpolateData(): - def test_interpolate(self): - ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index) + def test_interpolate(self, datetime_series, string_series): + ts = Series(np.arange(len(datetime_series), dtype=float), + datetime_series.index) ts_copy = ts.copy() ts_copy[5:10] = np.NaN @@ -836,8 +836,8 @@ def test_interpolate(self): linear_interp = ts_copy.interpolate(method='linear') 
tm.assert_series_equal(linear_interp, ts) - ord_ts = Series([d.toordinal() for d in self.ts.index], - index=self.ts.index).astype(float) + ord_ts = Series([d.toordinal() for d in datetime_series.index], + index=datetime_series.index).astype(float) ord_ts_copy = ord_ts.copy() ord_ts_copy[5:10] = np.NaN @@ -847,7 +847,7 @@ def test_interpolate(self): # try time interpolation on a non-TimeSeries # Only raises ValueError if there are NaNs. - non_ts = self.series.copy() + non_ts = string_series.copy() non_ts[0] = np.NaN pytest.raises(ValueError, non_ts.interpolate, method='time')
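
For illustration, a minimal Python sketch (not part of the patch) of the ``.type`` behaviour this change introduces for the period, datetime-with-timezone, and interval dtypes; the constructor arguments shown are assumptions and may differ between pandas versions:

from pandas import Interval, Period, Timestamp
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype, IntervalDtype, PeriodDtype)

# With this patch, .type is the corresponding scalar class rather than a
# dedicated placeholder metaclass (DatetimeTZDtypeType and friends).
assert PeriodDtype(freq='D').type is Period
assert DatetimeTZDtype(unit='ns', tz='UTC').type is Timestamp
assert IntervalDtype().type is Interval

Per the updated ``ExtensionDtype.type`` docstring, valid (non-NA) scalars taken from arrays with these dtypes are then expected to be instances of that scalar class.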