From ce774cc58bbfb437f715cd00c82ee784d252c690 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 07:09:16 -0600 Subject: [PATCH] Add default repr for EAs (#23601) --- doc/source/whatsnew/v0.24.0.rst | 3 + pandas/core/arrays/base.py | 63 +++++++++++++++-- pandas/core/arrays/categorical.py | 11 ++- pandas/core/arrays/integer.py | 35 +++------- pandas/core/arrays/interval.py | 3 - pandas/core/arrays/period.py | 11 ++- pandas/core/arrays/sparse.py | 5 ++ pandas/core/indexes/period.py | 2 +- pandas/core/internals/blocks.py | 16 ++++- pandas/io/formats/format.py | 67 +++++++------------ pandas/io/formats/printing.py | 31 ++++++--- pandas/tests/arrays/test_integer.py | 35 +++++----- pandas/tests/arrays/test_period.py | 33 +++++++++ pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/interface.py | 25 ------- pandas/tests/extension/base/printing.py | 44 ++++++++++++ pandas/tests/extension/decimal/array.py | 3 - .../tests/extension/decimal/test_decimal.py | 22 +++++- pandas/tests/extension/json/array.py | 3 - pandas/tests/extension/json/test_json.py | 4 ++ pandas/tests/extension/test_integer.py | 4 ++ pandas/tests/extension/test_interval.py | 6 ++ pandas/tests/extension/test_period.py | 4 ++ pandas/tests/extension/test_sparse.py | 6 ++ pandas/tests/frame/test_repr_info.py | 12 ++-- pandas/tests/series/test_repr.py | 40 +++++------ 26 files changed, 316 insertions(+), 173 deletions(-) create mode 100644 pandas/tests/extension/base/printing.py diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index a6be90c3ad84b0..03ea6fdd6593eb 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1002,6 +1002,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`). 
- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`). - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`). +- A default repr for :class:`ExtensionArray` is now provided (:issue:`23601`). .. _whatsnew_0240.api.incompatibilities: @@ -1117,6 +1118,7 @@ Deprecations - The methods :meth:`Series.str.partition` and :meth:`Series.str.rpartition` have deprecated the ``pat`` keyword in favor of ``sep`` (:issue:`22676`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) +- :meth:`ExtensionArray._formatting_values` is deprecated. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. 
Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) - Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) @@ -1284,6 +1286,7 @@ Datetimelike - Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`) - Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) +- Bug in the :class:`Series` repr with period-dtype data missing a space before the data (:issue:`23601`) - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) - Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8877436dcf51c5..9c6aa4a12923f4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -47,10 +47,12 @@ class ExtensionArray(object): * copy * _concat_same_type - An additional method is available to satisfy pandas' internal, - private block API. + A default repr displaying the type, (truncated) data, length, + and dtype is provided. 
It can be customized or replaced by + overriding: - * _formatting_values + * __repr__ : A default repr for the ExtensionArray. + * _formatter : Print scalars inside a Series or DataFrame. Some methods require casting the ExtensionArray to an ndarray of Python objects with ``self.astype(object)``, which may be expensive. When @@ -676,17 +678,70 @@ def copy(self, deep=False): raise AbstractMethodError(self) # ------------------------------------------------------------------------ - # Block-related methods + # Printing # ------------------------------------------------------------------------ + def __repr__(self): + from pandas.io.formats.printing import format_object_summary + + template = ( + u'{class_name}' + u'{data}\n' + u'Length: {length}, dtype: {dtype}' + ) + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = format_object_summary(self, self._formatter(), + indent_for_name=False).rstrip(', \n') + class_name = u'<{}>\n'.format(self.__class__.__name__) + return template.format(class_name=class_name, data=data, + length=len(self), + dtype=self.dtype) + + def _formatter(self, boxed=False): + # type: (bool) -> Callable[[Any], Optional[str]] + """Formatting function for scalar values. + + This is used in the default '__repr__'. The returned formatting + function receives instances of your scalar type. + + Parameters + ---------- + boxed : bool, default False + An indicator for whether or not your array is being printed + within a Series, DataFrame, or Index (True), or just by + itself (False). This may be useful if you want scalar values + to appear differently within a Series versus on its own (e.g. + quoted or not). + + Returns + ------- + Callable[[Any], str] + A callable that gets instances of the scalar type and + returns a string. 
By default, :func:`repr` is used + when ``boxed=False`` and :func:`str` is used when + ``boxed=True``. + """ + if boxed: + return str + return repr def _formatting_values(self): # type: () -> np.ndarray # At the moment, this has to be an array since we use result.dtype """ An array of values to be printed in, e.g. the Series repr + + .. deprecated:: 0.24.0 + + Use :meth:`ExtensionArray._formatter` instead. """ return np.array(self) + # ------------------------------------------------------------------------ + # Reshaping + # ------------------------------------------------------------------------ + @classmethod def _concat_same_type(cls, to_concat): # type: (Sequence[ExtensionArray]) -> ExtensionArray diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3ed2a3b3955e47..ac1c34edba9148 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -500,6 +500,10 @@ def _constructor(self): def _from_sequence(cls, scalars, dtype=None, copy=False): return Categorical(scalars, dtype=dtype) + def _formatter(self, boxed=False): + # Defer to CategoricalFormatter's formatter. + return None + def copy(self): """ Copy constructor. @@ -2036,6 +2040,10 @@ def __unicode__(self): return result + def __repr__(self): + # We want PandasObject.__repr__, which dispatches to __unicode__ + return super(ExtensionArray, self).__repr__() + def _maybe_coerce_indexer(self, indexer): """ return an indexer coerced to the codes dtype @@ -2392,9 +2400,6 @@ def _concat_same_type(self, to_concat): return _concat_categorical(to_concat) - def _formatting_values(self): - return self - def isin(self, values): """ Check whether `values` are contained in Categorical. 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f500422f0cbc57..38dc68e8f77a38 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import lib -from pandas.compat import range, set_function_name, string_types, u +from pandas.compat import range, set_function_name, string_types from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ExtensionDtype @@ -20,9 +20,6 @@ from pandas.core import nanops from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin -from pandas.io.formats.printing import ( - default_pprint, format_object_attrs, format_object_summary) - class _IntegerDtype(ExtensionDtype): """ @@ -268,6 +265,13 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): def _from_factorized(cls, values, original): return integer_array(values, dtype=original.dtype) + def _formatter(self, boxed=False): + def fmt(x): + if isna(x): + return 'NaN' + return str(x) + return fmt + def __getitem__(self, item): if is_integer(item): if self._mask[item]: @@ -301,10 +305,6 @@ def __iter__(self): else: yield self._data[i] - def _formatting_values(self): - # type: () -> np.ndarray - return self._coerce_to_ndarray() - def take(self, indexer, allow_fill=False, fill_value=None): from pandas.api.extensions import take @@ -354,25 +354,6 @@ def __setitem__(self, key, value): def __len__(self): return len(self._data) - def __repr__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. 
- """ - klass = self.__class__.__name__ - data = format_object_summary(self, default_pprint, False) - attrs = format_object_attrs(self) - space = " " - - prepr = (u(",%s") % - space).join(u("%s=%s") % (k, v) for k, v in attrs) - - res = u("%s(%s%s)") % (klass, data, prepr) - - return res - @property def nbytes(self): return self._data.nbytes + self._mask.nbytes diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b055bc3f2eb526..785fb02c4d95d5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -690,9 +690,6 @@ def copy(self, deep=False): # TODO: Could skip verify_integrity here. return type(self).from_arrays(left, right, closed=closed) - def _formatting_values(self): - return np.asarray(self) - def isna(self): return isna(self.left) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9aa83892d3b644..4d466ef7281b7a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -341,13 +341,10 @@ def to_timestamp(self, freq=None, how='start'): # -------------------------------------------------------------------- # Array-like / EA-Interface Methods - def __repr__(self): - return '<{}>\n{}\nLength: {}, dtype: {}'.format( - self.__class__.__name__, - [str(s) for s in self], - len(self), - self.dtype - ) + def _formatter(self, boxed=False): + if boxed: + return str + return "'{}'".format def __setitem__( self, diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index ae5a4eb7075def..96724b6c4b362f 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1746,6 +1746,11 @@ def __unicode__(self): fill=printing.pprint_thing(self.fill_value), index=printing.pprint_thing(self.sp_index)) + def _formatter(self, boxed=False): + # Defer to the formatter from the GenericArrayFormatter calling us. + # This will infer the correct formatter from the dtype of the values. 
+ return None + SparseArray._add_arithmetic_ops() SparseArray._add_comparison_ops() diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c1a78b985fec92..26e51e4f63101a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -503,7 +503,7 @@ def __array_wrap__(self, result, context=None): @property def _formatter_func(self): - return lambda x: "'%s'" % x + return self.array._formatter(boxed=False) def asof_locs(self, where, mask): """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 828b0df73b341a..198e832ca46030 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -33,7 +33,7 @@ _isna_compat, array_equivalent, is_null_datelike_scalar, isna, notna) import pandas.core.algorithms as algos -from pandas.core.arrays import Categorical +from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.datetimes import DatetimeIndex @@ -1915,7 +1915,19 @@ def _slice(self, slicer): return self.values[slicer] def formatting_values(self): - return self.values._formatting_values() + # Deprecating the ability to override _formatting_values. + # Do the warning here, it's only user in pandas, since we + # have to check if the subclass overrode it. + fv = getattr(type(self.values), '_formatting_values', None) + if fv and fv != ExtensionArray._formatting_values: + msg = ( + "'ExtensionArray._formatting_values' is deprecated. " + "Specify 'ExtensionArray._formatter' instead." 
+ ) + warnings.warn(msg, DeprecationWarning, stacklevel=10) + return self.values._formatting_values() + + return self.values def concat_same_type(self, to_concat, placement=None): """ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b35f5d1e548b74..8452eb562a8e64 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -16,11 +16,12 @@ from pandas.compat import StringIO, lzip, map, u, zip from pandas.core.dtypes.common import ( - is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_float, - is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, - is_list_like, is_numeric_dtype, is_period_arraylike, is_scalar, + is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_extension_array_dtype, is_float, is_float_dtype, is_integer, + is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar, is_timedelta64_dtype) -from pandas.core.dtypes.generic import ABCMultiIndex, ABCSparseArray +from pandas.core.dtypes.generic import ( + ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray) from pandas.core.dtypes.missing import isna, notna from pandas import compat @@ -29,7 +30,6 @@ from pandas.core.config import get_option, set_option from pandas.core.index import Index, ensure_index from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.period import PeriodIndex from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing @@ -842,22 +842,18 @@ def _get_column_name_list(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.'): - if is_categorical_dtype(values): - fmt_klass = CategoricalArrayFormatter - elif is_interval_dtype(values): - fmt_klass = IntervalArrayFormatter + if is_datetime64_dtype(values.dtype): + fmt_klass = Datetime64Formatter + elif is_timedelta64_dtype(values.dtype): + fmt_klass = Timedelta64Formatter + 
elif is_extension_array_dtype(values.dtype): + fmt_klass = ExtensionArrayFormatter elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter - elif is_period_arraylike(values): - fmt_klass = PeriodArrayFormatter elif is_integer_dtype(values.dtype): fmt_klass = IntArrayFormatter elif is_datetime64tz_dtype(values): fmt_klass = Datetime64TZFormatter - elif is_datetime64_dtype(values.dtype): - fmt_klass = Datetime64Formatter - elif is_timedelta64_dtype(values.dtype): - fmt_klass = Timedelta64Formatter else: fmt_klass = GenericArrayFormatter @@ -1121,39 +1117,22 @@ def _format_strings(self): return fmt_values.tolist() -class IntervalArrayFormatter(GenericArrayFormatter): - - def __init__(self, values, *args, **kwargs): - GenericArrayFormatter.__init__(self, values, *args, **kwargs) - - def _format_strings(self): - formatter = self.formatter or str - fmt_values = np.array([formatter(x) for x in self.values]) - return fmt_values - - -class PeriodArrayFormatter(IntArrayFormatter): - +class ExtensionArrayFormatter(GenericArrayFormatter): def _format_strings(self): - from pandas.core.indexes.period import IncompatibleFrequency - try: - values = PeriodIndex(self.values).to_native_types() - except IncompatibleFrequency: - # periods may contains different freq - values = Index(self.values, dtype='object').to_native_types() - - formatter = self.formatter or (lambda x: '{x}'.format(x=x)) - fmt_values = [formatter(x) for x in values] - return fmt_values - + values = self.values + if isinstance(values, (ABCIndexClass, ABCSeries)): + values = values._values -class CategoricalArrayFormatter(GenericArrayFormatter): + formatter = values._formatter(boxed=True) - def __init__(self, values, *args, **kwargs): - GenericArrayFormatter.__init__(self, values, *args, **kwargs) + if is_categorical_dtype(values.dtype): + # Categorical is special for now, so that we can preserve tzinfo + array = values.get_values() + else: + array = np.asarray(values) - def _format_strings(self): - 
fmt_values = format_array(self.values.get_values(), self.formatter, + fmt_values = format_array(array, + formatter, float_format=self.float_format, na_rep=self.na_rep, digits=self.digits, space=self.space, justify=self.justify) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index e671571560b192..6d45d1e5dfceef 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -271,7 +271,8 @@ class TableSchemaFormatter(BaseFormatter): max_seq_items=max_seq_items) -def format_object_summary(obj, formatter, is_justify=True, name=None): +def format_object_summary(obj, formatter, is_justify=True, name=None, + indent_for_name=True): """ Return the formatted obj as a unicode string @@ -283,8 +284,11 @@ def format_object_summary(obj, formatter, is_justify=True, name=None): string formatter for an element is_justify : boolean should justify the display - name : name, optiona + name : name, optional defaults to the class name of the obj + indent_for_name : bool, default True + Whether subsequent lines should be indented to + align with the name. 
Returns ------- @@ -300,8 +304,13 @@ def format_object_summary(obj, formatter, is_justify=True, name=None): if name is None: name = obj.__class__.__name__ - space1 = "\n%s" % (' ' * (len(name) + 1)) - space2 = "\n%s" % (' ' * (len(name) + 2)) + if indent_for_name: + name_len = len(name) + space1 = "\n%s" % (' ' * (name_len + 1)) + space2 = "\n%s" % (' ' * (name_len + 2)) + else: + space1 = "\n" + space2 = "\n " # space for the opening '[' n = len(obj) sep = ',' @@ -328,15 +337,17 @@ def best_len(values): else: return 0 + close = u', ' + if n == 0: - summary = '[], ' + summary = u'[]{}'.format(close) elif n == 1: first = formatter(obj[0]) - summary = '[%s], ' % first + summary = u'[{}]{}'.format(first, close) elif n == 2: first = formatter(obj[0]) last = formatter(obj[-1]) - summary = '[%s, %s], ' % (first, last) + summary = u'[{}, {}]{}'.format(first, last, close) else: if n > max_seq_items: @@ -381,7 +392,11 @@ def best_len(values): summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) summary += line - summary += '],' + + # right now close is either '' or ', ' + # Now we want to include the ']', but not the maybe space. + close = ']' + close.rstrip(' ') + summary += close if len(summary) > (display_width): summary += space1 diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 51cd139a6ccadb..173f9707e76c28 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -57,24 +57,27 @@ def test_dtypes(dtype): assert dtype.name is not None -class TestInterface(object): - - def test_repr_array(self, data): - result = repr(data) - - # not long - assert '...' 
not in result - - assert 'dtype=' in result - assert 'IntegerArray' in result +def test_repr_array(): + result = repr(integer_array([1, None, 3])) + expected = ( + '\n' + '[1, NaN, 3]\n' + 'Length: 3, dtype: Int64' + ) + assert result == expected - def test_repr_array_long(self, data): - # some arrays may be able to assert a ... in the repr - with pd.option_context('display.max_seq_items', 1): - result = repr(data) - assert '...' in result - assert 'length' in result +def test_repr_array_long(): + data = integer_array([1, 2, None] * 1000) + expected = ( + "\n" + "[ 1, 2, NaN, 1, 2, NaN, 1, 2, NaN, 1,\n" + " ...\n" + " NaN, 1, 2, NaN, 1, 2, NaN, 1, 2, NaN]\n" + "Length: 3000, dtype: Int64" + ) + result = repr(data) + assert result == expected class TestConstructors(object): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 63b34db13705eb..bf139bb0ce6164 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -195,3 +195,36 @@ def test_sub_period(): other = pd.Period("2000", freq="M") with pytest.raises(IncompatibleFrequency, match="freq"): arr - other + + +# ---------------------------------------------------------------------------- +# Printing + +def test_repr_small(): + arr = period_array(['2000', '2001'], freq='D') + result = str(arr) + expected = ( + "\n" + "['2000-01-01', '2001-01-01']\n" + "Length: 2, dtype: period[D]" + ) + assert result == expected + + +def test_repr_large(): + arr = period_array(['2000', '2001'] * 500, freq='D') + result = str(arr) + expected = ( + "\n" + "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01',\n" + " ...\n" + " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01']\n" + "Length: 1000, dtype: period[D]" + ) + assert result == expected diff --git 
a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index d11bb8b6beb779..57704b77bb233c 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -48,6 +48,7 @@ class TestMyDtype(BaseDtypeTests): from .interface import BaseInterfaceTests # noqa from .methods import BaseMethodsTests # noqa from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa +from .printing import BasePrintingTests # noqa from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests # noqa from .missing import BaseMissingTests # noqa from .reshaping import BaseReshapingTests # noqa diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 00a480d311b58f..f8464dbac80534 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -1,7 +1,5 @@ import numpy as np -from pandas.compat import StringIO - from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype @@ -35,29 +33,6 @@ def test_array_interface(self, data): result = np.array(data) assert result[0] == data[0] - def test_repr(self, data): - ser = pd.Series(data) - assert data.dtype.name in repr(ser) - - df = pd.DataFrame({"A": data}) - repr(df) - - def test_repr_array(self, data): - # some arrays may be able to assert - # attributes in the repr - repr(data) - - def test_repr_array_long(self, data): - # some arrays may be able to assert a ... 
in the repr - with pd.option_context('display.max_seq_items', 1): - repr(data) - - def test_dtype_name_in_info(self, data): - buf = StringIO() - pd.DataFrame({"A": data}).info(buf=buf) - result = buf.getvalue() - assert data.dtype.name in result - def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data) assert is_extension_array_dtype(data.dtype) diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py new file mode 100644 index 00000000000000..b2ba1d95cf33ea --- /dev/null +++ b/pandas/tests/extension/base/printing.py @@ -0,0 +1,44 @@ +import io + +import pytest + +import pandas as pd +from pandas import compat + +from .base import BaseExtensionTests + + +class BasePrintingTests(BaseExtensionTests): + """Tests checking the formatting of your EA when printed.""" + + @pytest.mark.parametrize("size", ["big", "small"]) + def test_array_repr(self, data, size): + if size == "small": + data = data[:5] + else: + data = type(data)._concat_same_type([data] * 5) + + result = repr(data) + assert data.__class__.__name__ in result + assert 'Length: {}'.format(len(data)) in result + assert str(data.dtype) in result + if size == 'big': + assert '...' 
in result + + def test_array_repr_unicode(self, data): + result = compat.text_type(data) + assert isinstance(result, compat.text_type) + + def test_series_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + def test_dataframe_repr(self, data): + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = io.StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3c8905c578c4f3..79e81f1034c6dd 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -114,9 +114,6 @@ def __setitem__(self, key, value): def __len__(self): return len(self._data) - def __repr__(self): - return 'DecimalArray({!r})'.format(self._data) - @property def nbytes(self): n = len(self) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 01efd7ec7e5906..6281c5360cd031 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -188,7 +188,8 @@ def test_value_counts(self, all_data, dropna): class TestCasting(BaseDecimal, base.BaseCastingTests): - pass + pytestmark = pytest.mark.skipif(compat.PY2, + reason="Unhashble dtype in Py2.") class TestGroupby(BaseDecimal, base.BaseGroupbyTests): @@ -200,6 +201,11 @@ class TestSetitem(BaseDecimal, base.BaseSetitemTests): pass +class TestPrinting(BaseDecimal, base.BasePrintingTests): + pytestmark = pytest.mark.skipif(compat.PY2, + reason="Unhashble dtype in Py2.") + + # TODO(extension) @pytest.mark.xfail(reason=( "raising AssertionError as this is not implemented, " @@ -379,3 +385,17 @@ def test_divmod_array(reverse, expected_div, expected_mod): tm.assert_extension_array_equal(div, expected_div) tm.assert_extension_array_equal(mod, expected_mod) + + +def 
test_formatting_values_deprecated(): + class DecimalArray2(DecimalArray): + def _formatting_values(self): + return np.array(self) + + ser = pd.Series(DecimalArray2([decimal.Decimal('1.0')])) + # different levels for 2 vs. 3 + check_stacklevel = compat.PY3 + + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=check_stacklevel): + repr(ser) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 2c6e74fda8a0e6..d58b7ddf291238 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -115,9 +115,6 @@ def __setitem__(self, key, value): def __len__(self): return len(self.data) - def __repr__(self): - return 'JSONArary({!r})'.format(self.data) - @property def nbytes(self): return sys.getsizeof(self.data) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 66e5f6b6dc7328..a941b562fe1ecc 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -283,3 +283,7 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): pass + + +class TestPrinting(BaseJSON, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 218b2e9bd0e118..e21ca81bcf5c33 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -214,3 +214,7 @@ class TestNumericReduce(base.BaseNumericReduceTests): class TestBooleanReduce(base.BaseBooleanReduceTests): pass + + +class TestPrinting(base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index d67c0d0a9c05ae..644f3ef94f40ba 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -146,3 +146,9 @@ class TestReshaping(BaseInterval, base.BaseReshapingTests): class 
TestSetitem(BaseInterval, base.BaseSetitemTests): pass + + +class TestPrinting(BaseInterval, base.BasePrintingTests): + @pytest.mark.skip(reason="custom repr") + def test_array_repr(self, data, size): + pass diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 2e629ccb2981e6..08e21fc30ad100 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -152,3 +152,7 @@ class TestSetitem(BasePeriodTests, base.BaseSetitemTests): class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): pass + + +class TestPrinting(BasePeriodTests, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 4f67a13215cfdd..891e5f4dd9a95d 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -316,3 +316,9 @@ def _compare_other(self, s, data, op_name, other): s = pd.Series(data) result = op(s, other) tm.assert_series_equal(result, expected) + + +class TestPrinting(BaseSparseTests, base.BasePrintingTests): + @pytest.mark.xfail(reason='Different repr', strict=True) + def test_array_repr(self, data, size): + super(TestPrinting, self).test_array_repr(data, size) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 01dee47fffe496..07cbb8cdcde0ac 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -513,12 +513,12 @@ def test_repr_categorical_dates_periods(self): tz='US/Eastern') p = period_range('2011-01', freq='M', periods=5) df = DataFrame({'dt': dt, 'p': p}) - exp = """ dt p -0 2011-01-01 09:00:00-05:00 2011-01 -1 2011-01-01 10:00:00-05:00 2011-02 -2 2011-01-01 11:00:00-05:00 2011-03 -3 2011-01-01 12:00:00-05:00 2011-04 -4 2011-01-01 13:00:00-05:00 2011-05""" + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 
2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" df = DataFrame({'dt': Categorical(dt), 'p': Categorical(p)}) assert repr(df) == exp diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index ef962747466559..c4a0496f7fb275 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -364,11 +364,11 @@ def test_categorical_series_repr_datetime_ordered(self): def test_categorical_series_repr_period(self): idx = period_range('2011-01-01 09:00', freq='H', periods=5) s = Series(Categorical(idx)) - exp = """0 2011-01-01 09:00 -1 2011-01-01 10:00 -2 2011-01-01 11:00 -3 2011-01-01 12:00 -4 2011-01-01 13:00 + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" # noqa @@ -377,11 +377,11 @@ def test_categorical_series_repr_period(self): idx = period_range('2011-01', freq='M', periods=5) s = Series(Categorical(idx)) - exp = """0 2011-01 -1 2011-02 -2 2011-03 -3 2011-04 -4 2011-05 + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 dtype: category Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" @@ -390,11 +390,11 @@ def test_categorical_series_repr_period(self): def test_categorical_series_repr_period_ordered(self): idx = period_range('2011-01-01 09:00', freq='H', periods=5) s = Series(Categorical(idx, ordered=True)) - exp = """0 2011-01-01 09:00 -1 2011-01-01 10:00 -2 2011-01-01 11:00 -3 2011-01-01 12:00 -4 2011-01-01 13:00 + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" # noqa @@ -403,11 +403,11 @@ def test_categorical_series_repr_period_ordered(self): idx = period_range('2011-01', freq='M', 
periods=5) s = Series(Categorical(idx, ordered=True)) - exp = """0 2011-01 -1 2011-02 -2 2011-03 -3 2011-04 -4 2011-05 + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 dtype: category Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""