Skip to content

Commit

Permalink
COMPAT: followup to #17491 (#17503)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored Sep 13, 2017
1 parent f11bbf2 commit eef810e
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 66 deletions.
14 changes: 7 additions & 7 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,19 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in

.. _whatsnew_0210.api_breaking.iteration_scalars:

Iteration of Series/Index will now return python scalars
Iteration of Series/Index will now return Python scalars
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).
Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).

.. ipython:: python

s = Series([1, 2, 3])
s = pd.Series([1, 2, 3])
s

Previously:

.. code-block:: python
.. code-block:: ipython

In [2]: type(list(s)[0])
Out[2]: numpy.int64
Expand All @@ -215,14 +215,14 @@ New Behaviour:

Furthermore, this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well.

.. ipython:: python
.. ipython:: python

d = {'a':[1], 'b':['b']}
df = DataFrame(d)
df = pd.DataFrame(d)

Previously:

.. code-block:: python
.. code-block:: ipython

In [8]: type(df.to_dict()['a'][0])
Out[8]: numpy.int64
Expand Down
27 changes: 20 additions & 7 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,18 +892,31 @@ def argmin(self, axis=None):

def tolist(self):
"""
return a list of the values; box to scalars
Return a list of the values.
These are each a scalar type, which is a Python scalar
(for str, int, float) or a pandas scalar
(for Timestamp/Timedelta/Interval/Period)
See Also
--------
numpy.tolist
"""
return list(self.__iter__())

if is_datetimelike(self):
return [_maybe_box_datetimelike(x) for x in self._values]
else:
return self._values.tolist()

def __iter__(self):
"""
provide iteration over the values; box to scalars
Return an iterator of the values.
These are each a scalar type, which is a Python scalar
(for str, int, float) or a pandas scalar
(for Timestamp/Timedelta/Interval/Period)
"""
if is_datetimelike(self):
return (_maybe_box_datetimelike(x) for x in self._values)
else:
return iter(self._values.tolist())
return iter(self.tolist())

@cache_readonly
def hasnans(self):
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
is_integer_dtype, is_bool,
is_list_like, is_sequence,
is_scalar)
from pandas.core.common import is_null_slice
from pandas.core.common import is_null_slice, _maybe_box_datetimelike

from pandas.core.algorithms import factorize, take_1d, unique1d
from pandas.core.base import (PandasObject, PandasDelegate,
Expand Down Expand Up @@ -401,8 +401,14 @@ def itemsize(self):

def tolist(self):
"""
return a list of my values
Return a list of the values.
These are each a scalar type, which is a Python scalar
(for str, int, float) or a pandas scalar
(for Timestamp/Timedelta/Interval/Period)
"""
if is_datetimelike(self.categories):
return [_maybe_box_datetimelike(x) for x in self]
return np.array(self).tolist()

def reshape(self, new_shape, *args, **kwargs):
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,8 @@ def get_values(self):
""" return the underlying data as an ndarray """
return self._data.get_values()

def __iter__(self):
""" iterate like Categorical """
return self._data.__iter__()
def tolist(self):
return self._data.tolist()

@property
def codes(self):
Expand Down
13 changes: 7 additions & 6 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,12 +576,13 @@ def test_isin(self):
ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6))

# mismatched categorical -> coerced to ndarray so doesn't matter
tm.assert_numpy_array_equal(
ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] *
6))
tm.assert_numpy_array_equal(
ci.isin(ci.set_categories(list('defghi'))),
np.array([False] * 5 + [True]))
result = ci.isin(ci.set_categories(list('abcdefghi')))
expected = np.array([True] * 6)
tm.assert_numpy_array_equal(result, expected)

result = ci.isin(ci.set_categories(list('defghi')))
expected = np.array([False] * 5 + [True])
tm.assert_numpy_array_equal(result, expected)

def test_identical(self):

Expand Down
37 changes: 0 additions & 37 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,43 +245,6 @@ def test_iter(self):
for i, val in enumerate(self.ts):
assert val == self.ts[i]

def test_iter_box(self):
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
s = pd.Series(vals)
assert s.dtype == 'datetime64[ns]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timestamp)
assert res.tz is None
assert res == exp

vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
pd.Timestamp('2011-01-02', tz='US/Eastern')]
s = pd.Series(vals)

assert s.dtype == 'datetime64[ns, US/Eastern]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timestamp)
assert res.tz == exp.tz
assert res == exp

# timedelta
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
s = pd.Series(vals)
assert s.dtype == 'timedelta64[ns]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timedelta)
assert res == exp

# period (object dtype, not boxed)
vals = [pd.Period('2011-01-01', freq='M'),
pd.Period('2011-01-02', freq='M')]
s = pd.Series(vals)
assert s.dtype == 'object'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Period)
assert res.freq == 'M'
assert res == exp

def test_keys(self):
# HACK: By doing this in two stages, we avoid 2to3 wrapping the call
# to .keys() in a list()
Expand Down
79 changes: 75 additions & 4 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,10 +1054,7 @@ class TestToIterable(object):
('timedelta64[ns]', Timedelta)]

@pytest.mark.parametrize(
'dtype, rdtype',
dtypes + [
('object', object),
('category', object)])
'dtype, rdtype', dtypes)
@pytest.mark.parametrize(
'method',
[
Expand All @@ -1074,6 +1071,43 @@ def test_iterable(self, typ, method, dtype, rdtype):
result = method(s)[0]
assert isinstance(result, rdtype)

@pytest.mark.parametrize(
'dtype, rdtype, obj',
[
('object', object, 'a'),
('object', (int, long), 1),
('category', object, 'a'),
('category', (int, long), 1)])
@pytest.mark.parametrize(
'method',
[
lambda x: x.tolist(),
lambda x: list(x),
lambda x: list(x.__iter__()),
], ids=['tolist', 'list', 'iter'])
@pytest.mark.parametrize('typ', [Series, Index])
def test_iterable_object_and_category(self, typ, method,
dtype, rdtype, obj):
# gh-10904
# gh-13258
# coerce iteration to underlying python / pandas types
s = typ([obj], dtype=dtype)
result = method(s)[0]
assert isinstance(result, rdtype)

@pytest.mark.parametrize(
'dtype, rdtype', dtypes)
def test_iterable_items(self, dtype, rdtype):
# gh-13258
# test items / iteritems yields the correct boxed scalars
# this only applies to series
s = Series([1], dtype=dtype)
_, result = list(s.items())[0]
assert isinstance(result, rdtype)

_, result = list(s.iteritems())[0]
assert isinstance(result, rdtype)

@pytest.mark.parametrize(
'dtype, rdtype',
dtypes + [
Expand Down Expand Up @@ -1102,3 +1136,40 @@ def test_categorial_datetimelike(self, method):

result = method(i)[0]
assert isinstance(result, Timestamp)

def test_iter_box(self):
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
s = pd.Series(vals)
assert s.dtype == 'datetime64[ns]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timestamp)
assert res.tz is None
assert res == exp

vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
pd.Timestamp('2011-01-02', tz='US/Eastern')]
s = pd.Series(vals)

assert s.dtype == 'datetime64[ns, US/Eastern]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timestamp)
assert res.tz == exp.tz
assert res == exp

# timedelta
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
s = pd.Series(vals)
assert s.dtype == 'timedelta64[ns]'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Timedelta)
assert res == exp

# period (object dtype, not boxed)
vals = [pd.Period('2011-01-01', freq='M'),
pd.Period('2011-01-02', freq='M')]
s = pd.Series(vals)
assert s.dtype == 'object'
for res, exp in zip(s, vals):
assert isinstance(res, pd.Period)
assert res.freq == 'M'
assert res == exp

0 comments on commit eef810e

Please sign in to comment.