diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 48efc02480e673..94669c5b02410b 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -52,8 +52,9 @@ Bug Fixes **Conversion** +- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`) - Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) -- + **Indexing** diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d466198b648ef6..e22b0d626a2188 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -3,7 +3,6 @@ import numpy as np from warnings import warn import textwrap -import types from pandas import compat from pandas.compat import u, lzip @@ -28,7 +27,7 @@ is_categorical, is_categorical_dtype, is_list_like, is_sequence, - is_scalar, + is_scalar, is_iterator, is_dict_like) from pandas.core.algorithms import factorize, take_1d, unique1d, take @@ -2483,7 +2482,7 @@ def _convert_to_list_like(list_like): if isinstance(list_like, list): return list_like if (is_sequence(list_like) or isinstance(list_like, tuple) or - isinstance(list_like, types.GeneratorType)): + is_iterator(list_like)): return list(list_like) elif is_scalar(list_like): return [list_like] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a56278b0da497..27cc368a696e36 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -428,12 +428,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif data is None or is_scalar(data): cls._scalar_data_error(data) else: - if tupleize_cols and is_list_like(data) and data: + if tupleize_cols and is_list_like(data): + # GH21470: convert iterable to list before determining if empty if is_iterator(data): data = list(data) - # we must be all tuples, otherwise don't construct - # 10697 - if all(isinstance(e, tuple) for e in data): + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # 10697 from .multi import MultiIndex return MultiIndex.from_tuples( data, names=name or kwargs.get('names')) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b8bd218ec25ab3..1d8a958c3413f3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -445,21 +445,24 @@ def test_constructor_dtypes_timedelta(self, attr, klass): result = klass(list(values), dtype=dtype) tm.assert_index_equal(result, index) - def test_constructor_empty_gen(self): - skip_index_keys = ["repeats", "periodIndex", "rangeIndex", - "tuples"] - for key, index in self.generate_index_types(skip_index_keys): - empty = index.__class__([]) - assert isinstance(empty, index.__class__) - assert not len(empty) + @pytest.mark.parametrize("value", [[], iter([]), (x for x in [])]) + @pytest.mark.parametrize("klass", + [Index, Float64Index, Int64Index, UInt64Index, + CategoricalIndex, DatetimeIndex, TimedeltaIndex]) + def test_constructor_empty(self, value, klass): + empty = klass(value) + assert isinstance(empty, klass) + assert not len(empty) @pytest.mark.parametrize("empty,klass", [ (PeriodIndex([], freq='B'), PeriodIndex), + (PeriodIndex(iter([]), freq='B'), PeriodIndex), + (PeriodIndex((x for x in []), freq='B'), PeriodIndex), (RangeIndex(step=1), pd.RangeIndex), (MultiIndex(levels=[[1, 2], ['blue', 'red']], labels=[[], []]), MultiIndex) ]) - def test_constructor_empty(self, empty, klass): + def test_constructor_empty_special(self, empty, klass): assert isinstance(empty, klass) assert not len(empty)