Skip to content

Commit

Permalink
BUG: in Python3 MultiIndex.from_tuples cannot take "zipped" tuples (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Xbar authored and jreback committed Nov 25, 2017
1 parent be66ef8 commit 0bcd77e
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Indexing
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill NaN values (:issue:`18417`)
- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in Python 3 (:issue:`18434`)
- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`)
-

Expand Down
15 changes: 15 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if not is_list_like(arrays):
raise TypeError("Input must be a list / sequence of array-likes.")
elif is_iterator(arrays):
arrays = list(arrays)

# Check if lengths of all arrays are equal or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
Expand Down Expand Up @@ -1206,6 +1211,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if not is_list_like(tuples):
raise TypeError('Input must be a list / sequence of tuple-likes.')
elif is_iterator(tuples):
tuples = list(tuples)

if len(tuples) == 0:
if names is None:
msg = 'Cannot infer number of levels from empty list'
Expand Down Expand Up @@ -1260,6 +1270,11 @@ def from_product(cls, iterables, sortorder=None, names=None):
from pandas.core.categorical import _factorize_from_iterables
from pandas.core.reshape.util import cartesian_product

if not is_list_like(iterables):
raise TypeError("Input must be a list / sequence of iterables.")
elif is_iterator(iterables):
iterables = list(iterables)

labels, levels = _factorize_from_iterables(iterables)
labels = cartesian_product(labels)
return MultiIndex(levels, labels, sortorder=sortorder, names=names)
Expand Down
64 changes: 59 additions & 5 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,15 +672,31 @@ def test_from_arrays(self):
for lev, lab in zip(self.index.levels, self.index.labels):
arrays.append(np.asarray(lev).take(lab))

result = MultiIndex.from_arrays(arrays)
assert list(result) == list(self.index)
# list of arrays as input
result = MultiIndex.from_arrays(arrays, names=self.index.names)
tm.assert_index_equal(result, self.index)

# infer correctly
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
['a', 'b']])
assert result.levels[0].equals(Index([Timestamp('20130101')]))
assert result.levels[1].equals(Index(['a', 'b']))

def test_from_arrays_iterator(self):
    """Check ``MultiIndex.from_arrays`` with an iterator and with a scalar.

    Regression test for GH 18434.
    """
    # One array per level: the level values taken at that level's labels.
    level_arrays = [
        np.asarray(level).take(codes)
        for level, codes in zip(self.index.levels, self.index.labels)
    ]

    # An iterator over the arrays must give the same index as the fixture.
    from_iter = MultiIndex.from_arrays(iter(level_arrays),
                                       names=self.index.names)
    tm.assert_index_equal(from_iter, self.index)

    # A non-iterable input (here the scalar 0) must raise TypeError.
    expected_msg = "Input must be a list / sequence of array-likes."
    with tm.assert_raises_regex(TypeError, expected_msg):
        MultiIndex.from_arrays(0)

def test_from_arrays_index_series_datetimetz(self):
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern')
Expand Down Expand Up @@ -825,7 +841,25 @@ def test_from_product(self):
expected = MultiIndex.from_tuples(tuples, names=names)

tm.assert_index_equal(result, expected)
assert result.names == names

def test_from_product_iterator(self):
    """Check ``MultiIndex.from_product`` with an iterator and with a scalar.

    Regression test for GH 18434.
    """
    level_names = ['first', 'second']
    outer = ['foo', 'bar', 'buz']
    inner = ['a', 'b', 'c']

    # Every (outer, inner) pair, with the outer level varying slowest.
    pairs = [(o, i) for o in outer for i in inner]
    expected = MultiIndex.from_tuples(pairs, names=level_names)

    # An iterator over the level iterables must give the expected index.
    from_iter = MultiIndex.from_product(iter([outer, inner]),
                                        names=level_names)
    tm.assert_index_equal(from_iter, expected)

    # A non-iterable input (here the scalar 0) must raise TypeError.
    expected_msg = "Input must be a list / sequence of iterables."
    with tm.assert_raises_regex(TypeError, expected_msg):
        MultiIndex.from_product(0)

def test_from_product_empty(self):
# 0 levels
Expand Down Expand Up @@ -1725,8 +1759,28 @@ def test_from_tuples(self):
'from empty list',
MultiIndex.from_tuples, [])

idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
assert len(idx) == 2
expected = MultiIndex(levels=[[1, 3], [2, 4]],
labels=[[0, 1], [0, 1]],
names=['a', 'b'])

# input tuples
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
tm.assert_index_equal(result, expected)

def test_from_tuples_iterator(self):
    """Check ``MultiIndex.from_tuples`` with a ``zip`` object and a scalar.

    Regression test for GH 18434.
    """
    level_names = ['a', 'b']
    expected = MultiIndex(levels=[[1, 3], [2, 4]],
                          labels=[[0, 1], [0, 1]],
                          names=level_names)

    # Pass the ``zip`` result straight through, without wrapping it in a
    # list first.
    zipped = zip([1, 3], [2, 4])
    from_iter = MultiIndex.from_tuples(zipped, names=level_names)
    tm.assert_index_equal(from_iter, expected)

    # A non-iterable input (here the scalar 0) must raise TypeError.
    expected_msg = 'Input must be a list / sequence of tuple-likes.'
    with tm.assert_raises_regex(TypeError, expected_msg):
        MultiIndex.from_tuples(0)

def test_from_tuples_empty(self):
# GH 16777
Expand Down

0 comments on commit 0bcd77e

Please sign in to comment.