-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Series constructor converts dicts with tuples in keys to MultiIndex #4805
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1157,8 +1157,7 @@ def groups(self): | |
else: | ||
to_groupby = lzip(*(ping.grouper for ping in self.groupings)) | ||
to_groupby = Index(to_groupby) | ||
|
||
return self.axis.groupby(to_groupby) | ||
return self.axis.groupby(to_groupby.values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why was this changed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anything converted to a ".values" exists because pandas segfaulted for me under certain cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you're getting segfaults please post them as sep issue (that's pretty bad, we should catch) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @adgaudio I looked this, need to nail down what is causing this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we talked about this already in this pr. The seg faults only appear to happen if we make the changes to this PR introduces. Check out the methods:
Here are some of my earlier comments on them:
Also, I explicitly mentioned in #issuecomment-24477652 that I'd merge the seg fault fixes into this pr. If you want to remove it from here and make a separate pr addressing just the seg faults, someone else will need to take this pr over. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that is all fine; however; where is THIS fault coming from (e.g. the downstream function)? (if its the same ones ok) |
||
|
||
@cache_readonly | ||
def group_info(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -181,12 +181,12 @@ def test_getitem_list(self): | |
# tuples | ||
df = DataFrame(randn(8, 3), | ||
columns=Index([('foo', 'bar'), ('baz', 'qux'), | ||
('peek', 'aboo')], name='sth')) | ||
('peek', 'aboo')], name=['sth', 'sth2'])) | ||
|
||
result = df[[('foo', 'bar'), ('baz', 'qux')]] | ||
expected = df.ix[:, :2] | ||
assert_frame_equal(result, expected) | ||
self.assertEqual(result.columns.name, 'sth') | ||
self.assertEqual(result.columns.names, ['sth', 'sth2']) | ||
|
||
def test_setitem_list(self): | ||
|
||
|
@@ -2490,6 +2490,31 @@ def test_constructor_dict_of_tuples(self): | |
expected = DataFrame(dict((k, list(v)) for k, v in compat.iteritems(data))) | ||
assert_frame_equal(result, expected, check_dtype=False) | ||
|
||
def test_constructor_dict_multiindex(self): | ||
check = lambda result, expected: tm.assert_frame_equal( | ||
result, expected, check_dtype=True, check_index_type=True, | ||
check_column_type=True, check_names=True) | ||
d = {('a', 'a'): {('i', 'i'): 0, ('i', 'j'): 1, ('j', 'i'): 2}, | ||
('b', 'a'): {('i', 'i'): 6, ('i', 'j'): 5, ('j', 'i'): 4}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can use one of these examples in v0.14.0 |
||
('b', 'c'): {('i', 'i'): 7, ('i', 'j'): 8, ('j', 'i'): 9}} | ||
_d = sorted(d.items()) | ||
df = DataFrame(d) | ||
expected = DataFrame( | ||
[x[1] for x in _d], | ||
index=MultiIndex.from_tuples([x[0] for x in _d])).T | ||
expected.index = MultiIndex.from_tuples(expected.index) | ||
check(df, expected) | ||
|
||
d['z'] = {'y': 123., ('i', 'i'): 111, ('i', 'j'): 111, ('j', 'i'): 111} | ||
_d.insert(0, ('z', d['z'])) | ||
expected = DataFrame( | ||
[x[1] for x in _d], | ||
index=Index([x[0] for x in _d], tupleize_cols=False)).T | ||
expected.index = Index(expected.index, tupleize_cols=False) | ||
df = DataFrame(d) | ||
df = df.reindex(columns=expected.columns, index=expected.index) | ||
check(df, expected) | ||
|
||
def _check_basic_constructor(self, empty): | ||
"mat: 2d matrix with shpae (3, 2) to input. empty - makes sized objects" | ||
mat = empty((2, 3), dtype=float) | ||
|
@@ -2913,8 +2938,8 @@ class CustomDict(dict): | |
def test_constructor_ragged(self): | ||
data = {'A': randn(10), | ||
'B': randn(8)} | ||
assertRaisesRegexp(ValueError, 'arrays must all be same length', | ||
DataFrame, data) | ||
with assertRaisesRegexp(ValueError, 'arrays must all be same length'): | ||
DataFrame(data) | ||
|
||
def test_constructor_scalar(self): | ||
idx = Index(lrange(3)) | ||
|
@@ -12042,7 +12067,8 @@ def test_index_namedtuple(self): | |
IndexType = namedtuple("IndexType", ["a", "b"]) | ||
idx1 = IndexType("foo", "bar") | ||
idx2 = IndexType("baz", "bof") | ||
index = Index([idx1, idx2], name="composite_index") | ||
index = Index([idx1, idx2], | ||
name="composite_index", tupleize_cols=False) | ||
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) | ||
self.assertEqual(df.ix[IndexType("foo", "bar")]["A"], 1) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,8 @@ def setUp(self): | |
intIndex = tm.makeIntIndex(100), | ||
floatIndex = tm.makeFloatIndex(100), | ||
empty = Index([]), | ||
tuples = Index(lzip(['foo', 'bar', 'baz'], [1, 2, 3])), | ||
tuples = MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], | ||
[1, 2, 3])) | ||
) | ||
for name, ind in self.indices.items(): | ||
setattr(self, name, ind) | ||
|
@@ -230,6 +231,10 @@ def test_identical(self): | |
i2 = i2.rename('foo') | ||
self.assert_(i1.identical(i2)) | ||
|
||
i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) | ||
i4 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) | ||
self.assertFalse(i3.identical(i4)) | ||
|
||
def test_is_(self): | ||
ind = Index(range(10)) | ||
self.assertTrue(ind.is_(ind)) | ||
|
@@ -987,18 +992,24 @@ def test_equals(self): | |
self.assert_(same_values.equals(self.index)) | ||
|
||
def test_identical(self): | ||
i = Index(self.index.copy()) | ||
self.assertTrue(i.identical(self.index)) | ||
|
||
i = self.index.copy() | ||
same_values = Index(i, dtype=object) | ||
self.assert_(i.identical(same_values)) | ||
same_values_different_type = Index(i, dtype=object) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instad of doing this, do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why equals rather than identical for assertTrue? If you don't check identical, then you aren't verifying that index name(s) persist through different kinds of copies. ... I can change the self.assert_ to self.assertTrue, though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. identical is ok |
||
self.assertFalse(i.identical(same_values_different_type)) | ||
|
||
i = self.index.copy() | ||
i = self.index.copy(dtype=object) | ||
i = i.rename('foo') | ||
same_values = Index(i, dtype=object) | ||
self.assert_(same_values.identical(self.index)) | ||
self.assertTrue(same_values.identical(self.index.copy(dtype=object))) | ||
|
||
self.assertFalse(i.identical(self.index)) | ||
self.assert_(Index(same_values, name='foo').identical(i)) | ||
self.assertTrue(Index(same_values, name='foo', dtype=object | ||
).identical(i)) | ||
|
||
self.assertFalse( | ||
self.index.copy(dtype=object) | ||
.identical(self.index.copy(dtype='int64'))) | ||
|
||
def test_get_indexer(self): | ||
target = Int64Index(np.arange(10)) | ||
|
@@ -2210,6 +2221,12 @@ def test_identical(self): | |
mi2 = mi2.set_names(['new1', 'new2']) | ||
self.assert_(mi.identical(mi2)) | ||
|
||
mi3 = Index(mi.tolist(), names=mi.names) | ||
mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) | ||
self.assert_(mi.identical(mi3)) | ||
self.assert_(not mi.identical(mi4)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but this is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure I can add an assert_(mi.equal(mi4)). I was assuming this is covered by the mi3 test, but you're right that it's probably better to add the assert. |
||
self.assert_(mi.equals(mi4)) | ||
|
||
def test_is_(self): | ||
mi = MultiIndex.from_tuples(lzip(range(10), range(10))) | ||
self.assertTrue(mi.is_(mi)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -633,6 +633,26 @@ def test_constructor_dict(self): | |
expected.ix[1] = 1 | ||
assert_series_equal(result, expected) | ||
|
||
def test_constructor_dict_multiindex(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. all the tests here should use |
||
check = lambda result, expected: tm.assert_series_equal( | ||
result, expected, check_dtype=True, check_index_type=True, | ||
check_series_type=True) | ||
d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} | ||
_d = sorted(d.items()) | ||
ser = Series(d) | ||
expected = Series([x[1] for x in _d], | ||
index=MultiIndex.from_tuples([x[0] for x in _d])) | ||
check(ser, expected) | ||
|
||
d['z'] = 111. | ||
_d.insert(0, ('z', d['z'])) | ||
ser = Series(d) | ||
expected = Series( | ||
[x[1] for x in _d], | ||
index=Index([x[0] for x in _d], tupleize_cols=False)) | ||
ser = ser.reindex(index=expected.index) | ||
check(ser, expected) | ||
|
||
def test_constructor_subclass_dict(self): | ||
data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) | ||
series = Series(data) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you make this issue 3323 (which is the one its closing)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
be more specific, what does more applicatble mean?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need to fix these references
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this not issue 3323?