Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: allow index to be referenced by name #12404

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ Other enhancements
- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here <io.bigquery_authentication>`
- ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`).
- The entire codebase has been ``PEP``-ified (:issue:`12096`)
- Index (or index levels, with a MultiIndex) can now be referenced like column names (:issue:`8162`, :issue:`10816`).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't be in 0.18.0, so remove it for now.


.. _whatsnew_0180.api_breaking:

Expand Down
18 changes: 17 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2018,7 +2018,14 @@ def _getitem_array(self, key):
indexer = key.nonzero()[0]
return self.take(indexer, axis=0, convert=False)
else:
indexer = self.ix._convert_to_indexer(key, axis=1)
try:
indexer = self.ix._convert_to_indexer(key, axis=1)
except KeyError:
if (hasattr(self, 'index') and
any(item in self.index.names for item in key)):
return self._getitem_array_with_index_name(key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still way too messy. It should all be encompassed in a single function that is invoked on a KeyError.

raise

return self.take(indexer, axis=1, convert=True)

def _getitem_multilevel(self, key):
Expand Down Expand Up @@ -2054,6 +2061,15 @@ def _getitem_frame(self, key):
raise ValueError('Must pass DataFrame with boolean values only')
return self.where(key)

def _getitem_array_with_index_name(self, key):
    """Select columns and index levels named in a list-like ``key``.

    The first label of ``key`` that appears in ``self.index.names`` is taken
    out of the selection, the remaining labels are fetched with
    ``self[...]``, and the values fetched for the index label are inserted
    back into the result at the label's original position.

    Parameters
    ----------
    key : list-like
        Labels to select; at least one must be in ``self.index.names``.
        ``key`` itself is never modified.

    Returns
    -------
    The object returned by ``self[remaining_labels]`` with the
    index-derived column inserted at the matching position.

    Raises
    ------
    KeyError
        If no label in ``key`` is one of ``self.index.names``.
    """
    # Work on a private copy: removing the label from the caller's own list
    # would silently alter it, and tuples / Index objects / arrays have no
    # ``remove`` method at all.
    labels = list(key)
    index_names = self.index.names

    for position, label in enumerate(labels):
        if label in index_names:
            break
    else:
        # Raise the lookup error callers of __getitem__ expect rather than
        # leaking a bare StopIteration from an exhausted generator.
        raise KeyError(key)

    del labels[position]
    index_column = self[label]
    result = self[labels]
    result.insert(position, label, index_column)
    return result

def query(self, expr, inplace=False, **kwargs):
"""Query the columns of a frame with a boolean expression.

Expand Down
19 changes: 14 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,15 +1306,25 @@ def _get_item_cache(self, item):
cache = self._item_cache
res = cache.get(item)
if res is None:
values = self._data.get(item)
res = self._box_item_values(item, values)
try:
values = self._data.get(item)
res = self._box_item_values(item, values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

except KeyError:
if hasattr(self, 'index') and item in self.index.names:
res = self._get_item_index_name(item)
else:
raise
cache[item] = res
res._set_as_cached(item, self)

# for a chain
res.is_copy = self.is_copy
return res

def _get_item_index_name(self, item):
    """Return the index level called ``item`` as a Series.

    The values come from ``self.index.get_level_values(item)``; the
    resulting Series is indexed by ``self.index`` and named ``item``.
    """
    level_values = self.index.get_level_values(item)
    row_labels = self.index
    return pd.Series(level_values, index=row_labels, name=item)

def _set_as_cached(self, item, cacher):
"""Set the _cacher attribute on the calling object with a weakref to
cacher.
Expand Down Expand Up @@ -2623,10 +2633,9 @@ def __getattr__(self, name):
if (name in self._internal_names_set or name in self._metadata or
name in self._accessors):
return object.__getattribute__(self, name)
else:
if name in self._info_axis:
elif name in self._info_axis or name in self.index.names:
return self[name]
return object.__getattribute__(self, name)
return object.__getattribute__(self, name)

def __setattr__(self, name, value):
"""After regular attribute access, try setting the name
Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,81 @@ def test_to_xarray(self):
expected,
check_index_type=False)

def test_getitem_index(self):
# GH8162
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to check what this does on a Series and what the failure cases are (e.g. duplicated names): does it raise errors anywhere, and if so, which ones?

idx = pd.Index(list('abc'), name='idx')
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx)
expected = pd.Series(['a', 'b', 'c'], index=idx, name='idx')

assert_series_equal(df['idx'], expected)
assert_series_equal(df.idx, expected)

def test_getitem_index_listlike(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move these tests to tests/frame/test_indexing.

idx = pd.Index(list('abc'), name='idx')
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx)
assert_frame_equal(
df[['idx', 'B']],
pd.DataFrame([
['a', 4],
['b', 5],
['c', 6],
],
columns=['idx', 'B'],
index=idx)
)
assert_frame_equal(
df[['idx', 'A', 'B']],
pd.DataFrame([
['a', 1, 4],
['b', 2, 5],
['c', 3, 6],
],
columns=['idx', 'A', 'B'],
index=idx)
)

def test_getitem_multiindex_level(self):
    """A MultiIndex level can be fetched by name via [] and attribute."""
    # GH10816
    level_labels = [list('abc'), list('fg')]
    mi = pd.MultiIndex.from_product(level_labels, names=['lev0', 'lev1'])
    frame = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=mi)
    want = pd.Series(list('aabbcc'), index=mi, name='lev0')

    # Item access first, then attribute access, each checked on its own.
    by_item = frame['lev0']
    assert_series_equal(by_item, want)
    by_attr = frame.lev0
    assert_series_equal(by_attr, want)

def test_getitem_multiindex_level_listlike(self):
    """Index level names can be mixed with columns in a list selection."""
    mi = pd.MultiIndex.from_product([list('abc'), list('fg')],
                                    names=['lev0', 'lev1'])
    frame = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=mi)

    # One real column followed by one index level.
    got = frame[['A', 'lev1']]
    want = pd.DataFrame(
        [[0, 'f'], [1, 'g'], [2, 'f'], [3, 'g'], [4, 'f'], [5, 'g']],
        columns=['A', 'lev1'],
        index=mi)
    assert_frame_equal(got, want)

    # Both columns followed by both index levels, in reversed level order.
    got = frame[['A', 'B', 'lev1', 'lev0']]
    want = pd.DataFrame(
        [[0, 10, 'f', 'a'],
         [1, 11, 'g', 'a'],
         [2, 12, 'f', 'b'],
         [3, 13, 'g', 'b'],
         [4, 14, 'f', 'c'],
         [5, 15, 'g', 'c']],
        columns=['A', 'B', 'lev1', 'lev0'],
        index=mi)
    assert_frame_equal(got, want)


class TestPanel(tm.TestCase, Generic):
_typ = Panel
Expand Down