diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index b3bbc5cf5ef8c..0f0c925b366a5 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -432,6 +432,7 @@ Other enhancements
 - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here `
 - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`).
 - The entire codebase has been ``PEP``-ified (:issue:`12096`)
+- Index (or index levels, with a MultiIndex) can now be referenced like column names (:issue:`8162`, :issue:`10816`).
 
 .. _whatsnew_0180.api_breaking:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cd32ff2133cae..5ddceeee7e548 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2018,7 +2018,14 @@ def _getitem_array(self, key):
             indexer = key.nonzero()[0]
             return self.take(indexer, axis=0, convert=False)
         else:
-            indexer = self.ix._convert_to_indexer(key, axis=1)
+            try:
+                indexer = self.ix._convert_to_indexer(key, axis=1)
+            except KeyError:
+                if (hasattr(self, 'index') and
+                        any(item in self.index.names for item in key)):
+                    return self._getitem_array_with_index_name(key)
+                raise
+
             return self.take(indexer, axis=1, convert=True)
 
     def _getitem_multilevel(self, key):
@@ -2054,6 +2061,26 @@ def _getitem_frame(self, key):
             raise ValueError('Must pass DataFrame with boolean values only')
         return self.where(key)
 
+    def _getitem_array_with_index_name(self, key):
+        """Select columns from a list-like that also names index level(s).
+
+        The first entry of ``key`` that matches an index (level) name is
+        taken out, the remaining entries are selected with ``self[key]``
+        and the index values are inserted back as a column at the position
+        that entry occupied in ``key``.  Any further index names left in
+        ``key`` are expected to be resolved by the nested ``self[key]``.
+        """
+        # Work on a private list: the caller's key must not be mutated, and
+        # it may be an ndarray / Index / tuple, which have no ``remove``.
+        key = list(key)
+        ix_ix, ix_name = next((i, k) for i, k in enumerate(key)
+                              if k in self.index.names)
+        del key[ix_ix]
+        ix_col = self[ix_name]
+        result = self[key]
+        result.insert(ix_ix, ix_name, ix_col)
+        return result
+
     def query(self, expr, inplace=False, **kwargs):
         """Query the columns of a frame with a boolean expression.
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 14d788fdded7e..bd9cfc28ebc33 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1306,8 +1306,14 @@ def _get_item_cache(self, item):
         cache = self._item_cache
         res = cache.get(item)
         if res is None:
-            values = self._data.get(item)
-            res = self._box_item_values(item, values)
+            try:
+                values = self._data.get(item)
+                res = self._box_item_values(item, values)
+            except KeyError:
+                if hasattr(self, 'index') and item in self.index.names:
+                    res = self._get_item_index_name(item)
+                else:
+                    raise
             cache[item] = res
             res._set_as_cached(item, self)
 
@@ -1315,6 +1321,14 @@ def _get_item_cache(self, item):
             res.is_copy = self.is_copy
         return res
 
+    def _get_item_index_name(self, item):
+        """Return the values of index level ``item`` as a Series.
+
+        The result is indexed by ``self.index`` and named ``item``.
+        """
+        return pd.Series(self.index.get_level_values(item),
+                         index=self.index, name=item)
+
     def _set_as_cached(self, item, cacher):
         """Set the _cacher attribute on the calling object with a weakref to
         cacher.
@@ -2623,10 +2637,21 @@ def __getattr__(self, name):
         if (name in self._internal_names_set or name in self._metadata or
                 name in self._accessors):
             return object.__getattribute__(self, name)
-        else:
-            if name in self._info_axis:
-                return self[name]
-            return object.__getattribute__(self, name)
+        if name in self._info_axis:
+            return self[name]
+        # Never evaluate ``self.index`` while resolving 'index' itself: on
+        # an object without that attribute the lookup re-enters
+        # __getattr__ and would recurse without bound.
+        if name != 'index':
+            names = getattr(getattr(self, 'index', None), 'names', ())
+            if name in names:
+                try:
+                    return self[name]
+                except KeyError:
+                    # item access could not resolve the index name; fall
+                    # through so callers still get an AttributeError
+                    pass
+        return object.__getattribute__(self, name)
 
     def __setattr__(self, name, value):
         """After regular attribute access, try setting the name
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index 4c7510783eda0..b80522e2f76f0 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -1853,6 +1853,81 @@ def test_to_xarray(self):
                              expected,
                              check_index_type=False)
 
+    def test_getitem_index(self):
+        # GH8162
+        idx = pd.Index(list('abc'), name='idx')
+        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx)
+        expected = pd.Series(['a', 'b', 'c'], index=idx, name='idx')
+
+        assert_series_equal(df['idx'], expected)
+        assert_series_equal(df.idx, expected)
+
+    def test_getitem_index_listlike(self):
+        idx = pd.Index(list('abc'), name='idx')
+        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx)
+        assert_frame_equal(
+            df[['idx', 'B']],
+            pd.DataFrame([
+                ['a', 4],
+                ['b', 5],
+                ['c', 6],
+            ],
+                columns=['idx', 'B'],
+                index=idx)
+        )
+        assert_frame_equal(
+            df[['idx', 'A', 'B']],
+            pd.DataFrame([
+                ['a', 1, 4],
+                ['b', 2, 5],
+                ['c', 3, 6],
+            ],
+                columns=['idx', 'A', 'B'],
+                index=idx)
+        )
+
+    def test_getitem_multiindex_level(self):
+        # GH10816
+        idx = pd.MultiIndex.from_product([list('abc'), list('fg')],
+                                         names=['lev0', 'lev1'])
+        df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx)
+        expected = pd.Series(list('aabbcc'), index=idx, name='lev0')
+
+        assert_series_equal(df['lev0'], expected)
+        assert_series_equal(df.lev0, expected)
+
+    def test_getitem_multiindex_level_listlike(self):
+        idx = pd.MultiIndex.from_product([list('abc'), list('fg')],
+                                         names=['lev0', 'lev1'])
+        df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx)
+        assert_frame_equal(
+            df[['A', 'lev1']],
+            pd.DataFrame([
+                [0, 'f'],
+                [1, 'g'],
+                [2, 'f'],
+                [3, 'g'],
+                [4, 'f'],
+                [5, 'g'],
+            ],
+                columns=['A', 'lev1'],
+                index=idx)
+        )
+
+        assert_frame_equal(
+            df[['A', 'B', 'lev1', 'lev0']],
+            pd.DataFrame([
+                [0, 10, 'f', 'a'],
+                [1, 11, 'g', 'a'],
+                [2, 12, 'f', 'b'],
+                [3, 13, 'g', 'b'],
+                [4, 14, 'f', 'c'],
+                [5, 15, 'g', 'c'],
+            ],
+                columns=['A', 'B', 'lev1', 'lev0'],
+                index=idx)
+        )
+
 
 class TestPanel(tm.TestCase, Generic):
     _typ = Panel