From fb577b727f21cce4932389ffda84304989ac2b8f Mon Sep 17 00:00:00 2001 From: "Henry S. Harrison" Date: Sun, 21 Feb 2016 02:11:32 -0500 Subject: [PATCH] ENH: allow index to be referenced by name closes #8162, #10816 --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/frame.py | 25 ++++++++++- pandas/core/generic.py | 25 ++++++++--- pandas/tests/test_generic.py | 75 +++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index b3bbc5cf5ef8c..0f0c925b366a5 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -432,6 +432,7 @@ Other enhancements - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here <io.bigquery_authentication>` - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). - The entire codebase has been ``PEP``-ified (:issue:`12096`) +- Index (or index levels, with a MultiIndex) can now be referenced like column names (:issue:`8162`, :issue:`10816`). .. 
_whatsnew_0180.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd32ff2133cae..2e739c3e20181 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2018,7 +2018,30 @@ def _getitem_array(self, key): indexer = key.nonzero()[0] return self.take(indexer, axis=0, convert=False) else: - indexer = self.ix._convert_to_indexer(key, axis=1) + try: + indexer = self.ix._convert_to_indexer(key, axis=1) + + except KeyError: + if self.index.name in key: + ix_name = self.index.name + ix_ix = key.index(ix_name) + + elif (isinstance(self.index, MultiIndex) and + any(item in self.index.names for item in key)): + for item in key: + if item in self.index.names: + ix_name = item + ix_ix = key.index(item) + + else: + raise + + key.remove(ix_name) + ix_col = self[ix_name] + other_cols = self[key] + other_cols.insert(ix_ix, ix_name, ix_col) + return other_cols + return self.take(indexer, axis=1, convert=True) def _getitem_multilevel(self, key): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 14d788fdded7e..f32365a89f27c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1306,8 +1306,20 @@ def _get_item_cache(self, item): cache = self._item_cache res = cache.get(item) if res is None: - values = self._data.get(item) - res = self._box_item_values(item, values) + try: + values = self._data.get(item) + res = self._box_item_values(item, values) + except KeyError: + if hasattr(self, 'index') and self.index.name == item: + res = self.index.to_series() + + elif (isinstance(self.index, MultiIndex) and + item in self.index.names): + res = pd.Series(self.index.get_level_values(item).values, + index=self.index, name=item) + + else: + raise cache[item] = res res._set_as_cached(item, self) @@ -2623,10 +2635,13 @@ def __getattr__(self, name): if (name in self._internal_names_set or name in self._metadata or name in self._accessors): return object.__getattribute__(self, name) - else: - if name in self._info_axis: + elif ( + name in 
self._info_axis or + name == self.index.name or + (isinstance(self.index, MultiIndex) and name in self.index.names) + ): return self[name] - return object.__getattribute__(self, name) + return object.__getattribute__(self, name) def __setattr__(self, name, value): """After regular attribute access, try setting the name diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 4c7510783eda0..b80522e2f76f0 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1853,6 +1853,81 @@ def test_to_xarray(self): expected, check_index_type=False) + def test_getitem_index(self): + # GH8162 + idx = pd.Index(list('abc'), name='idx') + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx) + expected = pd.Series(['a', 'b', 'c'], index=idx, name='idx') + + assert_series_equal(df['idx'], expected) + assert_series_equal(df.idx, expected) + + def test_getitem_index_listlike(self): + idx = pd.Index(list('abc'), name='idx') + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx) + assert_frame_equal( + df[['idx', 'B']], + pd.DataFrame([ + ['a', 4], + ['b', 5], + ['c', 6], + ], + columns=['idx', 'B'], + index=idx) + ) + assert_frame_equal( + df[['idx', 'A', 'B']], + pd.DataFrame([ + ['a', 1, 4], + ['b', 2, 5], + ['c', 3, 6], + ], + columns=['idx', 'A', 'B'], + index=idx) + ) + + def test_getitem_multiindex_level(self): + # GH10816 + idx = pd.MultiIndex.from_product([list('abc'), list('fg')], + names=['lev0', 'lev1']) + df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx) + expected = pd.Series(list('aabbcc'), index=idx, name='lev0') + + assert_series_equal(df['lev0'], expected) + assert_series_equal(df.lev0, expected) + + def test_getitem_multiindex_level_listlike(self): + idx = pd.MultiIndex.from_product([list('abc'), list('fg')], + names=['lev0', 'lev1']) + df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx) + assert_frame_equal( + df[['A', 'lev1']], + pd.DataFrame([ + [0, 'f'], + [1, 'g'], + 
[2, 'f'], + [3, 'g'], + [4, 'f'], + [5, 'g'], + ], + columns=['A', 'lev1'], + index=idx) + ) + + assert_frame_equal( + df[['A', 'B', 'lev1', 'lev0']], + pd.DataFrame([ + [0, 10, 'f', 'a'], + [1, 11, 'g', 'a'], + [2, 12, 'f', 'b'], + [3, 13, 'g', 'b'], + [4, 14, 'f', 'c'], + [5, 15, 'g', 'c'], + ], + columns=['A', 'B', 'lev1', 'lev0'], + index=idx) + ) + class TestPanel(tm.TestCase, Generic): _typ = Panel