Skip to content

Commit

Permalink
ENH: _dir_additions returns also the first level of a MultiIndex (#16326
Browse files Browse the repository at this point in the history
)
  • Loading branch information
BibMartin authored and jreback committed Dec 11, 2017
1 parent e909ea0 commit 2aa4aa9
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 5 deletions.
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,13 @@ def time_value_counts_float64(self):

def time_value_counts_strings(self):
    """Time ``value_counts`` on the series stored in ``self.s``."""
    series = self.s
    series.value_counts()


class series_dir(object):
    """Benchmark calling ``dir()`` on a Series with a string index."""

    goal_time = 0.2

    def setup(self):
        """Build a Series (no data passed) over ``tm.makeStringIndex(10000)``."""
        string_index = tm.makeStringIndex(10000)
        self.s = Series(index=string_index)

    def time_dir_strings(self):
        """Time ``dir`` on the Series created in ``setup``."""
        dir(self.s)
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Other Enhancements
- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`)
- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
- :class:`Series` / :class:`DataFrame` tab completion also returns identifiers in the first level of a :class:`MultiIndex`. (:issue:`16326`)
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)

Expand Down Expand Up @@ -232,9 +233,10 @@ Performance Improvements
- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`)
- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`)
- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time`
- :class:`Series` / :class:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)

.. _whatsnew_0220.docs:

Expand Down
9 changes: 6 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,12 @@ def __unicode__(self):
return '%s(%s)' % (self.__class__.__name__, prepr)

def _dir_additions(self):
    """Return extra attribute names to expose through ``dir()``.

    Adds the string labels of the info axis that are valid Python
    identifiers. If the info axis is a MultiIndex, its first-level
    values are used.

    Only the first 100 unique labels are inspected so that tab
    completion stays fast on objects with a very large info axis.

    Returns
    -------
    set
        The parent class' additions united with the qualifying labels.
    """
    # unique(level=0) collapses a MultiIndex to its first level; the
    # slice caps how many labels are examined.
    candidates = self._info_axis.unique(level=0)[:100]
    additions = {label for label in candidates
                 if isinstance(label, string_types) and isidentifier(label)}
    return super(NDFrame, self)._dir_additions().union(additions)

@property
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,24 @@ def test_column_contains_typeerror(self):
except TypeError:
pass

def test_tab_completion(self):
# DataFrame whose columns are identifiers shall have them in __dir__.
df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
for key in list('ABCD'):
assert key in dir(df)
assert isinstance(df.__getitem__('A'), pd.Series)

# DataFrame whose first-level columns are identifiers shall have
# them in __dir__.
df = pd.DataFrame(
[list('abcd'), list('efgh')],
columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
for key in list('ABCD'):
assert key in dir(df)
for key in list('EFGH'):
assert key not in dir(df)
assert isinstance(df.__getitem__('A'), pd.DataFrame)

def test_not_hashable(self):
df = self.klass([1])
pytest.raises(TypeError, hash, df)
Expand Down
29 changes: 28 additions & 1 deletion pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas import Index, Series, DataFrame, date_range
from pandas.core.indexes.datetimes import Timestamp

from pandas.compat import range
from pandas.compat import range, lzip, isidentifier, string_types
from pandas import (compat, Categorical, period_range, timedelta_range,
DatetimeIndex, PeriodIndex, TimedeltaIndex)
import pandas.io.formats.printing as printing
Expand Down Expand Up @@ -250,6 +250,33 @@ def get_dir(s):
results = get_dir(s)